123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142 |
/// Calculates the Shannon entropy of `s`, in bits per symbol, over the fixed
/// 256-symbol alphabet of byte values.
///
/// The string is treated as its UTF-8 byte sequence: each byte is one symbol
/// and every probability is taken relative to `s.len()` (the byte length), so
/// the probabilities always sum to 1. For pure-ASCII input this equals the
/// per-character entropy.
///
/// Returns `0.0` for an empty string.
pub fn shannon_entropy(s: &str) -> f32 {
    let mut entropy = 0f32;
    if s.is_empty() {
        return entropy;
    }

    // Build the histogram in a single pass instead of rescanning the string
    // once per possible symbol.
    let mut counts = [0usize; 256];
    for &byte in s.as_bytes() {
        counts[usize::from(byte)] += 1;
    }

    let slen = s.len() as f32;
    // Symbols are visited in ascending order so the floating-point summation
    // order is fixed and results are reproducible.
    for &count in counts.iter().filter(|&&count| count > 0) {
        let p = count as f32 / slen;
        entropy += -p * p.log2();
    }
    entropy
}
/// Calculates the Shannon entropy of `s`, in bits per character, using the
/// characters that actually occur in `s` as the alphabet.
///
/// Each Unicode scalar value (`char`) is one symbol and probabilities are
/// taken relative to the number of characters, not the number of bytes.
///
/// Returns `0.0` for an empty string and for a string made of a single
/// repeated character.
pub fn str_shannon_entropy(s: &str) -> f32 {
    let mut entropy = 0f32;
    if s.is_empty() {
        return entropy;
    }

    // An ordered map gives a deterministic iteration order, which keeps the
    // floating-point summation reproducible from run to run.
    let mut counts = std::collections::BTreeMap::new();
    let mut total = 0usize;
    for ch in s.chars() {
        *counts.entry(ch).or_insert(0usize) += 1;
        total += 1;
    }

    let total = total as f32;
    for &count in counts.values() {
        let p = count as f32 / total;
        entropy -= p * p.log2();
    }
    entropy
}
// Tests
#[cfg(test)]
mod tests {
    use super::shannon_entropy;

    /// Largest absolute difference accepted between a computed entropy and
    /// its expected value; exact `f32` equality is too brittle for results
    /// that go through `log2`.
    const TOLERANCE: f32 = 1e-5;

    /// Asserts that `actual` lies within `TOLERANCE` of `expected`.
    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() <= TOLERANCE,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    /// Byte-slice entropy used as an independent cross-check: bits per byte
    /// over the 256 possible byte values, `0.0` for an empty slice.
    pub fn shannon_entropy2(bytes: &[u8]) -> f32 {
        let mut counts = [0usize; 256];
        for &b in bytes {
            counts[usize::from(b)] += 1;
        }

        let total = bytes.len() as f32;
        let mut entropy = 0f32;
        for &count in counts.iter() {
            if count == 0 {
                continue;
            }
            let p = count as f32 / total;
            entropy -= p * p.log2();
        }
        entropy
    }

    #[test]
    fn it_works() {
        let cases: [(&str, f32); 4] = [
            ("hello world", 2.8453512),
            ("hello worldd", 2.8553884),
            ("a", 0.0),
            ("", 0.0),
        ];
        for &(input, expected) in cases.iter() {
            assert_close(shannon_entropy(input), expected);
        }
    }

    #[test]
    fn test_entropy_empty() {
        assert_close(shannon_entropy(""), 0.0);
    }

    #[test]
    fn test_entropy_a() {
        assert_close(shannon_entropy("a"), 0.0);
    }

    #[test]
    fn test_entropy_aaaaa() {
        assert_close(shannon_entropy("aaaaa"), 0.0);
    }

    #[test]
    fn test_entropy_ab() {
        assert_close(shannon_entropy("ab"), 1.0);
    }

    #[test]
    fn test_entropy_aab() {
        assert_close(shannon_entropy("aab"), 0.9182958);
    }

    #[test]
    fn test_entropy_equal_distribution1() {
        let mut latin1_twice = String::new();
        for _ in 0..2 {
            latin1_twice.extend((0..=255u8).map(char::from));
        }
        // Every char in U+0080..=U+00FF is encoded as two UTF-8 bytes, so the
        // string holds 512 chars but 768 bytes. A uniform distribution over
        // all 256 byte values therefore cannot be expressed as a `&str`; it is
        // covered with a raw byte slice in `test_entropy_equal_distribution2`.
        assert_eq!(latin1_twice.chars().count(), 512);
        assert_eq!(latin1_twice.len(), 768);
    }

    #[test]
    fn test_entropy_equal_distribution2() {
        // Each of the 256 byte values appears exactly twice: 8 bits per byte.
        let bytes: Vec<u8> = (0..=255u8).cycle().take(256 * 2).collect();
        assert_eq!(bytes.len(), 512);
        assert_close(shannon_entropy2(&bytes), 8.0);
        assert_close(shannon_entropy2(&[]), 0.0);
    }

    #[test]
    fn test_entropy_helloworld() {
        assert_close(shannon_entropy("hello, world"), 3.0220551);
        assert_close(shannon_entropy("hello world"), 2.8453512);
        // On ASCII input the byte-slice helper and the string API must agree.
        assert_close(
            shannon_entropy2(b"hello world"),
            shannon_entropy("hello world"),
        );
    }
}
|