/// Calculates the Shannon entropy of `s`, in bits per symbol, over the
/// fixed 256-symbol byte alphabet.
///
/// The string is measured as its UTF-8 byte sequence, so symbol counts and
/// the total length use the same unit and the probabilities always sum to 1,
/// including for non-ASCII input. For pure ASCII strings every character is
/// exactly one byte, so the result equals the per-character entropy.
///
/// Returns `0.0` for the empty string. The result lies in `0.0..=8.0`.
pub fn shannon_entropy(s: &str) -> f32 {
    byte_entropy(s.as_bytes())
}

/// Calculates the Shannon entropy of `s`, in bits per symbol, using the set
/// of characters that actually occur in the string as the alphabet.
///
/// Symbols are Unicode scalar values (`char`), so a multi-byte character
/// counts as a single symbol.
///
/// Returns `0.0` for the empty string.
pub fn str_shannon_entropy(s: &str) -> f32 {
    use std::collections::BTreeMap;

    if s.is_empty() {
        return 0.0;
    }

    // An ordered map keeps the summation order below deterministic, so the
    // floating-point result is reproducible from run to run.
    let mut occurrences: BTreeMap<char, usize> = BTreeMap::new();
    let mut total = 0usize;
    for c in s.chars() {
        *occurrences.entry(c).or_insert(0) += 1;
        total += 1;
    }

    let total = total as f32;
    let mut entropy = 0.0f32;
    for &count in occurrences.values() {
        let p = count as f32 / total;
        entropy -= p * p.log2();
    }
    entropy
}

/// Shannon entropy, in bits per byte, of an arbitrary byte slice.
///
/// Builds a 256-bin histogram in a single pass and sums `-p * log2(p)` over
/// the byte values that occur, in ascending byte order.
fn byte_entropy(data: &[u8]) -> f32 {
    if data.is_empty() {
        return 0.0;
    }

    let mut histogram = [0usize; 256];
    for &byte in data {
        histogram[usize::from(byte)] += 1;
    }

    let total = data.len() as f32;
    let mut entropy = 0.0f32;
    // Absent symbols contribute nothing (the limit of p * log2(p) as p -> 0
    // is 0) and must be skipped, because log2(0) is -inf.
    for &count in histogram.iter().filter(|&&count| count > 0) {
        let p = count as f32 / total;
        entropy -= p * p.log2();
    }
    entropy
}

// Tests
#[cfg(test)]
mod tests {
    use super::{byte_entropy, shannon_entropy, str_shannon_entropy};

    /// Absolute tolerance for values that are not exactly representable.
    const TOLERANCE: f32 = 1e-5;

    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() < TOLERANCE,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    #[test]
    fn it_works() {
        let test_strings = [
            ("hello world", 2.8453512f32),
            ("hello worldd", 2.8553884),
            ("a", 0.0),
            ("", 0.0),
        ];
        for &(test, answer) in test_strings.iter() {
            assert_close(shannon_entropy(test), answer);
        }
    }

    #[test]
    fn test_entropy_empty() {
        assert_eq!(shannon_entropy(""), 0.0);
    }

    #[test]
    fn test_entropy_a() {
        assert_eq!(shannon_entropy("a"), 0.0);
    }

    #[test]
    fn test_entropy_aaaaa() {
        assert_eq!(shannon_entropy("aaaaa"), 0.0);
    }

    #[test]
    fn test_entropy_ab() {
        assert_close(shannon_entropy("ab"), 1.0);
    }

    #[test]
    fn test_entropy_aab() {
        assert_close(shannon_entropy("aab"), 0.9182958);
    }

    // A `&str` cannot hold arbitrary bytes (values >= 0x80 are not valid
    // UTF-8 on their own), so the uniform-distribution cases feed raw bytes
    // to the byte-level helper directly.
    #[test]
    fn test_entropy_equal_distribution1() {
        let bytes: Vec<u8> = (0..=255u8).collect();
        assert_close(byte_entropy(&bytes), 8.0);
    }

    #[test]
    fn test_entropy_equal_distribution2() {
        let bytes: Vec<u8> = (0..256usize * 2).map(|i| (i % 256) as u8).collect();
        assert_close(byte_entropy(&bytes), 8.0);
    }

    #[test]
    fn test_entropy_helloworld() {
        assert_close(shannon_entropy("hello, world"), 3.0220551);
        assert_close(shannon_entropy("hello world"), 2.8453512);
    }

    #[test]
    fn test_entropy_non_ascii_is_byte_based() {
        // U+00E9 encodes as the two distinct bytes 0xC3 0xA9.
        assert_close(shannon_entropy("\u{e9}"), 1.0);
        // U+20AC encodes as three distinct bytes: log2(3) bits per byte.
        assert_close(shannon_entropy("\u{20ac}"), 1.5849625);
    }

    #[test]
    fn test_str_entropy() {
        assert_eq!(str_shannon_entropy(""), 0.0);
        assert_eq!(str_shannon_entropy("a"), 0.0);
        // A single multi-byte character is a single symbol.
        assert_eq!(str_shannon_entropy("\u{e9}"), 0.0);
        assert_close(str_shannon_entropy("ab"), 1.0);
        assert_close(str_shannon_entropy("aab"), 0.9182958);
        assert_close(str_shannon_entropy("hello world"), 2.8453512);
    }
}