/// Calculates the Shannon entropy of `s`, in bits per symbol, over the fixed
/// 256-symbol byte alphabet.
///
/// The string is treated as its UTF-8 byte sequence, so both the symbol counts
/// and the total length are measured in bytes and the symbol probabilities
/// always sum to one. For pure-ASCII input this is the same as counting
/// characters. Use [`str_shannon_entropy`] to measure entropy over Unicode
/// characters instead.
///
/// Returns `0.0` for an empty string.
pub fn shannon_entropy(s: &str) -> f32 {
    let bytes = s.as_bytes();
    if bytes.is_empty() {
        return 0.0;
    }

    // One pass over the input builds the histogram of byte values.
    let mut counts = [0usize; 256];
    for &b in bytes {
        counts[usize::from(b)] += 1;
    }

    let total = bytes.len() as f32;
    // Symbols that never occur contribute nothing (and would hit log2(0)),
    // so they are skipped. Accumulation runs in ascending byte order.
    counts
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f32, |entropy, &count| {
            let p = count as f32 / total;
            entropy - p * p.log2()
        })
}

/// Calculates the Shannon entropy of `s`, in bits per symbol, using the set of
/// characters that actually occur in `s` as the alphabet.
///
/// Symbols are Unicode scalar values (`char`), so a multi-byte character
/// counts as a single symbol.
///
/// Returns `0.0` for an empty string.
pub fn str_shannon_entropy(s: &str) -> f32 {
    use std::collections::BTreeMap;

    if s.is_empty() {
        return 0.0;
    }

    // An ordered map keeps the summation order deterministic, which makes the
    // floating-point result reproducible from run to run.
    let mut counts: BTreeMap<char, usize> = BTreeMap::new();
    let mut total = 0usize;
    for c in s.chars() {
        *counts.entry(c).or_insert(0) += 1;
        total += 1;
    }

    let total = total as f32;
    counts.values().fold(0.0_f32, |entropy, &count| {
        let p = count as f32 / total;
        entropy - p * p.log2()
    })
}

// Tests
#[cfg(test)]
mod tests {
    use super::{shannon_entropy, str_shannon_entropy};

    /// Absolute tolerance for comparing entropies; exact `f32` equality would
    /// depend on the platform's `log2` implementation.
    const TOLERANCE: f32 = 1e-5;

    /// Asserts that `actual` is within [`TOLERANCE`] of `expected`.
    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() <= TOLERANCE,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    /// Reference implementation working directly on a byte slice.
    pub fn shannon_entropy2(bytes: &[u8]) -> f32 {
        let mut counts = [0usize; 256];
        for &b in bytes {
            counts[usize::from(b)] += 1;
        }

        let total = bytes.len() as f32;
        let mut entropy = 0.0_f32;
        for &count in counts.iter() {
            if count == 0 {
                continue;
            }
            let p = count as f32 / total;
            entropy -= p * p.log2();
        }
        entropy
    }

    #[test]
    fn it_works() {
        let cases = [
            ("hello world", 2.8453512),
            ("hello worldd", 2.8553884),
            ("a", 0.0),
            ("", 0.0),
        ];
        for &(input, expected) in &cases {
            assert_close(shannon_entropy(input), expected);
        }
    }

    #[test]
    fn test_entropy_empty() {
        assert_close(shannon_entropy(""), 0.0);
    }

    #[test]
    fn test_entropy_a() {
        assert_close(shannon_entropy("a"), 0.0);
    }

    #[test]
    fn test_entropy_aaaaa() {
        assert_close(shannon_entropy("aaaaa"), 0.0);
    }

    #[test]
    fn test_entropy_ab() {
        assert_close(shannon_entropy("ab"), 1.0);
    }

    #[test]
    fn test_entropy_aab() {
        assert_close(shannon_entropy("aab"), 0.9182958);
    }

    #[test]
    fn test_entropy_equal_distribution1() {
        // Every char in U+0000..=U+00FF, twice over.
        let text: String = (0..2)
            .flat_map(|_| (0u8..=255).map(char::from))
            .collect();

        // 128 one-byte chars plus 128 two-byte chars per pass.
        assert_eq!(text.len(), 768);
        // 256 equally likely characters carry exactly 8 bits each.
        assert_close(str_shannon_entropy(&text), 8.0);
    }

    #[test]
    fn test_entropy_equal_distribution_ascii() {
        // 128 equally likely ASCII bytes carry exactly 7 bits each.
        let text: String = (0..2)
            .flat_map(|_| (0u8..128).map(char::from))
            .collect();

        assert_eq!(text.len(), 256);
        assert_close(shannon_entropy(&text), 7.0);
    }

    #[test]
    fn test_entropy_equal_distribution2() {
        // All 256 byte values, twice over: a uniform distribution of 8 bits.
        let bytes: Vec<u8> = (0..2).flat_map(|_| 0u8..=255).collect();

        assert_eq!(bytes.len(), 512);
        assert_close(shannon_entropy2(&bytes), 8.0);
    }

    #[test]
    fn test_entropy_matches_byte_reference() {
        for &text in &["", "a", "hello, world", "caf\u{e9}"] {
            assert_close(shannon_entropy(text), shannon_entropy2(text.as_bytes()));
        }
    }

    #[test]
    fn test_entropy_non_ascii() {
        // U+00E9 encodes as two distinct bytes: one bit per byte, but zero
        // bits per character because only one character is present.
        assert_close(shannon_entropy("\u{e9}"), 1.0);
        assert_close(str_shannon_entropy("\u{e9}"), 0.0);
    }

    #[test]
    fn test_str_entropy_basic() {
        assert_close(str_shannon_entropy(""), 0.0);
        assert_close(str_shannon_entropy("aaaaa"), 0.0);
        assert_close(str_shannon_entropy("ab"), 1.0);
        assert_close(str_shannon_entropy("\u{e9}a"), 1.0);
        assert_close(str_shannon_entropy("hello world"), 2.8453512);
    }

    #[test]
    fn test_entropy_helloworld() {
        assert_close(shannon_entropy("hello, world"), 3.0220551);
        assert_close(shannon_entropy("hello world"), 2.8453512);
    }
}