123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117 |
/// Calculates the Shannon entropy of `s`, in bits per byte, over its UTF-8 bytes.
///
/// The alphabet is the 256 possible byte values. Each byte value's probability
/// is its number of occurrences divided by the byte length of `s`, so the
/// probabilities always sum to one, including for non-ASCII input (where one
/// character spans several bytes).
///
/// Returns `0.0` for an empty string; otherwise the result lies in `0.0..=8.0`.
pub fn shannon_entropy(s: &str) -> f32 {
    if s.is_empty() {
        return 0f32;
    }

    // Build the byte histogram in one pass instead of rescanning the string
    // once for each of the 256 possible byte values.
    let mut counts = [0usize; 256];
    for byte in s.bytes() {
        counts[usize::from(byte)] += 1;
    }

    // `len()` is the byte length, matching the unit the histogram counts.
    let total = s.len() as f32;

    // Sum in ascending byte order so the floating-point result is reproducible.
    counts
        .iter()
        .filter(|&&count| count > 0)
        .fold(0f32, |entropy, &count| {
            let p = count as f32 / total;
            entropy + (-p * p.log2())
        })
}
/// Calculates the Shannon entropy of `s`, in bits per character, detecting the
/// alphabet from the string itself.
///
/// The alphabet is the set of distinct Unicode scalar values (`char`s) that
/// occur in `s`. Each character's probability is its number of occurrences
/// divided by the total number of characters.
///
/// Returns `0.0` for an empty string.
pub fn str_shannon_entropy(s: &str) -> f32 {
    if s.is_empty() {
        return 0f32;
    }

    // An ordered map keeps the summation order deterministic, so the
    // floating-point result is identical from run to run.
    let mut counts = std::collections::BTreeMap::new();
    let mut total = 0usize;
    for ch in s.chars() {
        *counts.entry(ch).or_insert(0usize) += 1;
        total += 1;
    }

    let total = total as f32;
    counts.values().fold(0f32, |entropy, &count| {
        let p = count as f32 / total;
        entropy + (-p * p.log2())
    })
}
// Tests
#[cfg(test)]
mod tests {
    use super::shannon_entropy;

    /// Maximum absolute difference tolerated when comparing `f32` entropies.
    const EPSILON: f32 = 1e-5;

    /// Asserts that `actual` is within `EPSILON` of `expected`.
    ///
    /// Exact equality on floats is brittle: the last bits depend on the
    /// platform's `log2` implementation and on summation order.
    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() < EPSILON,
            "entropy {} differs from expected {}",
            actual,
            expected
        );
    }

    /// Returns a string containing every ASCII value (0..128) exactly once.
    ///
    /// Only ASCII is used because a `&str` must be valid UTF-8, so it cannot
    /// hold every one of the 256 byte values exactly once.
    fn all_ascii() -> String {
        (0u8..128).map(char::from).collect()
    }

    #[test]
    fn it_works() {
        let test_strings = vec![
            ("hello world", 2.8453512),
            ("hello worldd", 2.8553884),
            ("a", 0.0),
            ("", 0.0),
        ];
        for (test, answer) in test_strings {
            let entropy: f32 = shannon_entropy(test);
            assert_close(entropy, answer);
        }
    }

    #[test]
    fn test_entropy_empty() {
        assert_close(shannon_entropy(""), 0.0);
    }

    #[test]
    fn test_entropy_a() {
        assert_close(shannon_entropy("a"), 0.0);
    }

    #[test]
    fn test_entropy_aaaaa() {
        assert_close(shannon_entropy("aaaaa"), 0.0);
    }

    #[test]
    fn test_entropy_ab() {
        assert_close(shannon_entropy("ab"), 1.0);
    }

    #[test]
    fn test_entropy_aab() {
        assert_close(shannon_entropy("aab"), 0.9182958);
    }

    #[test]
    fn test_entropy_equal_distribution1() {
        // 128 equally likely symbols carry log2(128) = 7 bits each.
        let text = all_ascii();
        assert_eq!(text.len(), 128);
        assert_close(shannon_entropy(&text), 7.0);
    }

    #[test]
    fn test_entropy_equal_distribution2() {
        // Repeating the uniform input leaves every probability unchanged.
        let text = all_ascii().repeat(2);
        assert_eq!(text.len(), 256);
        assert_close(shannon_entropy(&text), 7.0);
    }

    #[test]
    fn test_entropy_helloworld() {
        assert_close(shannon_entropy("hello, world"), 3.0220551);
        assert_close(shannon_entropy("hello world"), 2.8453512);
    }
}
|