thezero 5 years ago
commit
f1745f1b61
3 changed files with 126 additions and 0 deletions
  1. 3 0
      .gitignore
  2. 6 0
      Cargo.toml
  3. 117 0
      src/lib.rs

+ 3 - 0
.gitignore

@@ -0,0 +1,3 @@
+/target
+**/*.rs.bk
+Cargo.lock

+ 6 - 0
Cargo.toml

@@ -0,0 +1,6 @@
+[package]
+name = "shannon-entropy-rs"
+version = "0.1.0"
+authors = ["thezero <io@thezero.org>"]
+
+[dependencies]

+ 117 - 0
src/lib.rs

@@ -0,0 +1,117 @@
+
+/// Computes the Shannon entropy of `s`, in bits per symbol, over the byte alphabet.
+///
+/// Each of the 256 possible byte values is treated as one symbol, so the result
+/// is at most 8 bits. The string is measured through its UTF-8 encoding: an
+/// ASCII character contributes one symbol, a non-ASCII character contributes one
+/// symbol per encoded byte.
+///
+/// Returns `0.0` for the empty string.
+pub fn shannon_entropy(s: &str) -> f32 {
+    if s.is_empty() {
+        return 0.0;
+    }
+
+    // Build the byte histogram in a single pass instead of rescanning the
+    // string once for every candidate symbol.
+    let mut counts = [0usize; 256];
+    for byte in s.bytes() {
+        counts[usize::from(byte)] += 1;
+    }
+
+    // Both the counts and the total are measured in bytes, so the
+    // probabilities sum to 1 for non-ASCII input as well.
+    let total = s.len() as f32;
+
+    // Symbols that never occur contribute nothing (and would yield NaN via
+    // `0 * log2(0)`), so they are skipped.
+    counts
+        .iter()
+        .filter(|&&count| count > 0)
+        .fold(0.0_f32, |entropy, &count| {
+            let p = count as f32 / total;
+            entropy + -p * p.log2()
+        })
+}
+
+/// Computes the Shannon entropy of `s`, in bits per symbol, over the alphabet
+/// actually used by the string.
+///
+/// Symbols are the Unicode scalar values (`char`s) that occur in `s`; the
+/// probability of each symbol is its number of occurrences divided by the
+/// number of `char`s in the string.
+///
+/// Returns `0.0` for the empty string and for strings made of a single
+/// repeated character.
+pub fn str_shannon_entropy(s: &str) -> f32 {
+    // An ordered map keeps the summation order fixed, so the floating-point
+    // result is reproducible from run to run.
+    let mut counts = std::collections::BTreeMap::new();
+    let mut total = 0usize;
+    for symbol in s.chars() {
+        *counts.entry(symbol).or_insert(0usize) += 1;
+        total += 1;
+    }
+
+    // For the empty string the map is empty and the fold yields 0.0 without
+    // ever dividing by `total`.
+    let total = total as f32;
+    counts.values().fold(0.0_f32, |entropy, &count| {
+        let p = count as f32 / total;
+        entropy - p * p.log2()
+    })
+}
+
+
+// Tests
+#[cfg(test)]
+mod tests {
+    use super::shannon_entropy;
+
+    /// Maximum absolute error tolerated when comparing `f32` entropies; exact
+    /// equality on floats is brittle against rounding differences.
+    const TOLERANCE: f32 = 1e-5;
+
+    /// Asserts that the entropy of `input` is within `TOLERANCE` of `expected`.
+    fn assert_entropy(input: &str, expected: f32) {
+        let actual = shannon_entropy(input);
+        assert!(
+            (actual - expected).abs() < TOLERANCE,
+            "entropy of {:?}: expected {}, got {}",
+            input,
+            expected,
+            actual
+        );
+    }
+
+    #[test]
+    fn it_works() {
+        let cases = [
+            ("hello world", 2.8453512),
+            ("hello worldd", 2.8553884),
+            ("a", 0.0),
+            ("", 0.0),
+        ];
+
+        for (input, expected) in cases.iter() {
+            assert_entropy(input, *expected);
+        }
+    }
+
+    #[test]
+    fn test_entropy_empty() {
+        assert_entropy("", 0.0);
+    }
+
+    #[test]
+    fn test_entropy_a() {
+        assert_entropy("a", 0.0);
+    }
+
+    #[test]
+    fn test_entropy_aaaaa() {
+        assert_entropy("aaaaa", 0.0);
+    }
+
+    #[test]
+    fn test_entropy_ab() {
+        assert_entropy("ab", 1.0);
+    }
+
+    #[test]
+    fn test_entropy_aab() {
+        assert_entropy("aab", 0.9182958);
+    }
+
+    #[test]
+    fn test_entropy_equal_distribution1() {
+        // Only the 128 ASCII values can appear as single bytes in a valid
+        // `&str`; bytes >= 0x80 would be replaced by U+FFFD in a lossy
+        // conversion. 128 equally likely symbols give log2(128) = 7 bits.
+        let text: String = (0u8..128).map(char::from).collect();
+        assert_eq!(text.len(), 128);
+        assert_entropy(&text, 7.0);
+    }
+
+    #[test]
+    fn test_entropy_equal_distribution2() {
+        // Every ASCII value twice: the distribution is still uniform, so the
+        // entropy is unchanged.
+        let text: String = (0u8..128).cycle().take(256).map(char::from).collect();
+        assert_eq!(text.len(), 256);
+        assert_entropy(&text, 7.0);
+    }
+
+    #[test]
+    fn test_entropy_helloworld() {
+        assert_entropy("hello, world", 3.0220551);
+        assert_entropy("hello world", 2.8453512);
+    }
+}