|
@@ -0,0 +1,117 @@
|
|
|
+
|
|
|
/// Calculates the Shannon entropy of `s` over the byte alphabet (`0..=255`).
///
/// The string is treated as its sequence of UTF-8 bytes. Each of the 256
/// possible byte values is a symbol, and its probability is the number of
/// times it occurs divided by the total number of bytes.
///
/// # Returns
///
/// The entropy in bits per byte, a value between `0.0` and `8.0`. An empty
/// string has an entropy of `0.0`.
pub fn shannon_entropy(s: &str) -> f32 {
    if s.is_empty() {
        return 0.0;
    }

    // A single pass over the input builds the histogram of byte values.
    let mut counts = [0usize; 256];
    for &byte in s.as_bytes() {
        counts[usize::from(byte)] += 1;
    }

    // `s.len()` is the byte length, which matches the unit being counted, so
    // the probabilities always sum to one (also for multi-byte characters).
    let total = s.len() as f32;

    // Symbols that never occur contribute nothing (and log2(0) is undefined),
    // so they are skipped. Summation runs in ascending byte order.
    counts
        .iter()
        .filter(|&&count| count > 0)
        .map(|&count| count as f32 / total)
        .fold(0.0, |entropy, p| entropy - p * p.log2())
}
|
|
|
+
|
|
|
/// Calculates the Shannon entropy of `s`, detecting the alphabet from the
/// string itself.
///
/// The alphabet is the set of distinct characters (Unicode scalar values)
/// that occur in `s`. Each character's probability is the number of times it
/// occurs divided by the total number of characters, so multi-byte characters
/// count as a single symbol.
///
/// # Returns
///
/// The entropy in bits per character. An empty string has an entropy of
/// `0.0`, as does a string made of a single repeated character.
pub fn str_shannon_entropy(s: &str) -> f32 {
    if s.is_empty() {
        return 0.0;
    }

    // An ordered map keeps the summation order deterministic, so the same
    // input always produces the same floating-point result.
    let mut counts = std::collections::BTreeMap::new();
    let mut total = 0usize;
    for c in s.chars() {
        *counts.entry(c).or_insert(0usize) += 1;
        total += 1;
    }

    let total = total as f32;
    counts
        .values()
        .map(|&count| count as f32 / total)
        .fold(0.0, |entropy, p| entropy - p * p.log2())
}
|
|
|
+
|
|
|
+
|
|
|
+// Tests
|
|
|
#[cfg(test)]
mod tests {
    use super::shannon_entropy;

    /// Asserts that two entropies agree within a small tolerance.
    ///
    /// The values come out of floating-point logarithms and sums, so exact
    /// equality would make the tests depend on the last bit of the result.
    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() < 1e-5,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    #[test]
    fn it_works() {
        let test_strings = [
            ("hello world", 2.8453512),
            ("hello worldd", 2.8553884),
            ("a", 0.0),
            ("", 0.0),
        ];

        for &(test, answer) in test_strings.iter() {
            assert_close(shannon_entropy(test), answer);
        }
    }

    #[test]
    fn test_entropy_empty() {
        assert_close(shannon_entropy(""), 0.0);
    }

    #[test]
    fn test_entropy_a() {
        assert_close(shannon_entropy("a"), 0.0);
    }

    #[test]
    fn test_entropy_aaaaa() {
        assert_close(shannon_entropy("aaaaa"), 0.0);
    }

    #[test]
    fn test_entropy_ab() {
        assert_close(shannon_entropy("ab"), 1.0);
    }

    #[test]
    fn test_entropy_aab() {
        assert_close(shannon_entropy("aab"), 0.9182958);
    }

    // A `&str` must be valid UTF-8, so the byte values 0x80..=0xFF cannot
    // appear on their own. The widest uniform distribution a string can carry
    // is therefore over the 128 ASCII values, whose entropy is log2(128) = 7.
    #[test]
    fn test_entropy_equal_distribution1() {
        let text: String = (0..128u8).map(char::from).collect();
        assert_eq!(text.len(), 128);
        assert_close(shannon_entropy(&text), 7.0);
    }

    // Repeating every symbol the same number of times keeps the distribution
    // uniform, so the entropy must not change.
    #[test]
    fn test_entropy_equal_distribution2() {
        let text: String = (0..128u8).chain(0..128u8).map(char::from).collect();
        assert_eq!(text.len(), 256);
        assert_close(shannon_entropy(&text), 7.0);
    }

    #[test]
    fn test_entropy_helloworld() {
        assert_close(shannon_entropy("hello, world"), 3.0220551);
        assert_close(shannon_entropy("hello world"), 2.8453512);
    }
}
|