/// Characters accepted as base64 / base64url digits, including the `=`
/// padding character and the URL-safe `-` and `_` variants.
pub const BASE64_ALPHABET: &str =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_";

/// Characters accepted as hexadecimal digits, in both upper and lower case.
pub const HEX_ALPHABET: &str = "ABCDEFabcdef0123456789";

/// Counts how many times each of the 256 possible byte values occurs in `bytes`.
fn byte_histogram(bytes: &[u8]) -> [usize; 256] {
    // `usize` counters cannot overflow: a slice never holds more than
    // `usize::MAX` elements.
    let mut counts = [0usize; 256];
    for &byte in bytes {
        counts[usize::from(byte)] += 1;
    }
    counts
}

/// Returns `p * log2(p)` for the relative frequency `p = count / total`.
///
/// The value is zero or negative; callers subtract it from their running sum.
/// `count` must be non-zero, otherwise the logarithm is not finite.
fn weighted_log2(count: usize, total: usize) -> f32 {
    let p: f32 = (count as f32) / (total as f32);
    p * p.log(2.0)
}

/// Calculates the Shannon entropy, in bits per byte, of a byte slice.
///
/// Returns `0.0` for an empty slice and for a slice holding a single distinct
/// byte value; the maximum is `8.0`, reached when all 256 byte values are
/// equally frequent.
pub fn shannon_entropy(bytes: &[u8]) -> f32 {
    let total = bytes.len();
    let mut entropy = 0.0;
    // Byte values that never occur are skipped, so an empty input never
    // reaches the division inside `weighted_log2`.
    for &count in byte_histogram(bytes).iter().filter(|&&count| count != 0) {
        entropy -= weighted_log2(count, total);
    }
    entropy
}

/// Calculates the Shannon entropy of a byte slice, restricted to the symbols
/// listed in `charset`.
///
/// Only bytes matching a character of `charset` contribute a term, but every
/// frequency is still taken relative to the full length of `bytes`. A charset
/// character matches the byte whose value equals its code point, so only
/// characters in `U+0000..=U+00FF` can ever match. A character that appears
/// more than once in `charset` is counted once.
///
/// Returns `0.0` when no byte of `bytes` matches the charset (including when
/// `bytes` is empty).
pub fn shannon_entropy_charset(bytes: &[u8], charset: &str) -> f32 {
    // One pass over the input instead of one pass per charset character.
    let counts = byte_histogram(bytes);
    let total = bytes.len();
    let mut already_counted = [false; 256];
    let mut entropy = 0.0;

    // Terms are accumulated in charset order, which keeps the floating-point
    // summation order stable for a given charset.
    for symbol in charset.chars() {
        let code = symbol as u32;
        if code > 0xFF {
            // No single byte can equal this character.
            continue;
        }
        let index = code as usize;
        if already_counted[index] {
            // A repeated charset character must not contribute twice.
            continue;
        }
        already_counted[index] = true;

        let count = counts[index];
        if count == 0 {
            continue;
        }
        entropy -= weighted_log2(count, total);
    }
    entropy
}

/// Determines whether a string is made up only of hexadecimal digits.
///
/// The empty string is considered hexadecimal.
pub fn is_hex_str(s: &str) -> bool {
    s.chars().all(|c| c.is_ascii_hexdigit())
}

/// Determines whether a string is made up only of characters from
/// `BASE64_ALPHABET` (base64 and base64url digits plus `=`).
///
/// The empty string is considered base64.
pub fn is_base64_str(s: &str) -> bool {
    s.chars().all(|c| BASE64_ALPHABET.contains(c))
}

/// Calculates the Shannon entropy of a string.
///
/// Hexadecimal strings are measured against `HEX_ALPHABET`, other base64
/// strings against `BASE64_ALPHABET`, and everything else over raw bytes.
pub fn str_entropy(s: &str) -> f32 {
    let bytes = s.as_bytes();
    if is_hex_str(s) {
        shannon_entropy_charset(bytes, HEX_ALPHABET)
    } else if is_base64_str(s) {
        shannon_entropy_charset(bytes, BASE64_ALPHABET)
    } else {
        shannon_entropy(bytes)
    }
}

// Tests
#[cfg(test)]
mod tests {
    use super::{
        is_base64_str, is_hex_str, shannon_entropy, shannon_entropy_charset, str_entropy,
    };

    /// Asserts that two entropies agree up to a small absolute tolerance;
    /// exact `f32` equality is too brittle across math library versions.
    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() < 1e-5,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    #[test]
    fn test_entropy() {
        let cases: [(&str, f32); 7] = [
            ("hello world", 2.8453512),
            ("hello worldd", 2.8553884),
            ("a", 0.0),
            ("aaaaa", 0.0),
            ("ab", 1.0),
            ("aab", 0.9182958),
            ("", 0.0),
        ];
        for &(input, expected) in cases.iter() {
            assert_close(str_entropy(input), expected);
        }
    }

    #[test]
    fn test_entropy_equal_distribution1() {
        let mut bytes = [0u8; 256];
        for (i, slot) in bytes.iter_mut().enumerate() {
            *slot = i as u8;
        }
        assert_close(shannon_entropy(&bytes), 8.0);
    }

    #[test]
    fn test_entropy_equal_distribution2() {
        let mut bytes = [0u8; 256 * 2];
        for (i, slot) in bytes.iter_mut().enumerate() {
            *slot = (i % 256) as u8;
        }
        assert_close(shannon_entropy(&bytes), 8.0);
    }

    #[test]
    fn test_entropy_helloworld() {
        assert_close(str_entropy("hello, world"), 3.0220551);
        assert_close(str_entropy("hello world"), 2.8453512);
    }

    #[test]
    fn test_entropy_charset_ignores_duplicates() {
        assert_close(shannon_entropy_charset(b"ab", "aab"), 1.0);
        assert_close(shannon_entropy_charset(b"ab", "ab"), 1.0);
    }

    #[test]
    fn test_hex_str_recognizer() {
        assert!(is_hex_str("0123456789abcdef0123456789abcdef"));
        assert!(is_hex_str("68656c6c6f20776f726c64"));
        assert!(!is_hex_str("g"));
    }

    #[test]
    fn test_base64_str_recognizer() {
        assert!(is_base64_str("aGVsbG8gd29ybGQ="));
        assert!(!is_base64_str("#@$"));
    }
}