// Shannon entropy helpers for byte slices and strings.
//
// All entropies are expressed in bits per byte: the input is treated as a sequence of
// byte-sized symbols and the logarithm is taken in base 2.

/// Characters accepted by [`is_base64_str`]: the standard base64 alphabet, the `=` padding
/// character, and the base64url substitutes `-` and `_`.
pub const BASE64_ALPHABET: &str =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_";

/// Hexadecimal digits, in both upper and lower case.
pub const HEX_ALPHABET: &str = "ABCDEFabcdef0123456789";

/// Counts how many times each of the 256 possible byte values occurs in `bytes`.
fn byte_histogram(bytes: &[u8]) -> [usize; 256] {
    let mut counts = [0usize; 256];
    for &b in bytes {
        counts[usize::from(b)] += 1;
    }
    counts
}

/// Returns `p * log2(p)` for a symbol that occurs `count` times out of `total`.
///
/// Callers only pass `count > 0`, which implies `total > 0`, so there is no division by zero.
/// `log(2.0)` (rather than `log2()`) is used on purpose so the numeric results stay exactly
/// what earlier versions of this code produced.
fn weighted_log2(count: usize, total: usize) -> f32 {
    let p = count as f32 / total as f32;
    p * p.log(2.0)
}

/// Calculates the Shannon entropy of a byte slice, in bits per byte.
///
/// Returns `0.0` for an empty slice and for a slice made of a single repeated byte value.
pub fn shannon_entropy(bytes: &[u8]) -> f32 {
    let total = bytes.len();
    let counts = byte_histogram(bytes);

    counts
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f32, |entropy, &count| entropy - weighted_log2(count, total))
}

/// Calculates the Shannon entropy of a byte slice, considering only the symbols in `charset`.
///
/// Bytes that do not correspond to any character of `charset` contribute nothing to the sum,
/// but they still count towards the total length used to compute probabilities.
///
/// A byte corresponds to a charset character when the byte value equals the character's code
/// point, so characters above U+00FF can never match. A character listed more than once in
/// `charset` is counted once per listing.
pub fn shannon_entropy_charset(bytes: &[u8], charset: &str) -> f32 {
    let total = bytes.len();
    // One pass over the input instead of one pass per charset character.
    let counts = byte_histogram(bytes);

    charset
        .chars()
        .filter(|&c| (c as u32) <= 0xFF)
        .map(|c| counts[c as u32 as usize])
        .filter(|&count| count > 0)
        .fold(0.0_f32, |entropy, count| entropy - weighted_log2(count, total))
}

/// Determines whether a string is made up only of hexadecimal digits.
///
/// The empty string contains no offending character and therefore yields `true`.
pub fn is_hex_str(s: &str) -> bool {
    s.chars().all(|c| c.is_ascii_hexdigit())
}

/// Determines whether a string is made up only of base64/base64url characters
/// (see [`BASE64_ALPHABET`]).
///
/// Only the character set is checked, not length or padding rules. The empty string yields
/// `true`.
pub fn is_base64_str(s: &str) -> bool {
    s.chars().all(|c| BASE64_ALPHABET.contains(c))
}

/// Calculates the Shannon entropy of a string, in bits per byte.
///
/// The alphabet is detected first: hexadecimal strings are measured against
/// [`HEX_ALPHABET`], base64/base64url strings against [`BASE64_ALPHABET`], and anything else
/// is measured over all byte values of its UTF-8 encoding.
pub fn str_entropy(s: &str) -> f32 {
    let bytes = s.as_bytes();

    if is_hex_str(s) {
        shannon_entropy_charset(bytes, HEX_ALPHABET)
    } else if is_base64_str(s) {
        shannon_entropy_charset(bytes, BASE64_ALPHABET)
    } else {
        shannon_entropy(bytes)
    }
}

// Tests
#[cfg(test)]
mod tests {
    use super::{is_base64_str, is_hex_str, shannon_entropy, str_entropy};

    /// Asserts that two entropies agree up to `f32` rounding noise. Exact equality on
    /// floating-point results is brittle across platforms and math libraries.
    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() < 1e-5,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    #[test]
    fn test_entropy() {
        let test_strings = [
            ("hello world", 2.8453512),
            ("hello worldd", 2.8553884),
            ("a", 0.0),
            ("aaaaa", 0.0),
            ("ab", 1.0),
            ("aab", 0.9182958),
            ("", 0.0),
        ];

        for &(test, answer) in test_strings.iter() {
            assert_close(str_entropy(test), answer);
        }
    }

    #[test]
    fn test_entropy_equal_distribution1() {
        let mut bytes = [0u8; 256];
        for (i, b) in bytes.iter_mut().enumerate() {
            *b = i as u8;
        }

        assert_close(shannon_entropy(&bytes), 8.0);
    }

    // NOTE(review): the header and setup lines of this test fall between diff hunks and were
    // not visible; the test name and the buffer length (a multiple of 256) are reconstructed
    // and should be confirmed against the original file.
    #[test]
    fn test_entropy_equal_distribution2() {
        let mut bytes = [0u8; 256 * 4];
        for (i, b) in bytes.iter_mut().enumerate() {
            *b = (i % 256) as u8;
        }

        assert_close(shannon_entropy(&bytes), 8.0);
    }

    #[test]
    fn test_entropy_helloworld() {
        assert_close(str_entropy("hello, world"), 3.0220551);
        assert_close(str_entropy("hello world"), 2.8453512);
    }

    #[test]
    fn test_hex_str_recognizer() {
        assert!(is_hex_str("0123456789abcdef0123456789abcdef"));
        assert!(is_hex_str("68656c6c6f20776f726c64"));
        assert!(!is_hex_str("g"));
    }

    #[test]
    fn test_base64_str_recognizer() {
        assert!(is_base64_str("aGVsbG8gd29ybGQ="));
        assert!(!is_base64_str("#@$"));
    }
}