123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
/// Every character accepted as a base64 or base64url digit: `A-Z`, `a-z`,
/// `0-9`, the standard extras `+` and `/`, the `=` padding character, and the
/// URL-safe extras `-` and `_`.
pub const BASE64_ALPHABET: &str =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_";

/// Every character accepted as a hexadecimal digit, in both letter cases.
pub const HEX_ALPHABET: &str = "ABCDEFabcdef0123456789";
/// Calculates the Shannon entropy, in bits per byte, of a byte slice.
///
/// Each of the 256 possible byte values is treated as a distinct symbol. The
/// probability of a symbol is its number of occurrences divided by
/// `bytes.len()`, and the result is the sum of `-p * log2(p)` over every
/// symbol that occurs at least once.
///
/// Returns `0.0` for an empty slice, because no symbol has a non-zero count.
pub fn shannon_entropy(bytes: &[u8]) -> f32 {
    // `usize` counters: a default-typed `[0; 256]` would be `i32` and could
    // overflow when one byte value occurs more than `i32::MAX` times.
    let mut counts = [0usize; 256];
    for &b in bytes {
        counts[usize::from(b)] += 1;
    }

    let total = bytes.len() as f32;
    counts
        .iter()
        // Absent symbols contribute nothing; skipping them also avoids
        // evaluating `0 * log(0)`.
        .filter(|&&count| count > 0)
        // Terms are accumulated in ascending byte-value order with
        // `log(2.0)`, so the floating-point result does not depend on the
        // order of the input bytes.
        .fold(0.0_f32, |entropy, &count| {
            let p = count as f32 / total;
            entropy - p * p.log(2.0)
        })
}
- // Calculate the shannon entropy of a given byte slice for a specific charset
- pub fn shannon_entropy_charset(bytes: &[u8], charset: &str) -> f32 {
- let mut entropy = 0.0;
- for single_char in charset.chars() {
- let count = bytes.iter().filter(|&&n| n as char == single_char).count();
- if count == 0 { continue }
- let p: f32 = (count as f32) / (bytes.len() as f32);
- entropy -= p * p.log(2.0);
- }
- entropy
- }
/// Reports whether `s` consists solely of ASCII hexadecimal digits
/// (`0-9`, `a-f`, `A-F`).
///
/// The empty string has no offending character and therefore yields `true`.
pub fn is_hex_str(s: &str) -> bool {
    // `all` stops at the first non-hex character instead of counting every
    // offending character in the string.
    s.chars().all(|c| c.is_ascii_hexdigit())
}
- // Determine if a string is made up only of base64/base64url digits
- pub fn is_base64_str(s: &str) -> bool {
- let alphabet: Vec<_> = BASE64_ALPHABET.chars().collect();
- s.chars().filter(|n| !(alphabet.contains(n))).count() == 0
- }
- // Calculate the shannon entropy of a given string
- pub fn str_entropy(s: &str) -> f32 {
- if is_hex_str(s) {
- return shannon_entropy_charset(s.as_bytes(), HEX_ALPHABET);
- }
- if is_base64_str(s) {
- return shannon_entropy_charset(s.as_bytes(), BASE64_ALPHABET);
- }
- shannon_entropy(s.as_bytes())
- }
// Unit tests for the entropy helpers and the string classifiers.
#[cfg(test)]
mod tests {
    use super::{is_base64_str, is_hex_str, shannon_entropy, str_entropy};

    // Entropy values are compared exactly on purpose: a change to the
    // summation order or to the logarithm routine should surface here.
    #[test]
    fn test_entropy() {
        let cases: [(&str, f32); 7] = [
            ("hello world", 2.8453512),
            ("hello worldd", 2.8553884),
            ("a", 0.0),
            ("aaaaa", 0.0),
            ("ab", 1.0),
            ("aab", 0.9182958),
            ("", 0.0),
        ];
        for (input, expected) in cases.iter().copied() {
            assert_eq!(str_entropy(input), expected);
        }
    }

    // Each of the 256 byte values exactly once: 8 bits per byte.
    #[test]
    fn test_entropy_equal_distribution1() {
        let bytes: Vec<u8> = (0..=u8::MAX).collect();
        assert_eq!(shannon_entropy(&bytes), 8.0);
    }

    // Each of the 256 byte values exactly twice: still 8 bits per byte.
    #[test]
    fn test_entropy_equal_distribution2() {
        let bytes: Vec<u8> = (0..=u8::MAX).cycle().take(256 * 2).collect();
        assert_eq!(shannon_entropy(&bytes), 8.0);
    }

    #[test]
    fn test_entropy_helloworld() {
        assert_eq!(str_entropy("hello, world"), 3.0220551);
        assert_eq!(str_entropy("hello world"), 2.8453512);
    }

    #[test]
    fn test_hex_str_recognizer() {
        assert!(is_hex_str("0123456789abcdef0123456789abcdef"));
        assert!(is_hex_str("68656c6c6f20776f726c64"));
        assert!(!is_hex_str("g"));
    }

    #[test]
    fn test_base64_str_recognizer() {
        assert!(is_base64_str("aGVsbG8gd29ybGQ="));
        assert!(!is_base64_str("#@$"));
    }
}
|