shannon-entropy-rs/src/lib.rs
2018-11-22 03:43:11 +01:00

132 lines
3.4 KiB
Rust

/// Characters accepted as base64 digits: the standard alphabet (`A-Z`, `a-z`,
/// `0-9`, `+`, `/`), the `=` padding character, and the URL-safe `-` and `_`.
pub const BASE64_ALPHABET: &str =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_";

/// Characters accepted as hexadecimal digits, in both upper and lower case.
pub const HEX_ALPHABET: &str = "ABCDEFabcdef0123456789";
/// Calculates the Shannon entropy of a byte slice, in bits per byte.
///
/// Each of the 256 possible byte values is treated as a distinct symbol. The
/// result is `0.0` for an empty slice or a slice holding a single repeated
/// value, and approaches `8.0` when all 256 values are equally frequent.
pub fn shannon_entropy(bytes: &[u8]) -> f32 {
    // Explicit `usize` counters: an untyped `[0; 256]` would default to `i32`
    // and overflow once a single byte value occurs more than `i32::MAX` times.
    let mut counts = [0usize; 256];
    for &b in bytes {
        counts[usize::from(b)] += 1;
    }

    let total = bytes.len() as f32;
    counts
        .iter()
        // Values that never occur contribute nothing (and would yield NaN).
        .filter(|&&count| count != 0)
        .fold(0.0_f32, |entropy, &count| {
            let p = count as f32 / total;
            // `log(2.0)` rather than `log2()` so results stay identical to
            // what this function has always returned.
            entropy - p * p.log(2.0)
        })
}
/// Calculates the Shannon entropy of a byte slice, counting only the symbols
/// listed in `charset`.
///
/// For every character of `charset`, the number of bytes whose value equals
/// that character's code point is divided by `bytes.len()` to obtain its
/// probability. Bytes that match no character of `charset` still count towards
/// the length but add no term of their own. A character that appears more than
/// once in `charset` contributes one term per appearance. Characters above
/// U+00FF can never match a byte and are ignored.
///
/// Returns `0.0` when `bytes` is empty or none of its bytes are in `charset`.
pub fn shannon_entropy_charset(bytes: &[u8], charset: &str) -> f32 {
    // Tally all byte values in a single pass so that each charset character
    // becomes a table lookup instead of another scan over `bytes`.
    let mut counts = [0usize; 256];
    for &b in bytes {
        counts[usize::from(b)] += 1;
    }

    let total = bytes.len() as f32;
    let mut entropy = 0.0_f32;
    for single_char in charset.chars() {
        // Code points outside 0..=255 fall off the table and are skipped, as
        // are characters that do not occur in `bytes`.
        let count = match counts.get(single_char as u32 as usize) {
            Some(&n) if n != 0 => n,
            _ => continue,
        };
        let p = count as f32 / total;
        entropy -= p * p.log(2.0);
    }
    entropy
}
/// Reports whether every character of `s` is a hexadecimal digit
/// (`0-9`, `a-f` or `A-F`).
///
/// The empty string contains no offending character and therefore yields `true`.
pub fn is_hex_str(s: &str) -> bool {
    s.chars().all(|c| c.is_digit(16))
}
/// Reports whether every character of `s` belongs to [`BASE64_ALPHABET`],
/// i.e. is a base64 or base64url digit (including the `=` padding character).
///
/// Only membership is checked; the string is not validated as well-formed
/// base64. The empty string yields `true`.
pub fn is_base64_str(s: &str) -> bool {
    // Look characters up directly in the constant instead of collecting the
    // alphabet into a fresh `Vec` on every call; `all` stops at the first
    // character that is not part of the alphabet.
    s.chars().all(|c| BASE64_ALPHABET.contains(c))
}
/// Calculates the Shannon entropy of a string.
///
/// The alphabet is picked from the string's contents: strings made up solely
/// of hex digits are measured against [`HEX_ALPHABET`], strings made up solely
/// of base64 digits against [`BASE64_ALPHABET`], and everything else is
/// measured over its raw bytes with [`shannon_entropy`]. The hex check runs
/// first, so a string that qualifies as both is treated as hex.
pub fn str_entropy(s: &str) -> f32 {
    let data = s.as_bytes();
    if is_hex_str(s) {
        shannon_entropy_charset(data, HEX_ALPHABET)
    } else if is_base64_str(s) {
        shannon_entropy_charset(data, BASE64_ALPHABET)
    } else {
        shannon_entropy(data)
    }
}
// Tests
#[cfg(test)]
mod tests {
    use super::{is_base64_str, is_hex_str, shannon_entropy, str_entropy};

    /// Largest absolute difference accepted between a computed entropy and its
    /// expected value. The results depend on the platform's logarithm
    /// implementation, so comparing `f32` values for exact equality is fragile.
    const TOLERANCE: f32 = 1e-5;

    /// Asserts that `actual` lies within `TOLERANCE` of `expected`.
    fn assert_close(actual: f32, expected: f32) {
        assert!(
            (actual - expected).abs() <= TOLERANCE,
            "entropy {} is not within {} of expected {}",
            actual,
            TOLERANCE,
            expected
        );
    }

    #[test]
    fn test_entropy() {
        let cases = [
            ("hello world", 2.8453512),
            ("hello worldd", 2.8553884),
            ("a", 0.0),
            ("aaaaa", 0.0),
            ("ab", 1.0),
            ("aab", 0.9182958),
            ("", 0.0),
        ];
        for &(input, expected) in cases.iter() {
            assert_close(str_entropy(input), expected);
        }
    }

    #[test]
    fn test_entropy_equal_distribution1() {
        // Every byte value exactly once.
        let bytes: Vec<u8> = (0..=255u8).collect();
        assert_close(shannon_entropy(&bytes), 8.0);
    }

    #[test]
    fn test_entropy_equal_distribution2() {
        // Every byte value exactly twice.
        let bytes: Vec<u8> = (0..=255u8).cycle().take(256 * 2).collect();
        assert_close(shannon_entropy(&bytes), 8.0);
    }

    #[test]
    fn test_entropy_helloworld() {
        assert_close(str_entropy("hello, world"), 3.0220551);
        assert_close(str_entropy("hello world"), 2.8453512);
    }

    #[test]
    fn test_hex_str_recognizer() {
        assert!(is_hex_str("0123456789abcdef0123456789abcdef"));
        assert!(is_hex_str("68656c6c6f20776f726c64"));
        assert!(!is_hex_str("g"));
    }

    #[test]
    fn test_base64_str_recognizer() {
        assert!(is_base64_str("aGVsbG8gd29ybGQ="));
        assert!(!is_base64_str("#@$"));
    }
}