forked from zero/shannon-entropy-rs
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
f31ebf488d |
175
src/lib.rs
175
src/lib.rs
@ -1,142 +1,131 @@
|
|||||||
|
pub const BASE64_ALPHABET : &str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_";
|
||||||
|
pub const HEX_ALPHABET : &str = "ABCDEFabcdef0123456789";
|
||||||
|
|
||||||
// Calculate the shannon entropy of a given string for a given alphabet
|
// Calculate the shannon entropy of a given byte slice
|
||||||
pub fn shannon_entropy(s: &str) -> f32 {
|
pub fn shannon_entropy(bytes: &[u8]) -> f32 {
|
||||||
let mut entropy = 0f32;
|
let mut entropy = 0.0;
|
||||||
|
let mut counts = [0; 256];
|
||||||
|
|
||||||
if s.is_empty() {
|
for &b in bytes {
|
||||||
return entropy;
|
counts[b as usize] += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
for c in 0..=255 {
|
for &count in counts.iter() {
|
||||||
let count = s.matches(char::from(c)).count() as f32;
|
if count == 0 { continue }
|
||||||
let slen = s.len() as f32;
|
|
||||||
let p : f32 = count / slen;
|
let p: f32 = (count as f32) / (bytes.len() as f32);
|
||||||
println!("{}, {:?} {:?} {:?}", c, count, slen, p);
|
entropy -= p * p.log(2.0);
|
||||||
if p > 0.0 {
|
|
||||||
entropy += - p * p.log2();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
entropy
|
entropy
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate the shannon entropy of a given string detecting the used alphabet
|
// Calculate the shannon entropy of a given byte slice for a specific charset
|
||||||
pub fn str_shannon_entropy(s: &str) -> f32 {
|
pub fn shannon_entropy_charset(bytes: &[u8], charset: &str) -> f32 {
|
||||||
let mut entropy = 0f32;
|
let mut entropy = 0.0;
|
||||||
|
|
||||||
if s.is_empty() {
|
for single_char in charset.chars() {
|
||||||
return entropy;
|
let count = bytes.iter().filter(|&&n| n as char == single_char).count();
|
||||||
|
if count == 0 { continue }
|
||||||
|
let p: f32 = (count as f32) / (bytes.len() as f32);
|
||||||
|
entropy -= p * p.log(2.0);
|
||||||
}
|
}
|
||||||
entropy = 1f32;
|
|
||||||
|
|
||||||
entropy
|
entropy
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether `s` consists solely of ASCII hexadecimal digits.
///
/// Accepted characters are `0-9`, `a-f` and `A-F`. An empty string contains
/// no offending character and therefore yields `true`.
pub fn is_hex_str(s: &str) -> bool {
    // `all` stops at the first non-hex character instead of scanning the
    // whole string and counting every mismatch.
    s.chars().all(|c| c.is_ascii_hexdigit())
}
|
||||||
|
|
||||||
|
// Determine if a string is made up only of base64/base64url digits
|
||||||
|
pub fn is_base64_str(s: &str) -> bool {
|
||||||
|
let alphabet: Vec<_> = BASE64_ALPHABET.chars().collect();
|
||||||
|
s.chars().filter(|n| !(alphabet.contains(n))).count() == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the shannon entropy of a given string
|
||||||
|
pub fn str_entropy(s: &str) -> f32 {
|
||||||
|
if is_hex_str(s) {
|
||||||
|
return shannon_entropy_charset(s.as_bytes(), HEX_ALPHABET);
|
||||||
|
}
|
||||||
|
|
||||||
|
if is_base64_str(s) {
|
||||||
|
return shannon_entropy_charset(s.as_bytes(), BASE64_ALPHABET);
|
||||||
|
}
|
||||||
|
|
||||||
|
shannon_entropy(s.as_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
// Tests
|
// Tests
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::shannon_entropy;
|
use super::{shannon_entropy, str_entropy, is_hex_str, is_base64_str};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn it_works() {
|
fn test_entropy() {
|
||||||
let test_strings = vec![
|
let test_strings = vec![
|
||||||
("hello world", 2.8453512),
|
("hello world", 2.8453512),
|
||||||
("hello worldd", 2.8553884),
|
("hello worldd", 2.8553884),
|
||||||
("a", 0.0),
|
("a", 0.0),
|
||||||
|
("aaaaa", 0.0),
|
||||||
|
("ab", 1.0),
|
||||||
|
("aab", 0.9182958),
|
||||||
("", 0.0),
|
("", 0.0),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (test, answer) in test_strings {
|
for (test, answer) in test_strings {
|
||||||
let entropy: f32 = shannon_entropy(test);
|
let entropy: f32 = str_entropy(test);
|
||||||
assert_eq!(entropy, answer);
|
assert_eq!(entropy, answer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_entropy_empty() {
|
|
||||||
let h = shannon_entropy("");
|
|
||||||
assert_eq!(h, 0.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_entropy_a() {
|
|
||||||
let h = shannon_entropy("a");
|
|
||||||
assert_eq!(h, 0.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_entropy_aaaaa() {
|
|
||||||
let h = shannon_entropy("aaaaa");
|
|
||||||
assert_eq!(h, 0.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_entropy_ab() {
|
|
||||||
let h = shannon_entropy("ab");
|
|
||||||
assert_eq!(h, 1.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_entropy_aab() {
|
|
||||||
let h = shannon_entropy("aab");
|
|
||||||
assert_eq!(h, 0.9182958);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_entropy_equal_distribution1() {
|
fn test_entropy_equal_distribution1() {
|
||||||
let mut stringmerda = String::new();
|
let mut bytes = [0u8; 256];
|
||||||
|
for i in 0..256 {
|
||||||
for _ in 0..2 {
|
bytes[i] = i as u8;
|
||||||
for i in 0..=255 {
|
|
||||||
stringmerda.push(char::from(i));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// println!("{:#?}", stringmerda.as_bytes());
|
let h = shannon_entropy(&bytes);
|
||||||
println!("{}", stringmerda.len());
|
assert_eq!(h, 8.0);
|
||||||
// let h = shannon_entropy(&stringmerda);
|
|
||||||
|
|
||||||
// assert_eq!(h, 8.0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn shannon_entropy2(bytes: &[u8]) -> f32 {
|
#[test]
|
||||||
let mut entropy = 0.0;
|
fn test_entropy_equal_distribution2() {
|
||||||
let mut counts = [0; 256];
|
let mut bytes = [0u8; 256*2];
|
||||||
|
for i in 0..bytes.len() {
|
||||||
for &b in bytes {
|
bytes[i] = (i % 256) as u8;
|
||||||
counts[b as usize] += 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for &count in counts.iter() {
|
let h = shannon_entropy(&bytes);
|
||||||
if count == 0 { continue }
|
assert_eq!(h, 8.0);
|
||||||
|
|
||||||
let p: f32 = (count as f32) / (bytes.len() as f32);
|
|
||||||
entropy -= p * p.log(2.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
entropy
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// #[test]
|
|
||||||
// fn test_entropy_equal_distribution2() {
|
|
||||||
// let mut bytes = [0u8; 256*2];
|
|
||||||
// for i in 0..bytes.len() {
|
|
||||||
// bytes[i] = (i % 256) as u8;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// println!("{}", bytes.len());
|
|
||||||
// let pippo = String::from_utf8_lossy(&bytes);
|
|
||||||
// println!("{}", pippo.len());
|
|
||||||
// let h = shannon_entropy(&pippo);
|
|
||||||
// assert_eq!(h, 8.0);
|
|
||||||
// }
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_entropy_helloworld() {
|
fn test_entropy_helloworld() {
|
||||||
let h = shannon_entropy("hello, world");
|
let h = str_entropy("hello, world");
|
||||||
assert_eq!(h, 3.0220551);
|
assert_eq!(h, 3.0220551);
|
||||||
let h = shannon_entropy("hello world");
|
let h = str_entropy("hello world");
|
||||||
assert_eq!(h, 2.8453512);
|
assert_eq!(h, 2.8453512);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_hex_str_reconizer() {
    // Each entry pairs an input with the verdict `is_hex_str` must give.
    let cases = [
        ("0123456789abcdef0123456789abcdef", true),
        ("68656c6c6f20776f726c64", true),
        ("g", false),
    ];

    for &(input, expected) in &cases {
        assert_eq!(is_hex_str(input), expected);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn test_base64_str_reconizer() {
    // Each entry pairs an input with the verdict `is_base64_str` must give.
    let cases = [
        ("aGVsbG8gd29ybGQ=", true),
        ("#@$", false),
    ];

    for &(input, expected) in &cases {
        assert_eq!(is_base64_str(input), expected);
    }
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user