|
@@ -7,11 +7,11 @@ pub fn shannon_entropy(s: &str) -> f32 {
|
|
|
return entropy;
|
|
|
}
|
|
|
|
|
|
- for c in 0..=255u8 {
|
|
|
+ for c in 0..=255 {
|
|
|
let count = s.matches(char::from(c)).count() as f32;
|
|
|
let slen = s.len() as f32;
|
|
|
let p : f32 = count / slen;
|
|
|
- println!("{:?} {:?} {:?}", count, slen, p);
|
|
|
+ println!("{}, {:?} {:?} {:?}", c, count, slen, p);
|
|
|
if p > 0.0 {
|
|
|
entropy += - p * p.log2();
|
|
|
}
|
|
@@ -26,10 +26,10 @@ pub fn str_shannon_entropy(s: &str) -> f32 {
|
|
|
|
|
|
if s.is_empty() {
|
|
|
return entropy;
|
|
|
- }
|
|
|
+ }
|
|
|
entropy = 1f32;
|
|
|
|
|
|
- entropy
|
|
|
+ entropy
|
|
|
}
|
|
|
|
|
|
|
|
@@ -50,7 +50,7 @@ mod tests {
|
|
|
for (test, answer) in test_strings {
|
|
|
let entropy: f32 = shannon_entropy(test);
|
|
|
assert_eq!(entropy, answer);
|
|
|
- }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
#[test]
|
|
@@ -85,28 +85,53 @@ mod tests {
|
|
|
|
|
|
#[test]
|
|
|
fn test_entropy_equal_distribution1() {
|
|
|
- let mut bytes = [0u8; 256];
|
|
|
- for i in 0..256 {
|
|
|
- bytes[i] = i as u8;
|
|
|
+ let mut stringmerda = String::new();
|
|
|
+
|
|
|
+ for _ in 0..2 {
|
|
|
+ for i in 0..=255 {
|
|
|
+ stringmerda.push(char::from(i));
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
- let h = shannon_entropy(&String::from_utf8_lossy(&bytes));
|
|
|
- assert_eq!(h, 8.0);
|
|
|
+ // println!("{:#?}", stringmerda.as_bytes());
|
|
|
+ println!("{}", stringmerda.len());
|
|
|
+ // let h = shannon_entropy(&stringmerda);
|
|
|
+
|
|
|
+ // assert_eq!(h, 8.0);
|
|
|
}
|
|
|
|
|
|
- #[test]
|
|
|
- fn test_entropy_equal_distribution2() {
|
|
|
- let mut bytes = [0u8; 256*2];
|
|
|
- for i in 0..bytes.len() {
|
|
|
- bytes[i] = (i % 256) as u8;
|
|
|
+ pub fn shannon_entropy2(bytes: &[u8]) -> f32 {
|
|
|
+ let mut entropy = 0.0;
|
|
|
+ let mut counts = [0; 256];
|
|
|
+
|
|
|
+ for &b in bytes {
|
|
|
+ counts[b as usize] += 1;
|
|
|
}
|
|
|
|
|
|
- let pippo = String::from(&bytes);
|
|
|
- println!("{:?} {:?}", pippo, pippo.len());
|
|
|
- let h = shannon_entropy(&pippo);
|
|
|
- assert_eq!(h, 8.0);
|
|
|
+ for &count in counts.iter() {
|
|
|
+ if count == 0 { continue }
|
|
|
+
|
|
|
+ let p: f32 = (count as f32) / (bytes.len() as f32);
|
|
|
+ entropy -= p * p.log(2.0);
|
|
|
+ }
|
|
|
+
|
|
|
+ entropy
|
|
|
}
|
|
|
|
|
|
+ // #[test]
|
|
|
+ // fn test_entropy_equal_distribution2() {
|
|
|
+ // let mut bytes = [0u8; 256*2];
|
|
|
+ // for i in 0..bytes.len() {
|
|
|
+ // bytes[i] = (i % 256) as u8;
|
|
|
+ // }
|
|
|
+
|
|
|
+ // println!("{}", bytes.len());
|
|
|
+ // let pippo = String::from_utf8_lossy(&bytes);
|
|
|
+ // println!("{}", pippo.len());
|
|
|
+ // let h = shannon_entropy(&pippo);
|
|
|
+ // assert_eq!(h, 8.0);
|
|
|
+ // }
|
|
|
+
|
|
|
#[test]
|
|
|
fn test_entropy_helloworld() {
|
|
|
let h = shannon_entropy("hello, world");
|