//! lib.rs — Shannon entropy helpers for byte slices and for hex/base64 strings.
  /// Characters accepted by [`is_base64_str`]: the standard base64 alphabet,
  /// the `=` padding character, and the URL-safe `-` and `_` substitutes.
  pub const BASE64_ALPHABET: &str =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_";
  /// Hexadecimal digits `A-F`, `a-f` and `0-9`, used as the charset for
  /// strings recognised as hex.
  pub const HEX_ALPHABET: &str = "ABCDEFabcdef0123456789";
  /// Calculates the Shannon entropy, in bits per byte, of a byte slice.
  ///
  /// Each of the 256 possible byte values is treated as one symbol, so the
  /// result is `0.0` for an empty slice or a single repeated value and reaches
  /// 8 bits when all 256 values are equally frequent.
  pub fn shannon_entropy(bytes: &[u8]) -> f32 {
      // `usize` counters cannot overflow: a slice never holds more than
      // `usize::MAX` elements.
      let mut counts = [0usize; 256];
      for &b in bytes {
          counts[usize::from(b)] += 1;
      }

      let total = bytes.len() as f32;
      let mut entropy = 0.0_f32;
      // Values that never occur are skipped, which also keeps an empty slice
      // from dividing by zero: it simply yields 0.0.
      for &count in counts.iter().filter(|&&count| count != 0) {
          let p = count as f32 / total;
          entropy -= p * p.log(2.0);
      }
      entropy
  }
  /// Calculates the Shannon entropy, in bits, of a byte slice using only the
  /// symbols listed in `charset`.
  ///
  /// A byte matches a `charset` character when the byte value equals the
  /// character's code point, so characters above U+00FF never match. Bytes that
  /// match no `charset` character contribute no term, but every probability is
  /// still taken relative to the full length of `bytes`. A character that
  /// appears more than once in `charset` is counted once. Returns `0.0` when
  /// nothing matches, including for an empty slice.
  pub fn shannon_entropy_charset(bytes: &[u8], charset: &str) -> f32 {
      // Tally every byte value once up front instead of rescanning `bytes` for
      // each charset character.
      let mut counts = [0usize; 256];
      for &b in bytes {
          counts[usize::from(b)] += 1;
      }

      let total = bytes.len() as f32;
      let mut entropy = 0.0_f32;
      // Terms are accumulated in charset order.
      for symbol in charset.chars() {
          // Only code points 0..=255 index into the table; `get_mut` returns
          // `None` for anything larger.
          let slot = match counts.get_mut(symbol as usize) {
              Some(slot) => slot,
              None => continue,
          };
          // Zero the tally after reading it so a character repeated in
          // `charset` contributes only once.
          let count = *slot;
          *slot = 0;
          if count == 0 {
              continue;
          }
          let p = count as f32 / total;
          entropy -= p * p.log(2.0);
      }
      entropy
  }
  /// Returns `true` when every character of `s` is an ASCII hexadecimal digit
  /// (`0-9`, `a-f`, `A-F`).
  ///
  /// The empty string has no offending character and therefore returns `true`.
  pub fn is_hex_str(s: &str) -> bool {
      // `all` stops at the first non-hex character instead of counting them all.
      s.chars().all(|c| c.is_ascii_hexdigit())
  }
  32. // Determine if a string is made up only of base64/base64url digits
  33. pub fn is_base64_str(s: &str) -> bool {
  34. let alphabet: Vec<_> = BASE64_ALPHABET.chars().collect();
  35. s.chars().filter(|n| !(alphabet.contains(n))).count() == 0
  36. }
  37. // Calculate the shannon entropy of a given string
  38. pub fn str_entropy(s: &str) -> f32 {
  39. if is_hex_str(s) {
  40. return shannon_entropy_charset(s.as_bytes(), HEX_ALPHABET);
  41. }
  42. if is_base64_str(s) {
  43. return shannon_entropy_charset(s.as_bytes(), BASE64_ALPHABET);
  44. }
  45. shannon_entropy(s.as_bytes())
  46. }
  // Tests
  #[cfg(test)]
  mod tests {
      use super::{is_base64_str, is_hex_str, shannon_entropy, str_entropy};

      /// Absolute tolerance for comparing computed `f32` entropies. Exact
      /// equality on floating-point results depends on the platform's
      /// logarithm implementation and on summation order.
      const TOLERANCE: f32 = 1e-4;

      /// Asserts that `actual` is within [`TOLERANCE`] of `expected`.
      fn assert_close(actual: f32, expected: f32) {
          assert!(
              (actual - expected).abs() <= TOLERANCE,
              "entropy {} differs from expected {}",
              actual,
              expected
          );
      }

      #[test]
      fn test_entropy() {
          let cases = [
              ("hello world", 2.8453512),
              ("hello worldd", 2.8553884),
              ("a", 0.0),
              ("aaaaa", 0.0),
              ("ab", 1.0),
              ("aab", 0.9182958),
              ("", 0.0),
          ];
          for &(input, expected) in cases.iter() {
              assert_close(str_entropy(input), expected);
          }
      }

      #[test]
      fn test_entropy_equal_distribution1() {
          // Every byte value exactly once.
          let bytes: Vec<u8> = (0..=255u8).collect();
          assert_close(shannon_entropy(&bytes), 8.0);
      }

      #[test]
      fn test_entropy_equal_distribution2() {
          // Every byte value exactly twice.
          let bytes: Vec<u8> = (0..=255u8).cycle().take(256 * 2).collect();
          assert_close(shannon_entropy(&bytes), 8.0);
      }

      #[test]
      fn test_entropy_helloworld() {
          assert_close(str_entropy("hello, world"), 3.0220551);
          assert_close(str_entropy("hello world"), 2.8453512);
      }

      #[test]
      fn test_hex_str_recognizer() {
          assert!(is_hex_str("0123456789abcdef0123456789abcdef"));
          assert!(is_hex_str("68656c6c6f20776f726c64"));
          assert!(!is_hex_str("g"));
      }

      #[test]
      fn test_base64_str_recognizer() {
          assert!(is_base64_str("aGVsbG8gd29ybGQ="));
          assert!(!is_base64_str("#@$"));
      }
  }