threatflux_binary_analysis/analysis/
entropy.rs1use crate::{
4 types::{EntropyAnalysis, EntropyRegion, ObfuscationLevel, PackingIndicators},
5 BinaryFile, Result,
6};
7use std::collections::HashMap;
8
9pub fn analyze_binary(binary: &BinaryFile) -> Result<EntropyAnalysis> {
15 let data = binary.data();
16
17 let overall_entropy = calculate_entropy(data);
19
20 let mut section_entropy = HashMap::new();
22 for section in binary.sections() {
23 let start = section.offset as usize;
24 let end = (section.offset + section.size) as usize;
25
26 if start < data.len() && end <= data.len() && start < end {
27 let section_data = &data[start..end];
28 let entropy = calculate_entropy(section_data);
29 section_entropy.insert(section.name.clone(), entropy);
30 }
31 }
32
33 let high_entropy_regions = find_high_entropy_regions(data)?;
35
36 let packing_indicators = analyze_packing_indicators(data, §ion_entropy);
38
39 Ok(EntropyAnalysis {
40 overall_entropy,
41 section_entropy,
42 high_entropy_regions,
43 packing_indicators,
44 })
45}
46
/// Computes the Shannon entropy of `data` in bits per byte.
///
/// The result lies in `0.0..=8.0`: `0.0` when every byte is identical (or the
/// slice is empty) and `8.0` when all 256 byte values occur equally often.
fn calculate_entropy(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    // 64-bit counters: a `u32` histogram overflows as soon as a single byte
    // value occurs more than `u32::MAX` times, which is reachable for inputs
    // larger than 4 GiB.
    let mut histogram = [0u64; 256];
    for &byte in data {
        histogram[usize::from(byte)] += 1;
    }

    let total = data.len() as f64;

    // Accumulate -sum(p * log2(p)) over the byte values that actually occur;
    // zero counts are skipped because log2(0) is undefined.
    histogram
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f64, |entropy, &count| {
            let p = count as f64 / total;
            entropy - p * p.log2()
        })
}
72
73fn find_high_entropy_regions(data: &[u8]) -> Result<Vec<EntropyRegion>> {
75 let mut regions = Vec::new();
76 let chunk_size = 1024; let high_entropy_threshold = 7.5; for (i, chunk) in data.chunks(chunk_size).enumerate() {
80 let entropy = calculate_entropy(chunk);
81
82 if entropy > high_entropy_threshold {
83 let start = i * chunk_size;
84 let end = std::cmp::min(start + chunk_size, data.len());
85
86 let description = classify_high_entropy_region(chunk, entropy);
87
88 regions.push(EntropyRegion {
89 start: start as u64,
90 end: end as u64,
91 entropy,
92 description,
93 });
94 }
95 }
96
97 Ok(regions)
98}
99
100fn classify_high_entropy_region(data: &[u8], entropy: f64) -> String {
102 if entropy > 7.9 {
103 "Likely encrypted or compressed data".to_string()
104 } else if entropy > 7.5 {
105 if has_crypto_constants(data) {
107 "Possible cryptographic constants".to_string()
108 } else if has_compression_signature(data) {
109 "Possible compressed data".to_string()
110 } else {
111 "High entropy region - possible obfuscation".to_string()
112 }
113 } else {
114 "Moderately high entropy".to_string()
115 }
116}
117
/// Reports whether `data` contains any of a small set of known 4-byte
/// constants at any offset.
///
/// Inputs shorter than a constant simply produce no windows and yield `false`.
fn has_crypto_constants(data: &[u8]) -> bool {
    // NOTE(review): these look like hash initial-state words (0x67452301 in
    // both byte orders, 0xefcdab89 little-endian) — confirm the intended set.
    #[allow(clippy::type_complexity)]
    const CRYPTO_CONSTANTS: &[&[u8]] = &[
        b"\x67\x45\x23\x01",
        b"\x01\x23\x45\x67",
        b"\x89\xab\xcd\xef",
    ];

    CRYPTO_CONSTANTS.iter().any(|needle| {
        data.windows(needle.len())
            .any(|window| window == *needle)
    })
}
136
/// Reports whether `data` begins with a known compressed-container magic
/// number (gzip, zip, bzip2 or xz).
///
/// Only the start of the slice is inspected. Inputs shorter than a given
/// signature cannot match it, so no explicit length guard is needed and
/// short-but-valid prefixes (e.g. a bare 2-byte gzip magic) are recognised.
fn has_compression_signature(data: &[u8]) -> bool {
    // Prefix comparison via `starts_with`: the previous form compared a 4-byte
    // slice against the 5-byte xz pattern, which could never match.
    const SIGNATURES: [&[u8]; 4] = [
        b"\x1f\x8b",   // gzip
        b"PK\x03\x04", // zip local file header
        b"BZh",        // bzip2
        b"\xfd7zXZ",   // xz
    ];

    SIGNATURES.iter().any(|magic| data.starts_with(magic))
}
149
150fn analyze_packing_indicators(
152 data: &[u8],
153 section_entropy: &HashMap<String, f64>,
154) -> PackingIndicators {
155 let mut indicators = PackingIndicators::default();
156
157 let overall_entropy = calculate_entropy(data);
159
160 if overall_entropy > 7.5 {
162 indicators.is_packed = true;
163 }
164
165 let mut high_entropy_code = false;
167 for (name, &entropy) in section_entropy {
168 if (name.contains("text") || name.contains("code")) && entropy > 7.0 {
169 high_entropy_code = true;
170 break;
171 }
172 }
173
174 if high_entropy_code {
175 indicators.is_packed = true;
176 }
177
178 if indicators.is_packed {
180 indicators.compression_ratio = Some(overall_entropy / 8.0);
182 }
183
184 indicators.obfuscation_level = if overall_entropy > 7.8 {
186 ObfuscationLevel::High
187 } else if overall_entropy > 7.5 {
188 ObfuscationLevel::Medium
189 } else if overall_entropy > 7.0 {
190 ObfuscationLevel::Low
191 } else {
192 ObfuscationLevel::None
193 };
194
195 indicators.packer_name = detect_packer(data);
197
198 indicators
199}
200
/// Looks for a known packer name in the first 1024 bytes of `data`.
///
/// Returns `None` when `data` is shorter than 1024 bytes or when no marker is
/// present. Markers are tried in a fixed order (UPX, VMProtect, Themida,
/// ASPack) and the first one found wins.
fn detect_packer(data: &[u8]) -> Option<String> {
    const HEADER_LEN: usize = 1024;
    const PACKER_MARKERS: [&str; 4] = ["UPX", "VMProtect", "Themida", "ASPack"];

    // `get` yields `None` for inputs shorter than the header window, which
    // ends the search immediately.
    let header = data.get(..HEADER_LEN)?;

    // Lossy decoding keeps ASCII runs intact, so the markers stay searchable
    // even when the surrounding bytes are not valid UTF-8.
    let header_text = String::from_utf8_lossy(header);

    PACKER_MARKERS
        .iter()
        .copied()
        .find(|marker| header_text.contains(*marker))
        .map(String::from)
}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// A buffer of one repeated byte has low entropy; a buffer that cycles
    /// through many byte values has high entropy.
    #[test]
    fn test_entropy_calculation() {
        let constant_bytes = vec![0u8; 1024];
        assert!(calculate_entropy(&constant_bytes) < 1.0);

        // Deterministic sequence that spreads over the byte range; it is not
        // actually random.
        let spread_bytes: Vec<u8> = (0..1024).map(|i| (i * 7 + 13) as u8).collect();
        assert!(calculate_entropy(&spread_bytes) > 7.0);
    }

    /// A known constant is found when present and not reported otherwise.
    #[test]
    fn test_crypto_constants_detection() {
        let with_constant = b"\x67\x45\x23\x01some other data";
        let without_constant = b"no crypto constants here";

        assert!(has_crypto_constants(with_constant));
        assert!(!has_crypto_constants(without_constant));
    }

    /// gzip and zip magic numbers are recognised; plain text is not.
    #[test]
    fn test_compression_signature_detection() {
        let gzip_header = b"\x1f\x8b\x08\x00";
        let zip_header = b"PK\x03\x04";
        let plain_text = b"normal data";

        assert!(has_compression_signature(gzip_header));
        assert!(has_compression_signature(zip_header));
        assert!(!has_compression_signature(plain_text));
    }
}