threatflux_binary_analysis/analysis/
entropy.rs

1//! Entropy analysis for binary files
2
3use crate::{
4    types::{EntropyAnalysis, EntropyRegion, ObfuscationLevel, PackingIndicators},
5    BinaryFile, Result,
6};
7use std::collections::HashMap;
8
9// Note: Advanced statistical analysis planned for future entropy features
10// #[cfg(feature = "entropy-analysis")]
11// use statrs::statistics::Statistics;
12
13/// Analyze entropy of a binary file
14pub fn analyze_binary(binary: &BinaryFile) -> Result<EntropyAnalysis> {
15    let data = binary.data();
16
17    // Calculate overall entropy
18    let overall_entropy = calculate_entropy(data);
19
20    // Calculate section-wise entropy
21    let mut section_entropy = HashMap::new();
22    for section in binary.sections() {
23        let start = section.offset as usize;
24        let end = (section.offset + section.size) as usize;
25
26        if start < data.len() && end <= data.len() && start < end {
27            let section_data = &data[start..end];
28            let entropy = calculate_entropy(section_data);
29            section_entropy.insert(section.name.clone(), entropy);
30        }
31    }
32
33    // Find high entropy regions
34    let high_entropy_regions = find_high_entropy_regions(data)?;
35
36    // Analyze packing indicators
37    let packing_indicators = analyze_packing_indicators(data, &section_entropy);
38
39    Ok(EntropyAnalysis {
40        overall_entropy,
41        section_entropy,
42        high_entropy_regions,
43        packing_indicators,
44    })
45}
46
/// Compute the Shannon entropy of `data` in bits per byte.
///
/// The result lies between `0.0` and `8.0`: it is `0.0` for empty input or
/// for input consisting of a single repeated byte value, and reaches `8.0`
/// when all 256 byte values occur equally often.
fn calculate_entropy(data: &[u8]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }

    // Histogram of byte values. The counters are `usize` so they cannot
    // overflow: no slice is longer than `usize::MAX`, whereas a `u32` counter
    // would overflow once a single value occurs more than `u32::MAX` times.
    let mut histogram = [0usize; 256];
    for &byte in data {
        histogram[usize::from(byte)] += 1;
    }

    let total = data.len() as f64;

    // H = -sum(p * log2(p)) over the byte values that actually occur; values
    // with a zero count are skipped because log2(0) is undefined.
    histogram
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0, |entropy, &count| {
            let p = count as f64 / total;
            entropy - p * p.log2()
        })
}
72
73/// Find regions with high entropy
74fn find_high_entropy_regions(data: &[u8]) -> Result<Vec<EntropyRegion>> {
75    let mut regions = Vec::new();
76    let chunk_size = 1024; // Analyze in 1KB chunks
77    let high_entropy_threshold = 7.5; // Threshold for high entropy
78
79    for (i, chunk) in data.chunks(chunk_size).enumerate() {
80        let entropy = calculate_entropy(chunk);
81
82        if entropy > high_entropy_threshold {
83            let start = i * chunk_size;
84            let end = std::cmp::min(start + chunk_size, data.len());
85
86            let description = classify_high_entropy_region(chunk, entropy);
87
88            regions.push(EntropyRegion {
89                start: start as u64,
90                end: end as u64,
91                entropy,
92                description,
93            });
94        }
95    }
96
97    Ok(regions)
98}
99
100/// Classify what might cause high entropy in a region
101fn classify_high_entropy_region(data: &[u8], entropy: f64) -> String {
102    if entropy > 7.9 {
103        "Likely encrypted or compressed data".to_string()
104    } else if entropy > 7.5 {
105        // Check for patterns that might indicate specific types
106        if has_crypto_constants(data) {
107            "Possible cryptographic constants".to_string()
108        } else if has_compression_signature(data) {
109            "Possible compressed data".to_string()
110        } else {
111            "High entropy region - possible obfuscation".to_string()
112        }
113    } else {
114        "Moderately high entropy".to_string()
115    }
116}
117
/// Report whether `data` contains one of a few well-known hash constants.
///
/// This is a simplified heuristic: it searches the whole slice for three
/// 4-byte patterns, all encodings of the initial state words `0x67452301`
/// and `0xefcdab89` used by MD5 and SHA-1.
fn has_crypto_constants(data: &[u8]) -> bool {
    const KNOWN_PATTERNS: [&[u8]; 3] = [
        b"\x67\x45\x23\x01", // 0x67452301, most significant byte first
        b"\x01\x23\x45\x67", // 0x67452301, least significant byte first
        b"\x89\xab\xcd\xef", // 0xefcdab89, least significant byte first
    ];

    KNOWN_PATTERNS.iter().any(|pattern| {
        data.windows(pattern.len())
            .any(|window| window == *pattern)
    })
}
136
/// Report whether `data` starts with the magic bytes of a common compression
/// or archive format (GZIP, ZIP, BZIP2, XZ).
///
/// Only the very beginning of the slice is inspected. Each magic is compared
/// at its full length, so inputs shorter than a given magic simply do not
/// match it; no separate length guard is required.
fn has_compression_signature(data: &[u8]) -> bool {
    const MAGICS: [&[u8]; 4] = [
        b"\x1f\x8b",     // GZIP
        b"PK\x03\x04",   // ZIP local file header
        b"BZh",          // BZIP2
        b"\xfd7zXZ\x00", // XZ: six bytes, FD 37 7A 58 5A 00
    ];

    // `starts_with` compares exactly `magic.len()` bytes. Comparing the
    // five-byte prefix of the XZ magic against a four-byte slice, as a fixed
    // `data[0..4]` window would, can never succeed.
    MAGICS.iter().any(|magic| data.starts_with(magic))
}
149
150/// Analyze indicators of packing/obfuscation
151fn analyze_packing_indicators(
152    data: &[u8],
153    section_entropy: &HashMap<String, f64>,
154) -> PackingIndicators {
155    let mut indicators = PackingIndicators::default();
156
157    // Check overall entropy
158    let overall_entropy = calculate_entropy(data);
159
160    // High overall entropy suggests packing
161    if overall_entropy > 7.5 {
162        indicators.is_packed = true;
163    }
164
165    // Check for high entropy in code sections
166    let mut high_entropy_code = false;
167    for (name, &entropy) in section_entropy {
168        if (name.contains("text") || name.contains("code")) && entropy > 7.0 {
169            high_entropy_code = true;
170            break;
171        }
172    }
173
174    if high_entropy_code {
175        indicators.is_packed = true;
176    }
177
178    // Estimate compression ratio (simplified)
179    if indicators.is_packed {
180        // This is a very rough estimate
181        indicators.compression_ratio = Some(overall_entropy / 8.0);
182    }
183
184    // Determine obfuscation level
185    indicators.obfuscation_level = if overall_entropy > 7.8 {
186        ObfuscationLevel::High
187    } else if overall_entropy > 7.5 {
188        ObfuscationLevel::Medium
189    } else if overall_entropy > 7.0 {
190        ObfuscationLevel::Low
191    } else {
192        ObfuscationLevel::None
193    };
194
195    // Try to identify specific packers (simplified)
196    indicators.packer_name = detect_packer(data);
197
198    indicators
199}
200
/// Try to name the packer that produced `data` from marker strings.
///
/// This is a deliberately simple heuristic, not a signature database: the
/// first 1024 bytes are searched for the ASCII markers `UPX`, `VMProtect`,
/// `Themida` and `ASPack`, in that order, and the first one found is
/// returned. Inputs shorter than 1024 bytes always yield `None`, as do
/// markers that start or end beyond the first 1024 bytes.
fn detect_packer(data: &[u8]) -> Option<String> {
    const HEADER_LEN: usize = 1024;
    // Order matters: earlier entries win when several markers are present.
    const MARKERS: [&str; 4] = ["UPX", "VMProtect", "Themida", "ASPack"];

    // `get` returns `None` for inputs shorter than the header window.
    let header = data.get(..HEADER_LEN)?;

    MARKERS
        .iter()
        .find(|marker| {
            header
                .windows(marker.len())
                .any(|window| window == marker.as_bytes())
        })
        .map(|marker| (*marker).to_owned())
}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Entropy is near zero for constant data and near the maximum for data
    /// that spreads evenly over all byte values.
    #[test]
    fn test_entropy_calculation() {
        // A single repeated byte value carries no information.
        let constant_bytes = vec![0u8; 1024];
        assert!(calculate_entropy(&constant_bytes) < 1.0);

        // Multiplying by the odd number 7 permutes the residues modulo 256,
        // so the truncating cast yields every byte value equally often.
        let spread_bytes: Vec<u8> = (0..1024).map(|i| (i * 7 + 13) as u8).collect();
        assert!(calculate_entropy(&spread_bytes) > 7.0);
    }

    /// A known constant is found when present and not reported otherwise.
    #[test]
    fn test_crypto_constants_detection() {
        let with_constant = b"\x67\x45\x23\x01some other data";
        assert!(has_crypto_constants(with_constant));

        let plain_text = b"no crypto constants here";
        assert!(!has_crypto_constants(plain_text));
    }

    /// Leading GZIP and ZIP magics are recognised; ordinary text is not.
    #[test]
    fn test_compression_signature_detection() {
        let gzip_header = b"\x1f\x8b\x08\x00";
        assert!(has_compression_signature(gzip_header));

        let zip_header = b"PK\x03\x04";
        assert!(has_compression_signature(zip_header));

        let plain_text = b"normal data";
        assert!(!has_compression_signature(plain_text));
    }
}
266}