threatflux_binary_analysis/disasm/
mod.rs

//! Disassembly module supporting multiple disassembly engines
//!
//! This module provides disassembly capabilities using the Capstone and iced-x86 engines,
//! each enabled by its own Cargo feature (`disasm-capstone`, `disasm-iced`).
//! The choice of engine can be configured based on requirements and availability.
5
6use crate::{
7    types::{Architecture, Instruction, InstructionCategory},
8    AnalysisConfig, BinaryError, BinaryFile, Result,
9};
10
11#[cfg(feature = "disasm-capstone")]
12use crate::types::ControlFlow as FlowType;
13
14#[cfg(feature = "disasm-capstone")]
15mod capstone_engine;
16
17#[cfg(feature = "disasm-iced")]
18mod iced_engine;
19
/// Selects which backend performs the disassembly.
///
/// The engine-specific variants only exist when the matching Cargo feature
/// (`disasm-capstone` / `disasm-iced`) is enabled; `Auto` is always present.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DisassemblyEngine {
    /// Capstone backend (compiled in with the `disasm-capstone` feature)
    #[cfg(feature = "disasm-capstone")]
    Capstone,
    /// iced-x86 backend (compiled in with the `disasm-iced` feature)
    #[cfg(feature = "disasm-iced")]
    Iced,
    /// Let the disassembler pick a backend on its own
    Auto,
}
32
33/// Disassembly configuration
34#[derive(Debug, Clone)]
35pub struct DisassemblyConfig {
36    /// Preferred disassembly engine
37    pub engine: DisassemblyEngine,
38    /// Maximum number of instructions to disassemble
39    pub max_instructions: usize,
40    /// Include instruction details (operands, etc.)
41    pub detailed: bool,
42    /// Enable control flow analysis
43    pub analyze_control_flow: bool,
44    /// Skip invalid instructions
45    pub skip_invalid: bool,
46}
47
48impl Default for DisassemblyConfig {
49    fn default() -> Self {
50        Self {
51            engine: DisassemblyEngine::Auto,
52            max_instructions: 10000,
53            detailed: true,
54            analyze_control_flow: true,
55            skip_invalid: true,
56        }
57    }
58}
59
60/// Disassembler wrapper supporting multiple engines
61pub struct Disassembler {
62    config: DisassemblyConfig,
63    architecture: Architecture,
64}
65
66impl Disassembler {
67    /// Create a new disassembler for the specified architecture
68    pub fn new(architecture: Architecture) -> Result<Self> {
69        Ok(Self {
70            config: DisassemblyConfig::default(),
71            architecture,
72        })
73    }
74
75    /// Create disassembler with custom configuration
76    pub fn with_config(architecture: Architecture, config: DisassemblyConfig) -> Result<Self> {
77        Ok(Self {
78            config,
79            architecture,
80        })
81    }
82
83    /// Disassemble binary code
84    pub fn disassemble(&self, data: &[u8], address: u64) -> Result<Vec<Instruction>> {
85        let engine = self.select_engine()?;
86
87        match engine {
88            #[cfg(feature = "disasm-capstone")]
89            DisassemblyEngine::Capstone => {
90                capstone_engine::disassemble(data, address, self.architecture, &self.config)
91            }
92            #[cfg(feature = "disasm-iced")]
93            DisassemblyEngine::Iced => {
94                iced_engine::disassemble(data, address, self.architecture, &self.config)
95            }
96            DisassemblyEngine::Auto => {
97                // Try available engines in order of preference
98                #[cfg(feature = "disasm-capstone")]
99                {
100                    capstone_engine::disassemble(data, address, self.architecture, &self.config)
101                }
102                #[cfg(all(feature = "disasm-iced", not(feature = "disasm-capstone")))]
103                {
104                    iced_engine::disassemble(data, address, self.architecture, &self.config)
105                }
106                #[cfg(not(any(feature = "disasm-capstone", feature = "disasm-iced")))]
107                {
108                    Err(BinaryError::feature_not_available(
109                        "No disassembly engine available. Enable 'disasm-capstone' or 'disasm-iced' feature.",
110                    ))
111                }
112            }
113        }
114    }
115
116    /// Disassemble a specific section of a binary
117    pub fn disassemble_section(
118        &self,
119        binary: &BinaryFile,
120        section_name: &str,
121    ) -> Result<Vec<Instruction>> {
122        for section in binary.sections() {
123            if section.name == section_name {
124                if let Some(data) = &section.data {
125                    return self.disassemble(data, section.address);
126                } else {
127                    // Section data not available, would need to read from file
128                    return Err(BinaryError::invalid_data(
129                        "Section data not available for disassembly",
130                    ));
131                }
132            }
133        }
134
135        Err(BinaryError::invalid_data(format!(
136            "Section '{}' not found",
137            section_name
138        )))
139    }
140
141    /// Disassemble code at specific address with length
142    pub fn disassemble_at(
143        &self,
144        data: &[u8],
145        address: u64,
146        length: usize,
147    ) -> Result<Vec<Instruction>> {
148        if data.len() < length {
149            return Err(BinaryError::invalid_data(
150                "Insufficient data for disassembly",
151            ));
152        }
153
154        self.disassemble(&data[..length], address)
155    }
156
157    /// Select the appropriate disassembly engine
158    fn select_engine(&self) -> Result<DisassemblyEngine> {
159        match self.config.engine {
160            #[cfg(feature = "disasm-capstone")]
161            DisassemblyEngine::Capstone => Ok(DisassemblyEngine::Capstone),
162            #[cfg(feature = "disasm-iced")]
163            DisassemblyEngine::Iced => Ok(DisassemblyEngine::Iced),
164            DisassemblyEngine::Auto => {
165                // Select best engine for architecture
166                match self.architecture {
167                    Architecture::X86 | Architecture::X86_64 => {
168                        #[cfg(feature = "disasm-iced")]
169                        {
170                            Ok(DisassemblyEngine::Iced)
171                        }
172                        #[cfg(all(feature = "disasm-capstone", not(feature = "disasm-iced")))]
173                        {
174                            Ok(DisassemblyEngine::Capstone)
175                        }
176                        #[cfg(not(any(feature = "disasm-capstone", feature = "disasm-iced")))]
177                        {
178                            Err(BinaryError::feature_not_available(
179                                "No disassembly engine available",
180                            ))
181                        }
182                    }
183                    _ => {
184                        // For non-x86 architectures, prefer Capstone
185                        #[cfg(feature = "disasm-capstone")]
186                        {
187                            Ok(DisassemblyEngine::Capstone)
188                        }
189                        #[cfg(not(feature = "disasm-capstone"))]
190                        {
191                            Err(BinaryError::unsupported_arch(format!(
192                                "Architecture {:?} requires Capstone engine",
193                                self.architecture
194                            )))
195                        }
196                    }
197                }
198            }
199        }
200    }
201}
202
203/// High-level function to disassemble binary data
204pub fn disassemble_binary(
205    binary: &BinaryFile,
206    config: &AnalysisConfig,
207) -> Result<Vec<Instruction>> {
208    let disasm_config = DisassemblyConfig {
209        engine: config.disassembly_engine,
210        max_instructions: config.max_analysis_size / 16, // Estimate ~16 bytes per instruction
211        detailed: true,
212        analyze_control_flow: true,
213        skip_invalid: true,
214    };
215
216    let disassembler = Disassembler::with_config(binary.architecture(), disasm_config)?;
217
218    let mut all_instructions = Vec::new();
219
220    // Disassemble executable sections
221    for section in binary.sections() {
222        if section.permissions.execute {
223            if let Some(data) = &section.data {
224                match disassembler.disassemble(data, section.address) {
225                    Ok(mut instructions) => {
226                        all_instructions.append(&mut instructions);
227                    }
228                    Err(_) => {
229                        // Continue with other sections if one fails
230                        continue;
231                    }
232                }
233            }
234        }
235    }
236
237    Ok(all_instructions)
238}
239
240/// Determine instruction category from mnemonic
241fn categorize_instruction(mnemonic: &str) -> InstructionCategory {
242    let mnemonic_lower = mnemonic.to_lowercase();
243
244    if mnemonic_lower.starts_with("add")
245        || mnemonic_lower.starts_with("sub")
246        || mnemonic_lower.starts_with("mul")
247        || mnemonic_lower.starts_with("div")
248        || mnemonic_lower.starts_with("inc")
249        || mnemonic_lower.starts_with("dec")
250    {
251        InstructionCategory::Arithmetic
252    } else if mnemonic_lower.starts_with("and")
253        || mnemonic_lower.starts_with("or")
254        || mnemonic_lower.starts_with("xor")
255        || mnemonic_lower.starts_with("not")
256        || mnemonic_lower.starts_with("shl")
257        || mnemonic_lower.starts_with("shr")
258    {
259        InstructionCategory::Logic
260    } else if mnemonic_lower.starts_with("mov")
261        || mnemonic_lower.starts_with("lea")
262        || mnemonic_lower.starts_with("push")
263        || mnemonic_lower.starts_with("pop")
264        || mnemonic_lower.starts_with("load")
265        || mnemonic_lower.starts_with("store")
266    {
267        InstructionCategory::Memory
268    } else if mnemonic_lower.starts_with("jmp")
269        || mnemonic_lower.starts_with("je")
270        || mnemonic_lower.starts_with("jne")
271        || mnemonic_lower.starts_with("jz")
272        || mnemonic_lower.starts_with("jnz")
273        || mnemonic_lower.starts_with("call")
274        || mnemonic_lower.starts_with("ret")
275        || mnemonic_lower.starts_with("br")
276        || mnemonic_lower.starts_with("bl")
277    {
278        InstructionCategory::Control
279    } else if mnemonic_lower.starts_with("int")
280        || mnemonic_lower.starts_with("syscall")
281        || mnemonic_lower.starts_with("sysenter")
282        || mnemonic_lower.starts_with("sysexit")
283    {
284        InstructionCategory::System
285    } else if mnemonic_lower.contains("aes")
286        || mnemonic_lower.contains("sha")
287        || mnemonic_lower.contains("crypto")
288    {
289        InstructionCategory::Crypto
290    } else if mnemonic_lower.starts_with("fadd")
291        || mnemonic_lower.starts_with("fsub")
292        || mnemonic_lower.starts_with("fmul")
293        || mnemonic_lower.starts_with("fdiv")
294    {
295        InstructionCategory::Float
296    } else if mnemonic_lower.contains("xmm")
297        || mnemonic_lower.contains("ymm")
298        || mnemonic_lower.contains("zmm")
299        || mnemonic_lower.starts_with("v")
300    {
301        InstructionCategory::Vector
302    } else {
303        InstructionCategory::Unknown
304    }
305}
306
/// Determine the control flow effect of an instruction from its text form.
///
/// `mnemonic` is matched case-insensitively; `operands` is handed unchanged to
/// `extract_address_from_operands` to recover a direct target address.
///
/// * starts with `ret` (`ret`, `retn`, `retf`, `retq`, ...): `Return`
/// * starts with `call` (`call`, `callq`, ...): `Call(target)`
/// * starts with `jmp`: `Jump(target)`
/// * any other mnemonic of two or more characters starting with `j`:
///   `ConditionalJump(target)`
/// * exactly `int` or `syscall`: `Interrupt`
/// * everything else: `Sequential`
///
/// For calls and jumps whose operand text cannot be parsed as one plain
/// number (e.g. register or memory operands), `Unknown` is returned.
///
/// Returns and calls are matched by prefix, the same way
/// `categorize_instruction` matches them, so far-return and size-suffixed
/// spellings are not mistaken for sequential instructions.
#[cfg(feature = "disasm-capstone")]
fn analyze_control_flow(mnemonic: &str, operands: &str) -> FlowType {
    let name = mnemonic.to_lowercase();

    // Wrap a parsed direct target in the given variant; without a parseable
    // target the transfer is treated as indirect and reported as `Unknown`.
    let with_target = |kind: fn(u64) -> FlowType| {
        extract_address_from_operands(operands).map_or(FlowType::Unknown, kind)
    };

    if name.starts_with("ret") {
        FlowType::Return
    } else if name.starts_with("call") {
        with_target(FlowType::Call)
    } else if name.starts_with("jmp") {
        with_target(FlowType::Jump)
    } else if name.starts_with('j') && name.len() > 1 {
        // Conditional jumps
        with_target(FlowType::ConditionalJump)
    } else if name == "int" || name == "syscall" {
        FlowType::Interrupt
    } else {
        FlowType::Sequential
    }
}
340
/// Extract a target address from an instruction's operand text (simplified).
///
/// Only an operand string that is, as a whole, a single number is recognised:
/// hexadecimal with a lowercase `0x` prefix, or plain decimal. No trimming
/// and no parsing of registers or memory expressions is attempted, so such
/// operands yield `None`. A real implementation would need proper operand
/// parsing.
#[cfg(feature = "disasm-capstone")]
fn extract_address_from_operands(operands: &str) -> Option<u64> {
    // Hex form first ...
    let from_hex = operands
        .strip_prefix("0x")
        .and_then(|digits| u64::from_str_radix(digits, 16).ok());

    // ... then fall back to a decimal literal.
    from_hex.or_else(|| operands.parse::<u64>().ok())
}
361
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::*;

    /// Building a disassembler with the default configuration succeeds.
    #[test]
    fn test_disassembler_creation() {
        assert!(Disassembler::new(Architecture::X86_64).is_ok());
    }

    /// The default configuration selects `Auto` and enables the analysis flags.
    #[test]
    fn test_config_default() {
        let DisassemblyConfig {
            engine,
            max_instructions,
            detailed,
            analyze_control_flow: control_flow,
            ..
        } = DisassemblyConfig::default();

        assert_eq!(engine, DisassemblyEngine::Auto);
        assert_eq!(max_instructions, 10000);
        assert!(detailed);
        assert!(control_flow);
    }

    /// One representative mnemonic per checked category.
    #[test]
    fn test_instruction_categorization() {
        let cases = [
            ("add", InstructionCategory::Arithmetic),
            ("mov", InstructionCategory::Memory),
            ("jmp", InstructionCategory::Control),
            ("and", InstructionCategory::Logic),
            ("syscall", InstructionCategory::System),
        ];

        for (mnemonic, expected) in cases.iter() {
            assert_eq!(categorize_instruction(mnemonic), *expected);
        }
    }

    /// Direct returns, calls and jumps are recognised; `mov` stays sequential.
    #[test]
    #[cfg(feature = "disasm-capstone")]
    fn test_control_flow_analysis() {
        let cases = [
            ("ret", "", FlowType::Return),
            ("call", "0x1000", FlowType::Call(0x1000)),
            ("jmp", "0x2000", FlowType::Jump(0x2000)),
            ("je", "0x3000", FlowType::ConditionalJump(0x3000)),
            ("mov", "eax, ebx", FlowType::Sequential),
        ];

        for (mnemonic, operands, expected) in cases.iter() {
            assert_eq!(analyze_control_flow(mnemonic, operands), *expected);
        }
    }

    /// Hex and decimal literals parse; register names do not.
    #[test]
    #[cfg(feature = "disasm-capstone")]
    fn test_address_extraction() {
        let cases = [
            ("0x1000", Some(0x1000)),
            ("4096", Some(4096)),
            ("eax", None),
        ];

        for (operands, expected) in cases.iter() {
            assert_eq!(extract_address_from_operands(operands), *expected);
        }
    }
}
426}