// threatflux_binary_analysis/disasm/mod.rs
use crate::{
7 types::{Architecture, Instruction, InstructionCategory},
8 AnalysisConfig, BinaryError, BinaryFile, Result,
9};
10
11#[cfg(feature = "disasm-capstone")]
12use crate::types::ControlFlow as FlowType;
13
14#[cfg(feature = "disasm-capstone")]
15mod capstone_engine;
16
17#[cfg(feature = "disasm-iced")]
18mod iced_engine;
19
/// Backend used to decode machine code into instructions.
///
/// The `Capstone` and `Iced` variants only exist when the matching Cargo
/// feature (`disasm-capstone` / `disasm-iced`) is enabled; `Auto` is always
/// available.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DisassemblyEngine {
    /// Decode with the `capstone_engine` backend.
    #[cfg(feature = "disasm-capstone")]
    Capstone,
    /// Decode with the `iced_engine` backend.
    #[cfg(feature = "disasm-iced")]
    Iced,
    /// Let the disassembler pick a backend from the target architecture and
    /// the features that were compiled in.
    Auto,
}
32
/// Options that control a disassembly run.
///
/// Only `engine` is interpreted in this module; the remaining fields are
/// handed to the engine backends unchanged.
#[derive(Debug, Clone)]
pub struct DisassemblyConfig {
    /// Backend to use; `Auto` defers the choice to the disassembler.
    pub engine: DisassemblyEngine,
    /// Upper bound on the number of decoded instructions.
    /// NOTE(review): enforced inside the engine modules — confirm whether the
    /// limit applies per call or per binary.
    pub max_instructions: usize,
    /// NOTE(review): presumably requests detailed per-instruction information
    /// from the backend — confirm against the engine modules.
    pub detailed: bool,
    /// NOTE(review): presumably enables control-flow classification of each
    /// instruction — confirm against the engine modules.
    pub analyze_control_flow: bool,
    /// NOTE(review): presumably skips undecodable bytes instead of failing —
    /// confirm against the engine modules.
    pub skip_invalid: bool,
}
47
48impl Default for DisassemblyConfig {
49 fn default() -> Self {
50 Self {
51 engine: DisassemblyEngine::Auto,
52 max_instructions: 10000,
53 detailed: true,
54 analyze_control_flow: true,
55 skip_invalid: true,
56 }
57 }
58}
59
/// Disassembles raw bytes for a single target architecture using a
/// configurable backend.
pub struct Disassembler {
    /// Engine selection and decoding options applied to every call.
    config: DisassemblyConfig,
    /// Architecture the input is decoded as; also drives `Auto` engine
    /// selection.
    architecture: Architecture,
}
65
66impl Disassembler {
67 pub fn new(architecture: Architecture) -> Result<Self> {
69 Ok(Self {
70 config: DisassemblyConfig::default(),
71 architecture,
72 })
73 }
74
75 pub fn with_config(architecture: Architecture, config: DisassemblyConfig) -> Result<Self> {
77 Ok(Self {
78 config,
79 architecture,
80 })
81 }
82
83 pub fn disassemble(&self, data: &[u8], address: u64) -> Result<Vec<Instruction>> {
85 let engine = self.select_engine()?;
86
87 match engine {
88 #[cfg(feature = "disasm-capstone")]
89 DisassemblyEngine::Capstone => {
90 capstone_engine::disassemble(data, address, self.architecture, &self.config)
91 }
92 #[cfg(feature = "disasm-iced")]
93 DisassemblyEngine::Iced => {
94 iced_engine::disassemble(data, address, self.architecture, &self.config)
95 }
96 DisassemblyEngine::Auto => {
97 #[cfg(feature = "disasm-capstone")]
99 {
100 capstone_engine::disassemble(data, address, self.architecture, &self.config)
101 }
102 #[cfg(all(feature = "disasm-iced", not(feature = "disasm-capstone")))]
103 {
104 iced_engine::disassemble(data, address, self.architecture, &self.config)
105 }
106 #[cfg(not(any(feature = "disasm-capstone", feature = "disasm-iced")))]
107 {
108 Err(BinaryError::feature_not_available(
109 "No disassembly engine available. Enable 'disasm-capstone' or 'disasm-iced' feature.",
110 ))
111 }
112 }
113 }
114 }
115
116 pub fn disassemble_section(
118 &self,
119 binary: &BinaryFile,
120 section_name: &str,
121 ) -> Result<Vec<Instruction>> {
122 for section in binary.sections() {
123 if section.name == section_name {
124 if let Some(data) = §ion.data {
125 return self.disassemble(data, section.address);
126 } else {
127 return Err(BinaryError::invalid_data(
129 "Section data not available for disassembly",
130 ));
131 }
132 }
133 }
134
135 Err(BinaryError::invalid_data(format!(
136 "Section '{}' not found",
137 section_name
138 )))
139 }
140
141 pub fn disassemble_at(
143 &self,
144 data: &[u8],
145 address: u64,
146 length: usize,
147 ) -> Result<Vec<Instruction>> {
148 if data.len() < length {
149 return Err(BinaryError::invalid_data(
150 "Insufficient data for disassembly",
151 ));
152 }
153
154 self.disassemble(&data[..length], address)
155 }
156
157 fn select_engine(&self) -> Result<DisassemblyEngine> {
159 match self.config.engine {
160 #[cfg(feature = "disasm-capstone")]
161 DisassemblyEngine::Capstone => Ok(DisassemblyEngine::Capstone),
162 #[cfg(feature = "disasm-iced")]
163 DisassemblyEngine::Iced => Ok(DisassemblyEngine::Iced),
164 DisassemblyEngine::Auto => {
165 match self.architecture {
167 Architecture::X86 | Architecture::X86_64 => {
168 #[cfg(feature = "disasm-iced")]
169 {
170 Ok(DisassemblyEngine::Iced)
171 }
172 #[cfg(all(feature = "disasm-capstone", not(feature = "disasm-iced")))]
173 {
174 Ok(DisassemblyEngine::Capstone)
175 }
176 #[cfg(not(any(feature = "disasm-capstone", feature = "disasm-iced")))]
177 {
178 Err(BinaryError::feature_not_available(
179 "No disassembly engine available",
180 ))
181 }
182 }
183 _ => {
184 #[cfg(feature = "disasm-capstone")]
186 {
187 Ok(DisassemblyEngine::Capstone)
188 }
189 #[cfg(not(feature = "disasm-capstone"))]
190 {
191 Err(BinaryError::unsupported_arch(format!(
192 "Architecture {:?} requires Capstone engine",
193 self.architecture
194 )))
195 }
196 }
197 }
198 }
199 }
200 }
201}
202
203pub fn disassemble_binary(
205 binary: &BinaryFile,
206 config: &AnalysisConfig,
207) -> Result<Vec<Instruction>> {
208 let disasm_config = DisassemblyConfig {
209 engine: config.disassembly_engine,
210 max_instructions: config.max_analysis_size / 16, detailed: true,
212 analyze_control_flow: true,
213 skip_invalid: true,
214 };
215
216 let disassembler = Disassembler::with_config(binary.architecture(), disasm_config)?;
217
218 let mut all_instructions = Vec::new();
219
220 for section in binary.sections() {
222 if section.permissions.execute {
223 if let Some(data) = §ion.data {
224 match disassembler.disassemble(data, section.address) {
225 Ok(mut instructions) => {
226 all_instructions.append(&mut instructions);
227 }
228 Err(_) => {
229 continue;
231 }
232 }
233 }
234 }
235 }
236
237 Ok(all_instructions)
238}
239
240fn categorize_instruction(mnemonic: &str) -> InstructionCategory {
242 let mnemonic_lower = mnemonic.to_lowercase();
243
244 if mnemonic_lower.starts_with("add")
245 || mnemonic_lower.starts_with("sub")
246 || mnemonic_lower.starts_with("mul")
247 || mnemonic_lower.starts_with("div")
248 || mnemonic_lower.starts_with("inc")
249 || mnemonic_lower.starts_with("dec")
250 {
251 InstructionCategory::Arithmetic
252 } else if mnemonic_lower.starts_with("and")
253 || mnemonic_lower.starts_with("or")
254 || mnemonic_lower.starts_with("xor")
255 || mnemonic_lower.starts_with("not")
256 || mnemonic_lower.starts_with("shl")
257 || mnemonic_lower.starts_with("shr")
258 {
259 InstructionCategory::Logic
260 } else if mnemonic_lower.starts_with("mov")
261 || mnemonic_lower.starts_with("lea")
262 || mnemonic_lower.starts_with("push")
263 || mnemonic_lower.starts_with("pop")
264 || mnemonic_lower.starts_with("load")
265 || mnemonic_lower.starts_with("store")
266 {
267 InstructionCategory::Memory
268 } else if mnemonic_lower.starts_with("jmp")
269 || mnemonic_lower.starts_with("je")
270 || mnemonic_lower.starts_with("jne")
271 || mnemonic_lower.starts_with("jz")
272 || mnemonic_lower.starts_with("jnz")
273 || mnemonic_lower.starts_with("call")
274 || mnemonic_lower.starts_with("ret")
275 || mnemonic_lower.starts_with("br")
276 || mnemonic_lower.starts_with("bl")
277 {
278 InstructionCategory::Control
279 } else if mnemonic_lower.starts_with("int")
280 || mnemonic_lower.starts_with("syscall")
281 || mnemonic_lower.starts_with("sysenter")
282 || mnemonic_lower.starts_with("sysexit")
283 {
284 InstructionCategory::System
285 } else if mnemonic_lower.contains("aes")
286 || mnemonic_lower.contains("sha")
287 || mnemonic_lower.contains("crypto")
288 {
289 InstructionCategory::Crypto
290 } else if mnemonic_lower.starts_with("fadd")
291 || mnemonic_lower.starts_with("fsub")
292 || mnemonic_lower.starts_with("fmul")
293 || mnemonic_lower.starts_with("fdiv")
294 {
295 InstructionCategory::Float
296 } else if mnemonic_lower.contains("xmm")
297 || mnemonic_lower.contains("ymm")
298 || mnemonic_lower.contains("zmm")
299 || mnemonic_lower.starts_with("v")
300 {
301 InstructionCategory::Vector
302 } else {
303 InstructionCategory::Unknown
304 }
305}
306
307#[cfg(feature = "disasm-capstone")]
309fn analyze_control_flow(mnemonic: &str, operands: &str) -> FlowType {
310 let mnemonic_lower = mnemonic.to_lowercase();
311
312 if mnemonic_lower == "ret" || mnemonic_lower == "retn" {
313 FlowType::Return
314 } else if mnemonic_lower == "call" {
315 if let Some(addr) = extract_address_from_operands(operands) {
317 FlowType::Call(addr)
318 } else {
319 FlowType::Unknown }
321 } else if mnemonic_lower.starts_with("jmp") {
322 if let Some(addr) = extract_address_from_operands(operands) {
323 FlowType::Jump(addr)
324 } else {
325 FlowType::Unknown }
327 } else if mnemonic_lower.starts_with('j') && mnemonic_lower.len() > 1 {
328 if let Some(addr) = extract_address_from_operands(operands) {
330 FlowType::ConditionalJump(addr)
331 } else {
332 FlowType::Unknown }
334 } else if mnemonic_lower == "int" || mnemonic_lower == "syscall" {
335 FlowType::Interrupt
336 } else {
337 FlowType::Sequential
338 }
339}
340
341#[cfg(feature = "disasm-capstone")]
/// Reads a branch target from an operand string.
///
/// Accepts either a lowercase `0x`-prefixed hexadecimal literal or a plain
/// decimal number; the whole string must be the number, so register names and
/// memory expressions yield `None`. A `0x` string that is not valid
/// hexadecimal is still tried as decimal before giving up.
fn extract_address_from_operands(operands: &str) -> Option<u64> {
    operands
        .strip_prefix("0x")
        .and_then(|hex_digits| u64::from_str_radix(hex_digits, 16).ok())
        .or_else(|| operands.parse::<u64>().ok())
}
361
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::*;

    /// Building a disassembler for x86-64 succeeds.
    #[test]
    fn test_disassembler_creation() {
        assert!(Disassembler::new(Architecture::X86_64).is_ok());
    }

    /// The default configuration uses automatic engine selection, a
    /// 10000-instruction cap, and enables detail and control-flow analysis.
    #[test]
    fn test_config_default() {
        let DisassemblyConfig {
            engine,
            max_instructions,
            detailed,
            analyze_control_flow: control_flow,
            ..
        } = DisassemblyConfig::default();

        assert_eq!(engine, DisassemblyEngine::Auto);
        assert_eq!(max_instructions, 10000);
        assert!(detailed);
        assert!(control_flow);
    }

    /// One representative mnemonic per category that is checked.
    #[test]
    fn test_instruction_categorization() {
        let expectations = [
            ("add", InstructionCategory::Arithmetic),
            ("mov", InstructionCategory::Memory),
            ("jmp", InstructionCategory::Control),
            ("and", InstructionCategory::Logic),
            ("syscall", InstructionCategory::System),
        ];

        for (mnemonic, expected) in expectations.iter() {
            assert_eq!(categorize_instruction(mnemonic), *expected);
        }
    }

    /// Returns, direct calls/jumps and plain instructions map to the
    /// expected flow kinds.
    #[test]
    #[cfg(feature = "disasm-capstone")]
    fn test_control_flow_analysis() {
        let expectations = [
            ("ret", "", FlowType::Return),
            ("call", "0x1000", FlowType::Call(0x1000)),
            ("jmp", "0x2000", FlowType::Jump(0x2000)),
            ("je", "0x3000", FlowType::ConditionalJump(0x3000)),
            ("mov", "eax, ebx", FlowType::Sequential),
        ];

        for (mnemonic, operands, expected) in expectations.iter() {
            assert_eq!(analyze_control_flow(mnemonic, operands), *expected);
        }
    }

    /// Hexadecimal and decimal immediates parse; register names do not.
    #[test]
    #[cfg(feature = "disasm-capstone")]
    fn test_address_extraction() {
        let expectations = [
            ("0x1000", Some(0x1000)),
            ("4096", Some(4096)),
            ("eax", None),
        ];

        for (operands, expected) in expectations.iter() {
            assert_eq!(extract_address_from_operands(operands), *expected);
        }
    }
}