threatflux_binary_analysis/analysis/
control_flow.rs

1//! Control flow analysis for binary programs
2//!
3//! This module provides functionality to analyze control flow in binary programs,
4//! including basic block identification, control flow graph construction, and
5//! complexity metrics calculation.
6
7use crate::{
8    disasm::Disassembler,
9    types::{
10        Architecture, BasicBlock, CallGraphConfig, ComplexityMetrics, ControlFlow as FlowType,
11        ControlFlowGraph, Function, Instruction,
12    },
13    BinaryError, BinaryFile, Result,
14};
15use std::cmp;
16use std::collections::{HashMap, HashSet};
17
18// Note: petgraph integration planned for future advanced CFG analysis
19// #[cfg(feature = "control-flow")]
20// use petgraph::{Directed, Graph};
21
22/// Control flow analyzer
23#[derive(Clone)]
24pub struct ControlFlowAnalyzer {
25    /// Architecture being analyzed
26    #[allow(dead_code)]
27    architecture: Architecture,
28    /// Analysis configuration
29    config: AnalysisConfig,
30}
31
32/// Configuration for control flow analysis
33#[derive(Debug, Clone)]
34pub struct AnalysisConfig {
35    /// Maximum number of instructions to analyze per function
36    pub max_instructions: usize,
37    /// Maximum depth for recursive analysis
38    pub max_depth: usize,
39    /// Enable loop detection
40    pub detect_loops: bool,
41    /// Enable complexity metrics calculation
42    pub calculate_metrics: bool,
43    /// Enable call graph construction
44    pub enable_call_graph: bool,
45    /// Enable cognitive complexity calculation
46    pub enable_cognitive_complexity: bool,
47    /// Enable advanced loop analysis
48    pub enable_advanced_loops: bool,
49    /// Call graph configuration
50    pub call_graph_config: Option<CallGraphConfig>,
51}
52
53impl Default for AnalysisConfig {
54    fn default() -> Self {
55        Self {
56            max_instructions: 10000,
57            max_depth: 100,
58            detect_loops: true,
59            calculate_metrics: true,
60            enable_call_graph: false,
61            enable_cognitive_complexity: true,
62            enable_advanced_loops: true,
63            call_graph_config: None,
64        }
65    }
66}
67
68impl ControlFlowAnalyzer {
69    /// Create a new control flow analyzer
70    pub fn new(architecture: Architecture) -> Self {
71        Self {
72            architecture,
73            config: AnalysisConfig::default(),
74        }
75    }
76
77    /// Create analyzer with custom configuration
78    pub fn with_config(architecture: Architecture, config: AnalysisConfig) -> Self {
79        Self {
80            architecture,
81            config,
82        }
83    }
84
85    /// Analyze control flow for all functions in a binary
86    pub fn analyze_binary(&self, binary: &BinaryFile) -> Result<Vec<ControlFlowGraph>> {
87        let mut cfgs = Vec::new();
88
89        // Get functions from symbols
90        let functions = self.extract_functions(binary)?;
91
92        for function in functions {
93            if let Ok(cfg) = self.analyze_function(binary, &function) {
94                cfgs.push(cfg);
95            }
96        }
97
98        Ok(cfgs)
99    }
100
101    /// Analyze control flow for a specific function
102    pub fn analyze_function(
103        &self,
104        binary: &BinaryFile,
105        function: &Function,
106    ) -> Result<ControlFlowGraph> {
107        // Get instructions for the function
108        let instructions = self.get_function_instructions(binary, function)?;
109
110        // Build basic blocks
111        let mut basic_blocks = self.build_basic_blocks(&instructions)?;
112
113        // Calculate complexity metrics
114        let complexity = if self.config.calculate_metrics {
115            self.calculate_complexity(&basic_blocks)
116        } else {
117            ComplexityMetrics::default()
118        };
119
120        // Create a mutable analyzer for enhanced analysis
121        let mut analyzer = self.clone();
122
123        // Perform enhanced analysis if enabled
124        let loops = if self.config.enable_advanced_loops {
125            analyzer
126                .analyze_loops(&mut basic_blocks)
127                .unwrap_or_default()
128        } else {
129            Vec::new()
130        };
131
132        // Build dominator tree
133        if self.config.enable_advanced_loops {
134            let _ = analyzer.build_dominator_tree(&mut basic_blocks);
135        }
136
137        // Classify block types
138        if self.config.enable_advanced_loops {
139            let _ = analyzer.classify_block_types(&mut basic_blocks);
140        }
141
142        Ok(ControlFlowGraph {
143            function: function.clone(),
144            basic_blocks,
145            complexity,
146            loops,
147        })
148    }
149
150    /// Extract functions from binary symbols
151    fn extract_functions(&self, binary: &BinaryFile) -> Result<Vec<Function>> {
152        let mut functions = Vec::new();
153
154        for symbol in binary.symbols() {
155            if matches!(symbol.symbol_type, crate::types::SymbolType::Function) {
156                let function = Function {
157                    name: symbol.name.clone(),
158                    start_address: symbol.address,
159                    end_address: symbol.address + symbol.size,
160                    size: symbol.size,
161                    function_type: crate::types::FunctionType::Normal,
162                    calling_convention: None,
163                    parameters: Vec::new(),
164                    return_type: None,
165                };
166                functions.push(function);
167            }
168        }
169
170        // If no function symbols, try to find functions from entry point
171        if functions.is_empty() {
172            if let Some(entry_point) = binary.entry_point() {
173                let function = Function {
174                    name: "_start".to_string(),
175                    start_address: entry_point,
176                    end_address: entry_point + 1000, // Estimate
177                    size: 1000,
178                    function_type: crate::types::FunctionType::Entrypoint,
179                    calling_convention: None,
180                    parameters: Vec::new(),
181                    return_type: None,
182                };
183                functions.push(function);
184            }
185        }
186
187        Ok(functions)
188    }
189
190    /// Get instructions for a function using the disassembly module
191    fn get_function_instructions(
192        &self,
193        binary: &BinaryFile,
194        function: &Function,
195    ) -> Result<Vec<Instruction>> {
196        // Locate the section containing this function
197        for section in binary.sections() {
198            let start = section.address;
199            let end = start + section.size;
200
201            if function.start_address >= start && function.start_address < end {
202                let data = section.data.as_ref().ok_or_else(|| {
203                    BinaryError::invalid_data("Section data not available for disassembly")
204                })?;
205
206                let offset = (function.start_address - start) as usize;
207                if offset >= data.len() {
208                    return Ok(Vec::new());
209                }
210
211                let available = data.len() - offset;
212                let length = cmp::min(function.size as usize, available);
213                if length == 0 {
214                    return Ok(Vec::new());
215                }
216
217                let slice = &data[offset..offset + length];
218                let disassembler = Disassembler::new(self.architecture)?;
219                return disassembler.disassemble_at(slice, function.start_address, length);
220            }
221        }
222
223        Err(BinaryError::invalid_data(
224            "Function bytes not found in any executable section",
225        ))
226    }
227
228    /// Build basic blocks from instructions
229    fn build_basic_blocks(&self, instructions: &[Instruction]) -> Result<Vec<BasicBlock>> {
230        if instructions.is_empty() {
231            return Ok(Vec::new());
232        }
233
234        let mut basic_blocks = Vec::new();
235        let mut block_starts = HashSet::new();
236
237        // First instruction is always a block start
238        block_starts.insert(instructions[0].address);
239
240        // Find all block boundaries
241        for (i, instr) in instructions.iter().enumerate() {
242            match &instr.flow {
243                FlowType::Jump(target)
244                | FlowType::ConditionalJump(target)
245                | FlowType::Call(target) => {
246                    // Target of jump/call is a block start
247                    block_starts.insert(*target);
248                    // Instruction after conditional jump/call is also a block start
249                    if i + 1 < instructions.len() {
250                        block_starts.insert(instructions[i + 1].address);
251                    }
252                }
253                FlowType::Return | FlowType::Interrupt => {
254                    // Instruction after return/interrupt is a block start (if exists)
255                    if i + 1 < instructions.len() {
256                        block_starts.insert(instructions[i + 1].address);
257                    }
258                }
259                _ => {}
260            }
261        }
262
263        // Build basic blocks
264        let mut current_block_id = 0;
265        let mut current_block_start = 0;
266
267        for (i, instr) in instructions.iter().enumerate() {
268            if block_starts.contains(&instr.address) && i > current_block_start {
269                // End current block
270                let block_instructions = instructions[current_block_start..i].to_vec();
271                let start_addr = instructions[current_block_start].address;
272                let end_addr = instructions[i - 1].address + instructions[i - 1].size as u64;
273
274                basic_blocks.push(BasicBlock {
275                    id: current_block_id,
276                    start_address: start_addr,
277                    end_address: end_addr,
278                    instructions: block_instructions,
279                    successors: Vec::new(),   // Will be filled later
280                    predecessors: Vec::new(), // Will be filled later
281                    block_type: crate::types::BlockType::Normal, // Will be classified later
282                    dominator: None,          // Will be computed later
283                    dominance_frontier: Vec::new(), // Will be computed later
284                });
285
286                current_block_id += 1;
287                current_block_start = i;
288            }
289        }
290
291        // Add the last block
292        if current_block_start < instructions.len() {
293            let block_instructions = instructions[current_block_start..].to_vec();
294            let start_addr = instructions[current_block_start].address;
295            let end_addr =
296                instructions.last().unwrap().address + instructions.last().unwrap().size as u64;
297
298            basic_blocks.push(BasicBlock {
299                id: current_block_id,
300                start_address: start_addr,
301                end_address: end_addr,
302                instructions: block_instructions,
303                successors: Vec::new(),
304                predecessors: Vec::new(),
305                block_type: crate::types::BlockType::Normal, // Will be classified later
306                dominator: None,                             // Will be computed later
307                dominance_frontier: Vec::new(),              // Will be computed later
308            });
309        }
310
311        // Build successor/predecessor relationships
312        self.build_cfg_edges(&mut basic_blocks)?;
313
314        Ok(basic_blocks)
315    }
316
317    /// Build control flow graph edges between basic blocks
318    fn build_cfg_edges(&self, basic_blocks: &mut [BasicBlock]) -> Result<()> {
319        let mut addr_to_block: HashMap<u64, usize> = HashMap::new();
320
321        // Build address to block ID mapping
322        for (i, block) in basic_blocks.iter().enumerate() {
323            addr_to_block.insert(block.start_address, i);
324        }
325
326        // Build edges
327        for i in 0..basic_blocks.len() {
328            let block = &basic_blocks[i];
329            if let Some(last_instr) = block.instructions.last() {
330                match &last_instr.flow {
331                    FlowType::Sequential => {
332                        // Fall through to next block
333                        if i + 1 < basic_blocks.len() {
334                            basic_blocks[i].successors.push(i + 1);
335                            basic_blocks[i + 1].predecessors.push(i);
336                        }
337                    }
338                    FlowType::Jump(target) => {
339                        // Unconditional jump
340                        if let Some(&target_block) = addr_to_block.get(target) {
341                            basic_blocks[i].successors.push(target_block);
342                            basic_blocks[target_block].predecessors.push(i);
343                        }
344                    }
345                    FlowType::ConditionalJump(target) => {
346                        // Conditional jump - two successors
347                        if let Some(&target_block) = addr_to_block.get(target) {
348                            basic_blocks[i].successors.push(target_block);
349                            basic_blocks[target_block].predecessors.push(i);
350                        }
351                        // Fall through
352                        if i + 1 < basic_blocks.len() {
353                            basic_blocks[i].successors.push(i + 1);
354                            basic_blocks[i + 1].predecessors.push(i);
355                        }
356                    }
357                    FlowType::Call(_target) => {
358                        // Function call - continues to next instruction
359                        if i + 1 < basic_blocks.len() {
360                            basic_blocks[i].successors.push(i + 1);
361                            basic_blocks[i + 1].predecessors.push(i);
362                        }
363                        // Note: Call target is not added as successor for CFG
364                    }
365                    FlowType::Return | FlowType::Interrupt => {
366                        // No successors
367                    }
368                    FlowType::Unknown => {
369                        // Conservatively assume fall through
370                        if i + 1 < basic_blocks.len() {
371                            basic_blocks[i].successors.push(i + 1);
372                            basic_blocks[i + 1].predecessors.push(i);
373                        }
374                    }
375                }
376            }
377        }
378
379        Ok(())
380    }
381
382    /// Calculate complexity metrics for a control flow graph
383    fn calculate_complexity(&self, basic_blocks: &[BasicBlock]) -> ComplexityMetrics {
384        let basic_block_count = basic_blocks.len() as u32;
385        let mut edge_count = 0;
386
387        // Count edges
388        for block in basic_blocks {
389            edge_count += block.successors.len() as u32;
390        }
391
392        // Cyclomatic complexity = E - N + 2P
393        // Where E = edges, N = nodes, P = connected components (assume 1)
394        let cyclomatic_complexity = if basic_block_count > 0 {
395            edge_count.saturating_sub(basic_block_count) + 2
396        } else {
397            0
398        };
399
400        // Detect loops (simplified)
401        let loop_count = self.detect_loops(basic_blocks);
402
403        // Calculate nesting depth (simplified)
404        let nesting_depth = self.calculate_nesting_depth(basic_blocks);
405
406        // Calculate cognitive complexity
407        let cognitive_complexity = self.calculate_cognitive_complexity(basic_blocks);
408
409        // Calculate Halstead metrics if available
410        let halstead_metrics = self.calculate_halstead_metrics(basic_blocks);
411
412        // Calculate maintainability index if Halstead metrics are available
413        let maintainability_index = if let Some(ref halstead) = halstead_metrics {
414            self.calculate_maintainability_index(halstead, cyclomatic_complexity, basic_block_count)
415        } else {
416            None
417        };
418
419        ComplexityMetrics {
420            cyclomatic_complexity,
421            basic_block_count,
422            edge_count,
423            nesting_depth,
424            loop_count,
425            cognitive_complexity,
426            halstead_metrics,
427            maintainability_index,
428        }
429    }
430
431    /// Detect loops in the control flow graph
432    fn detect_loops(&self, basic_blocks: &[BasicBlock]) -> u32 {
433        if !self.config.detect_loops {
434            return 0;
435        }
436
437        let mut loop_count = 0;
438        let mut visited = vec![false; basic_blocks.len()];
439        let mut in_stack = vec![false; basic_blocks.len()];
440
441        // Use DFS to detect back edges (indicating loops)
442        for i in 0..basic_blocks.len() {
443            if !visited[i] {
444                loop_count += Self::dfs_detect_loops(i, basic_blocks, &mut visited, &mut in_stack);
445            }
446        }
447
448        loop_count
449    }
450
451    /// DFS helper for loop detection
452    fn dfs_detect_loops(
453        node: usize,
454        basic_blocks: &[BasicBlock],
455        visited: &mut [bool],
456        in_stack: &mut [bool],
457    ) -> u32 {
458        visited[node] = true;
459        in_stack[node] = true;
460        let mut loops = 0;
461
462        for &successor in &basic_blocks[node].successors {
463            if !visited[successor] {
464                loops += Self::dfs_detect_loops(successor, basic_blocks, visited, in_stack);
465            } else if in_stack[successor] {
466                // Back edge found - indicates a loop
467                loops += 1;
468            }
469        }
470
471        in_stack[node] = false;
472        loops
473    }
474
475    /// Calculate nesting depth (simplified heuristic)
476    fn calculate_nesting_depth(&self, basic_blocks: &[BasicBlock]) -> u32 {
477        let mut max_depth = 0;
478
479        // Simple heuristic: depth based on indegree
480        for block in basic_blocks {
481            let depth = block.predecessors.len() as u32;
482            if depth > max_depth {
483                max_depth = depth;
484            }
485        }
486
487        max_depth
488    }
489
490    /// Calculate cognitive complexity (different from cyclomatic complexity)
491    /// Cognitive complexity measures how difficult the code is to understand
492    fn calculate_cognitive_complexity(&self, basic_blocks: &[BasicBlock]) -> u32 {
493        let mut cognitive_complexity = 0;
494        let mut nesting_level = 0;
495
496        for block in basic_blocks {
497            for instruction in &block.instructions {
498                // Increment for decision structures
499                match &instruction.flow {
500                    FlowType::ConditionalJump(_) => {
501                        cognitive_complexity += 1 + nesting_level;
502                    }
503                    FlowType::Jump(_) => {
504                        // Break/continue statements in loops add complexity
505                        if self.is_in_loop_context(block, basic_blocks) {
506                            cognitive_complexity += 1;
507                        }
508                    }
509                    _ => {}
510                }
511
512                // Analyze instruction patterns for complexity
513                match instruction.mnemonic.as_str() {
514                    // Conditional instructions
515                    "je" | "jne" | "jl" | "jle" | "jg" | "jge" | "jz" | "jnz" | "js" | "jns" => {
516                        cognitive_complexity += 1 + nesting_level;
517                    }
518                    // Loop instructions
519                    "loop" | "loope" | "loopne" | "loopz" | "loopnz" => {
520                        cognitive_complexity += 1 + nesting_level;
521                        nesting_level += 1; // Increase nesting for subsequent instructions
522                    }
523                    // Exception handling
524                    "int" | "syscall" | "sysenter" => {
525                        cognitive_complexity += 1;
526                    }
527                    _ => {}
528                }
529            }
530
531            // Adjust nesting level based on block structure
532            if block.successors.len() > 1 {
533                nesting_level += 1;
534            }
535        }
536
537        cognitive_complexity
538    }
539
540    /// Check if a block is in a loop context
541    fn is_in_loop_context(&self, _block: &BasicBlock, _basic_blocks: &[BasicBlock]) -> bool {
542        // Simplified implementation - in real implementation, this would
543        // analyze the control flow to determine if we're inside a loop
544        false
545    }
546
547    /// Calculate Halstead metrics for software complexity
548    fn calculate_halstead_metrics(
549        &self,
550        basic_blocks: &[BasicBlock],
551    ) -> Option<crate::types::HalsteadMetrics> {
552        let mut operators = HashMap::new();
553        let mut operands = HashMap::new();
554        let mut total_operators = 0;
555        let mut total_operands = 0;
556
557        for block in basic_blocks {
558            for instruction in &block.instructions {
559                // Count operators (mnemonics)
560                *operators.entry(instruction.mnemonic.clone()).or_insert(0) += 1;
561                total_operators += 1;
562
563                // Count operands (simplified - split operands string)
564                if !instruction.operands.is_empty() {
565                    let ops: Vec<&str> = instruction.operands.split(',').collect();
566                    for op in ops {
567                        let trimmed = op.trim();
568                        if !trimmed.is_empty() {
569                            *operands.entry(trimmed.to_string()).or_insert(0) += 1;
570                            total_operands += 1;
571                        }
572                    }
573                }
574            }
575        }
576
577        let n1 = operators.len() as u32; // Distinct operators
578        let n2 = operands.len() as u32; // Distinct operands
579        let capital_n1 = total_operators; // Total operators
580        let capital_n2 = total_operands; // Total operands
581
582        if n1 == 0 && n2 == 0 {
583            return None;
584        }
585
586        let vocabulary = n1 + n2;
587        let length = capital_n1 + capital_n2;
588        let calculated_length = (n1 as f64) * (n1 as f64).log2() + (n2 as f64) * (n2 as f64).log2();
589        let volume = (length as f64) * (vocabulary as f64).log2();
590        let difficulty = if n2 > 0 {
591            ((n1 as f64) / 2.0) * ((capital_n2 as f64) / (n2 as f64))
592        } else {
593            0.0
594        };
595        let effort = difficulty * volume;
596        let time = effort / 18.0; // Assuming 18 mental discriminations per second
597        let bugs = volume / 3000.0; // Estimated bugs
598
599        Some(crate::types::HalsteadMetrics {
600            n1,
601            n2,
602            capital_n1,
603            capital_n2,
604            vocabulary,
605            length,
606            calculated_length,
607            volume,
608            difficulty,
609            effort,
610            time,
611            bugs,
612        })
613    }
614
615    /// Calculate maintainability index
616    fn calculate_maintainability_index(
617        &self,
618        halstead: &crate::types::HalsteadMetrics,
619        cyclomatic_complexity: u32,
620        lines_of_code: u32,
621    ) -> Option<f64> {
622        if halstead.volume <= 0.0 || lines_of_code == 0 {
623            return None;
624        }
625
626        // Maintainability Index = 171 - 5.2 * ln(HV) - 0.23 * CC - 16.2 * ln(LOC)
627        // Where HV = Halstead Volume, CC = Cyclomatic Complexity, LOC = Lines of Code
628        let mi = 171.0
629            - 5.2 * halstead.volume.ln()
630            - 0.23 * (cyclomatic_complexity as f64)
631            - 16.2 * (lines_of_code as f64).ln();
632
633        // Clamp to 0-100 range
634        Some(mi.clamp(0.0, 100.0))
635    }
636
637    /// Perform enhanced loop analysis
638    pub fn analyze_loops(
639        &mut self,
640        basic_blocks: &mut [BasicBlock],
641    ) -> Result<Vec<crate::types::Loop>> {
642        if !self.config.enable_advanced_loops {
643            return Ok(Vec::new());
644        }
645
646        let mut loops = Vec::new();
647        let mut visited = vec![false; basic_blocks.len()];
648        let mut in_stack = vec![false; basic_blocks.len()];
649        let mut back_edges = Vec::new();
650
651        // Find back edges using DFS
652        for i in 0..basic_blocks.len() {
653            if !visited[i] {
654                self.find_back_edges(
655                    i,
656                    basic_blocks,
657                    &mut visited,
658                    &mut in_stack,
659                    &mut back_edges,
660                );
661            }
662        }
663
664        // Analyze each back edge to identify loops
665        for (tail, head) in back_edges {
666            if let Some(loop_info) = self.analyze_natural_loop(head, tail, basic_blocks) {
667                loops.push(loop_info);
668            }
669        }
670
671        // Classify loop types and detect induction variables
672        for loop_info in &mut loops {
673            self.classify_loop_type(loop_info, basic_blocks);
674            self.detect_induction_variables(loop_info, basic_blocks);
675        }
676
677        Ok(loops)
678    }
679
680    /// Find back edges using DFS
681    #[allow(clippy::only_used_in_recursion)]
682    fn find_back_edges(
683        &self,
684        node: usize,
685        basic_blocks: &[BasicBlock],
686        visited: &mut [bool],
687        in_stack: &mut [bool],
688        back_edges: &mut Vec<(usize, usize)>,
689    ) {
690        visited[node] = true;
691        in_stack[node] = true;
692
693        for &successor in &basic_blocks[node].successors {
694            if !visited[successor] {
695                self.find_back_edges(successor, basic_blocks, visited, in_stack, back_edges);
696            } else if in_stack[successor] {
697                // Found back edge: node -> successor
698                back_edges.push((node, successor));
699            }
700        }
701
702        in_stack[node] = false;
703    }
704
705    /// Analyze a natural loop given a back edge
706    fn analyze_natural_loop(
707        &self,
708        header: usize,
709        tail: usize,
710        basic_blocks: &[BasicBlock],
711    ) -> Option<crate::types::Loop> {
712        let mut loop_blocks = HashSet::new();
713        let mut worklist = Vec::new();
714
715        // Start with the header and tail
716        loop_blocks.insert(header);
717        loop_blocks.insert(tail);
718        worklist.push(tail);
719
720        // Find all blocks in the loop using backwards traversal
721        while let Some(current) = worklist.pop() {
722            for &pred in &basic_blocks[current].predecessors {
723                if !loop_blocks.contains(&pred) {
724                    loop_blocks.insert(pred);
725                    worklist.push(pred);
726                }
727            }
728        }
729
730        // Find exit blocks
731        let mut exit_blocks = Vec::new();
732        for &block_id in &loop_blocks {
733            for &successor in &basic_blocks[block_id].successors {
734                if !loop_blocks.contains(&successor) {
735                    exit_blocks.push(successor);
736                }
737            }
738        }
739
740        let body_blocks: Vec<usize> = loop_blocks.into_iter().filter(|&id| id != header).collect();
741
742        Some(crate::types::Loop {
743            header_block: header,
744            body_blocks,
745            exit_blocks,
746            loop_type: crate::types::LoopType::Unknown, // Will be classified later
747            induction_variables: Vec::new(),            // Will be detected later
748            is_natural: true,                           // Natural loops by definition
749            nesting_level: 0,                           // Will be calculated later
750        })
751    }
752
753    /// Classify the type of loop
754    fn classify_loop_type(&self, loop_info: &mut crate::types::Loop, basic_blocks: &[BasicBlock]) {
755        let header_block = &basic_blocks[loop_info.header_block];
756
757        // Analyze the loop header to determine loop type
758        if let Some(last_instruction) = header_block.instructions.last() {
759            match last_instruction.mnemonic.as_str() {
760                // While loop pattern: test at beginning
761                "cmp" | "test" => {
762                    loop_info.loop_type = crate::types::LoopType::While;
763                }
764                // For loop pattern: has induction variable
765                "inc" | "dec" | "add" | "sub" => {
766                    loop_info.loop_type = crate::types::LoopType::For;
767                }
768                // Loop instruction
769                "loop" | "loope" | "loopne" => {
770                    loop_info.loop_type = crate::types::LoopType::For;
771                }
772                _ => {
773                    // Check if it's a do-while (test at end)
774                    if !loop_info.exit_blocks.is_empty() {
775                        let exit_block = &basic_blocks[loop_info.exit_blocks[0]];
776                        if let Some(exit_instruction) = exit_block.instructions.first() {
777                            if matches!(exit_instruction.mnemonic.as_str(), "cmp" | "test") {
778                                loop_info.loop_type = crate::types::LoopType::DoWhile;
779                            }
780                        }
781                    }
782
783                    // Check for infinite loop (no clear exit)
784                    if loop_info.exit_blocks.is_empty() {
785                        loop_info.loop_type = crate::types::LoopType::Infinite;
786                    }
787                }
788            }
789        }
790
791        // If still unknown, classify as natural or irreducible
792        if loop_info.loop_type == crate::types::LoopType::Unknown {
793            loop_info.loop_type = if loop_info.is_natural {
794                crate::types::LoopType::Natural
795            } else {
796                crate::types::LoopType::Irreducible
797            };
798        }
799    }
800
801    /// Detect induction variables in a loop
802    fn detect_induction_variables(
803        &self,
804        loop_info: &mut crate::types::Loop,
805        basic_blocks: &[BasicBlock],
806    ) {
807        let mut induction_vars = HashSet::new();
808
809        // Look for variables that are incremented/decremented in the loop
810        for &block_id in &loop_info.body_blocks {
811            let block = &basic_blocks[block_id];
812            for instruction in &block.instructions {
813                match instruction.mnemonic.as_str() {
814                    "inc" | "dec" | "add" | "sub" => {
815                        // Extract operand as potential induction variable
816                        if !instruction.operands.is_empty() {
817                            let operand =
818                                instruction.operands.split(',').next().unwrap_or("").trim();
819                            if !operand.is_empty()
820                                && !operand.starts_with('#')
821                                && !operand.starts_with('$')
822                            {
823                                induction_vars.insert(operand.to_string());
824                            }
825                        }
826                    }
827                    _ => {}
828                }
829            }
830        }
831
832        loop_info.induction_variables = induction_vars.into_iter().collect();
833    }
834
835    /// Build dominator tree for enhanced block classification
836    pub fn build_dominator_tree(&mut self, basic_blocks: &mut [BasicBlock]) -> Result<()> {
837        if basic_blocks.is_empty() {
838            return Ok(());
839        }
840
841        let n = basic_blocks.len();
842        let mut dominators = vec![None; n];
843        dominators[0] = Some(0); // Entry block dominates itself
844
845        // Iterative algorithm to compute dominators
846        let mut changed = true;
847        while changed {
848            changed = false;
849            for i in 1..n {
850                let mut new_dom = None;
851
852                // Find first processed predecessor
853                for &pred in &basic_blocks[i].predecessors {
854                    if dominators[pred].is_some() {
855                        new_dom = Some(pred);
856                        break;
857                    }
858                }
859
860                // Intersect with all other processed predecessors
861                if let Some(mut dom) = new_dom {
862                    for &pred in &basic_blocks[i].predecessors {
863                        if let Some(pred_dom) = dominators[pred] {
864                            dom = self.intersect_dominators(dom, pred_dom, &dominators);
865                        }
866                    }
867
868                    if dominators[i] != Some(dom) {
869                        dominators[i] = Some(dom);
870                        changed = true;
871                    }
872                }
873            }
874        }
875
876        // Set dominator information in basic blocks
877        for (i, &dom) in dominators.iter().enumerate() {
878            basic_blocks[i].dominator = dom;
879        }
880
881        Ok(())
882    }
883
884    /// Intersect two dominators to find common dominator
885    fn intersect_dominators(
886        &self,
887        mut b1: usize,
888        mut b2: usize,
889        dominators: &[Option<usize>],
890    ) -> usize {
891        while b1 != b2 {
892            while b1 > b2 {
893                if let Some(dom) = dominators[b1] {
894                    b1 = dom;
895                } else {
896                    break;
897                }
898            }
899            while b2 > b1 {
900                if let Some(dom) = dominators[b2] {
901                    b2 = dom;
902                } else {
903                    break;
904                }
905            }
906        }
907        b1
908    }
909
910    /// Classify basic block types based on their role in control flow
911    pub fn classify_block_types(&mut self, basic_blocks: &mut [BasicBlock]) -> Result<()> {
912        if basic_blocks.is_empty() {
913            return Ok(());
914        }
915
916        // Entry block
917        basic_blocks[0].block_type = crate::types::BlockType::Entry;
918
919        // Classify other blocks
920        #[allow(clippy::needless_range_loop)]
921        for i in 1..basic_blocks.len() {
922            let block = &basic_blocks[i];
923
924            // Exit blocks (no successors)
925            if block.successors.is_empty() {
926                basic_blocks[i].block_type = crate::types::BlockType::Exit;
927                continue;
928            }
929
930            // Return blocks
931            if let Some(last_instruction) = block.instructions.last() {
932                match last_instruction.flow {
933                    FlowType::Return => {
934                        basic_blocks[i].block_type = crate::types::BlockType::Return;
935                        continue;
936                    }
937                    FlowType::Call(_) => {
938                        basic_blocks[i].block_type = crate::types::BlockType::Call;
939                        continue;
940                    }
941                    FlowType::ConditionalJump(_) => {
942                        basic_blocks[i].block_type = crate::types::BlockType::Conditional;
943                        continue;
944                    }
945                    _ => {}
946                }
947            }
948
949            // Default to normal block
950            basic_blocks[i].block_type = crate::types::BlockType::Normal;
951        }
952
953        Ok(())
954    }
955}
956
957/// Analyze binary control flow
958pub fn analyze_binary(binary: &BinaryFile) -> Result<Vec<ControlFlowGraph>> {
959    let analyzer = ControlFlowAnalyzer::new(binary.architecture());
960    analyzer.analyze_binary(binary)
961}
962
963/// Analyze control flow for a specific function
964pub fn analyze_function(binary: &BinaryFile, function: &Function) -> Result<ControlFlowGraph> {
965    let analyzer = ControlFlowAnalyzer::new(binary.architecture());
966    analyzer.analyze_function(binary, function)
967}
968
969#[cfg(test)]
970mod tests {
971    use super::*;
972    use crate::types::*;
973
974    #[test]
975    fn test_analyzer_creation() {
976        let analyzer = ControlFlowAnalyzer::new(Architecture::X86_64);
977        assert_eq!(analyzer.architecture, Architecture::X86_64);
978    }
979
980    #[test]
981    fn test_config_default() {
982        let config = AnalysisConfig::default();
983        assert_eq!(config.max_instructions, 10000);
984        assert_eq!(config.max_depth, 100);
985        assert!(config.detect_loops);
986        assert!(config.calculate_metrics);
987    }
988
989    #[test]
990    fn test_basic_block_creation() {
991        let instructions = vec![
992            Instruction {
993                address: 0x1000,
994                bytes: vec![0x90],
995                mnemonic: "nop".to_string(),
996                operands: String::new(),
997                category: InstructionCategory::Unknown,
998                flow: FlowType::Sequential,
999                size: 1,
1000            },
1001            Instruction {
1002                address: 0x1001,
1003                bytes: vec![0xc3],
1004                mnemonic: "ret".to_string(),
1005                operands: String::new(),
1006                category: InstructionCategory::Control,
1007                flow: FlowType::Return,
1008                size: 1,
1009            },
1010        ];
1011
1012        let analyzer = ControlFlowAnalyzer::new(Architecture::X86_64);
1013        let blocks = analyzer.build_basic_blocks(&instructions).unwrap();
1014
1015        assert_eq!(blocks.len(), 1);
1016        assert_eq!(blocks[0].instructions.len(), 2);
1017        assert_eq!(blocks[0].start_address, 0x1000);
1018        assert_eq!(blocks[0].end_address, 0x1002);
1019    }
1020}