Refactored fixpoint modules (#77)

e82c19ce · Enkelmann · GitHub · 281a0207 · e82c19ce · e82c19ce
Unverified Commit e82c19ce authored Aug 06, 2020 by Enkelmann Committed by GitHub Aug 06, 2020
4 changed files
--- a/cwe_checker_rs/src/analysis/fixpoint.rs
+++ b/cwe_checker_rs/src/analysis/fixpoint.rs
-/*!
-This module implements a generic fixpoint algorithm for dataflow analysis.
-
-A fixpoint problem is defined as a graph where:
- Each node `n` gets assigned a value `val(n)` where the set of all values forms a partially ordered set.
- Each edge `e` defines a rule `e:value -> value` how to compute the value at the end node given the value at the start node of the edge.
-
-A fixpoint is an assignment of values to all nodes of the graph so that for all edges
-`e(val(start_node)) <= val(end_node)` holds.
-
-For general information on dataflow analysis using fixpoint algorithms see [Wikipedia](https://en.wikipedia.org/wiki/Data-flow_analysis).
-Or open an issue on github that you want more documentation here. :-)
-*/
+//! Creating and computing generic fixpoint computations.
+//!
+//! For general information on dataflow analysis using fixpoint algorithms see [Wikipedia](https://en.wikipedia.org/wiki/Data-flow_analysis).
+//!
+//! # General implementation notes
+//!
+//! A fixpoint problem is defined as a graph where:
+//! - Each node `n` gets assigned a value `val(n)` where the set of all values forms a partially ordered set.
+//! - Each edge `e` defines a rule `e:value -> value` how to compute the value at the end node given the value at the start node of the edge.
+//!
+//! A fixpoint is reached if an assignment of values to all nodes of the graph is found
+//! so that for all edges `e(val(start_node)) <= val(end_node)` holds.
+//! Usually one wants to find the smallest fixpoint,
+//! i.e. a fixpoint such that for each node `n` the value `val(n)` is as small as possible (with respect to the partial order)
+//! but also not less than a given starting value.
+//!
+//! As in the `graph` module, nodes are assumed to represent points in time,
+//! whereas edges represent state transitions or (artificial) information flow channels.
+//! In particular, only edges have transition functions and not nodes.
+//!
+//! In the current implementation edge transition functions are also allowed to return `None`
+//! to indicate that no information flows through the edge.
+//! For example, an analysis can use this to indicate edges that are never taken
+//! and thus prevent dead code from affecting the analysis.
+//!
+//! # How to compute the solution to a fixpoint problem
+//!
+//! To create a fixpoint computation one needs an object implementing the `Context` trait.
+//! This object contains all information necessary to compute fixpoints,
+//! like the graph or how to compute transition functions,
+//! but not the actual starting values of a fixpoint computation.
+//! With it, create a `Computation` object and then modify the node values through the object
+//! to match the intended starting conditions of the fixpoint computation.
+//! The `Computation` object also contains methods to actually run the fixpoint computation after the starting values are set
+//! and methods to retrieve the results of the computation.

 use fnv::FnvHashMap;
 use petgraph::graph::{DiGraph, EdgeIndex, NodeIndex};
 use petgraph::visit::EdgeRef;
 use std::collections::{BTreeMap, BinaryHeap};

-/// A fixpoint problem defines the context for a fixpoint computation.
+/// The context of a fixpoint computation.
 ///
 /// All trait methods have access to the context object (via `&self`), so that context information is accessible through it.
-pub trait Problem {
+pub trait Context {
+    /// the type of edge labels of the underlying graph
    type EdgeLabel: Clone;
+    /// the type of node labels of the underlying graph
    type NodeLabel;
+    /// The type of the value that gets assigned to each node.
+    /// The values should form a partially ordered set.
    type NodeValue: PartialEq + Eq;

+    /// Get the graph on which the fixpoint computation operates.
    fn get_graph(&self) -> &DiGraph<Self::NodeLabel, Self::EdgeLabel>;

    /// This function describes how to merge two values
@@ -36,21 +63,41 @@ pub trait Problem {
    fn update_edge(&self, value: &Self::NodeValue, edge: EdgeIndex) -> Option<Self::NodeValue>;
 }

-/// The computation struct contains an intermediate result of a fixpoint computation.
-pub struct Computation<T: Problem> {
-    fp_problem: T,
-    node_priority_list: Vec<usize>, // maps a node index to its priority (higher priority nodes get stabilized first)
-    priority_to_node_list: Vec<NodeIndex>, // maps a priority to the corresponding node index
+/// The computation struct contains an intermediate result of a fixpoint computation
+/// and provides methods for continuing the fixpoint computation
+/// or extracting the (intermediate or final) results.
+///
+/// # Usage
+///
+/// ```
+/// let mut computation = Computation::new(context, optional_default_node_value);
+///
+/// // set starting node values with computation.set_node_value(..)
+/// // ...
+///
+/// computation.compute();
+///
+/// // get the resulting node values
+/// if let Some(node_value) = computation.get_node_value(node_index) {
+///     // ...
+/// };
+/// ```
+pub struct Computation<T: Context> {
+    fp_context: T,
+    /// maps a node index to its priority (higher priority nodes get stabilized first)
+    node_priority_list: Vec<usize>,
+    /// maps a priority to the corresponding node index
+    priority_to_node_list: Vec<NodeIndex>,
    worklist: BinaryHeap<usize>,
    default_value: Option<T::NodeValue>,
    node_values: FnvHashMap<NodeIndex, T::NodeValue>,
 }

-impl<T: Problem> Computation<T> {
+impl<T: Context> Computation<T> {
    /// Create a new fixpoint computation from a fixpoint problem, the corresponding graph
    /// and a default value for all nodes if one should exist.
-    pub fn new(fp_problem: T, default_value: Option<T::NodeValue>) -> Self {
-        let graph = fp_problem.get_graph();
+    pub fn new(fp_context: T, default_value: Option<T::NodeValue>) -> Self {
+        let graph = fp_context.get_graph();
        // order the nodes in weak topological order
        let sorted_nodes: Vec<NodeIndex> = petgraph::algo::kosaraju_scc(&graph)
            .into_iter()
@@ -70,7 +117,7 @@ impl<T: Problem> Computation<T> {
            }
        }
        Computation {
-            fp_problem,
+            fp_context,
            node_priority_list,
            priority_to_node_list: sorted_nodes,
            worklist,
@@ -97,7 +144,7 @@ impl<T: Problem> Computation<T> {
    /// Merge the value at a node with some new value.
    fn merge_node_value(&mut self, node: NodeIndex, value: T::NodeValue) {
        if let Some(old_value) = self.node_values.get(&node) {
-            let merged_value = self.fp_problem.merge(&value, old_value);
+            let merged_value = self.fp_context.merge(&value, old_value);
            if merged_value != *old_value {
                self.set_node_value(node, merged_value);
            }
@@ -109,12 +156,12 @@ impl<T: Problem> Computation<T> {
    /// Compute and update the value at the end node of an edge.
    fn update_edge(&mut self, edge: EdgeIndex) {
        let (start_node, end_node) = self
-            .fp_problem
+            .fp_context
            .get_graph()
            .edge_endpoints(edge)
            .expect("Edge not found");
        if let Some(start_val) = self.node_values.get(&start_node) {
-            if let Some(new_end_val) = self.fp_problem.update_edge(start_val, edge) {
+            if let Some(new_end_val) = self.fp_context.update_edge(start_val, edge) {
                self.merge_node_value(end_node, new_end_val);
            }
        }
@@ -123,7 +170,7 @@ impl<T: Problem> Computation<T> {
    /// Update all outgoing edges of a node.
    fn update_node(&mut self, node: NodeIndex) {
        let edges: Vec<EdgeIndex> = self
-            .fp_problem
+            .fp_context
            .get_graph()
            .edges(node)
            .map(|edge_ref| edge_ref.id())
@@ -137,7 +184,7 @@ impl<T: Problem> Computation<T> {
    /// Each node will be visited at most max_steps times.
    /// If a node does not stabilize after max_steps visits, the end result will not be a fixpoint but only an intermediate result of a fixpoint computation.
    pub fn compute_with_max_steps(&mut self, max_steps: u64) {
-        let mut steps = vec![0; self.fp_problem.get_graph().node_count()];
+        let mut steps = vec![0; self.fp_context.get_graph().node_count()];
        while let Some(priority) = self.worklist.pop() {
            let node = self.priority_to_node_list[priority];
            if steps[node.index()] < max_steps {
@@ -163,7 +210,12 @@ impl<T: Problem> Computation<T> {

    /// Get a reference to the underlying graph
    pub fn get_graph(&self) -> &DiGraph<T::NodeLabel, T::EdgeLabel> {
-        self.fp_problem.get_graph()
+        self.fp_context.get_graph()
+    }
+
+    /// Get a reference to the underlying context object
+    pub fn get_context(&self) -> &T {
+        &self.fp_context
    }
 }

@@ -171,11 +223,11 @@ impl<T: Problem> Computation<T> {
 mod tests {
    use super::*;

-    struct FPProblem {
+    struct FPContext {
        graph: DiGraph<(), u64>,
    }

-    impl Problem for FPProblem {
+    impl Context for FPContext {
        type EdgeLabel = u64;
        type NodeLabel = ();
        type NodeValue = u64;
@@ -207,7 +259,7 @@ mod tests {
        }
        graph.add_edge(NodeIndex::new(100), NodeIndex::new(0), 0);

-        let mut solution = Computation::new(FPProblem { graph }, None);
+        let mut solution = Computation::new(FPContext { graph }, None);
        solution.set_node_value(NodeIndex::new(0), 0);
        solution.compute_with_max_steps(20);


--- a/cwe_checker_rs/src/analysis/graph.rs
+++ b/cwe_checker_rs/src/analysis/graph.rs
-/*!
-This module implements functions to generate (interprocedural) control flow graphs out of a program term.
-*/
+//! Generate control flow graphs out of a program term.
+//!
+//! The generated graphs follow some basic principles:
+//! * **Nodes** denote specific (abstract) points in time during program execution,
+//! i.e. information does not change on a node.
+//! So a basic block itself is not a node,
+//! but the points in time before and after execution of the basic block can be nodes.
+//! * **Edges** denote either transitions between the points in time of their start and end nodes during program execution
+//! or they denote (artificial) information flow between nodes. See the `CRCallStub` edges of interprocedural control flow graphs
+//! for an example of an edge that is only meant for information flow and not actual control flow.
+//!
+//! # General assumptions
+//!
+//! The graph construction algorithm assumes
+//! that each basic block of the program term ends with zero, one or two jump instructions.
+//! In the case of two jump instructions the first one is a conditional jump
+//! and the second one is an unconditional jump.
+//! Conditional calls are not supported.
+//! Missing jump instructions are supported to indicate incomplete information about the control flow,
+//! i.e. points where the control flow reconstruction failed.
+//! These points are converted to dead ends in the control flow graphs.
+//!
+//! # Interprocedural control flow graph
+//!
+//! The function [`get_program_cfg`](fn.get_program_cfg.html) builds an interprocedural control flow graph out of a program term as follows:
+//! * Each basic block is converted into two nodes, *BlkStart* and *BlkEnd*,
+//! and a *block* edge from *BlkStart* to *BlkEnd*.
+//! * Jumps and calls inside the program are converted to *Jump* or *Call* edges from the *BlkEnd* node of their source
+//! to the *BlkStart* node of their target (which is the first block of the target function in case of calls).
+//! * Calls to library functions outside the program are converted to *ExternCallStub* edges
+//! from the *BlkEnd* node of the callsite to the *BlkStart* node of the basic block the call returns to
+//! (if the call returns at all).
+//! * For each in-program call and corresponding return jump one node and three edges are generated:
+//!   * An artificial node *CallReturn*
+//!   * A *CRCallStub* edge from the *BlkEnd* node of the callsite to *CallReturn*
+//!   * A *CRReturnStub* edge from the *BlkEnd* node of the returning-from block to *CallReturn*
+//!   * A *CRCombine* edge from *CallReturn* to the *BlkStart* node of the returned-to block.
+//!
+//! The artificial *CallReturn* nodes enable enriching the information flowing through a return edge
+//! with information recovered from the corresponding callsite during a fixpoint computation.

+use crate::prelude::*;
 use crate::term::*;
 use petgraph::graph::{DiGraph, NodeIndex};
-use serde::Serialize;
 use std::collections::{HashMap, HashSet};

 /// The graph type of an interprocedural control flow graph
 pub type Graph<'a> = DiGraph<Node<'a>, Edge<'a>>;

 /// The node type of an interprocedural control flow graph
+///
+/// Each node carries a pointer to its associated block with it.
+/// For `CallReturn` nodes the associated block is the callsite block (containing the call instruction)
+/// and *not* the return block (containing the return instruction).
 #[derive(Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
 pub enum Node<'a> {
    BlkStart(&'a Term<Blk>),
    BlkEnd(&'a Term<Blk>),
-    CallReturn(&'a Term<Blk>), // The block is the one from the call instruction
+    CallReturn(&'a Term<Blk>),
 }

 impl<'a> Node<'a> {
+    /// Get the block corresponding to the node.
    pub fn get_block(&self) -> &'a Term<Blk> {
        use Node::*;
        match self {
@@ -37,12 +79,14 @@ impl<'a> std::fmt::Display for Node<'a> {
    }
 }

-// TODO: document that we assume that the graph only has blocks with either:
-// - one unconditional call instruction
-// - one return instruction
-// - at most 2 intraprocedural jump instructions, i.e. at most one of them is a conditional jump
-
-/// The node type of an interprocedural fixpoint graph
+/// The edge type of an interprocedural fixpoint graph.
+///
+/// Where applicable the edge carries a reference to the corresponding jump instruction.
+/// For `CRCombine` edges the corresponding jump is the call and not the return jump.
+/// Intraprocedural jumps carry a second optional reference,
+/// which is only set if the jump directly follows a conditional jump,
+/// i.e. it represents the "conditional jump not taken" branch.
+/// In this case the other jump reference points to the untaken conditional jump.
 #[derive(Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
 pub enum Edge<'a> {
    Block,
@@ -59,12 +103,14 @@ struct GraphBuilder<'a> {
    program: &'a Term<Program>,
    extern_subs: HashSet<Tid>,
    graph: Graph<'a>,
-    jump_targets: HashMap<Tid, (NodeIndex, NodeIndex)>, // Denotes the NodeIndices of possible jump targets
-    return_addresses: HashMap<Tid, Vec<(NodeIndex, NodeIndex)>>, // for each function the list of return addresses of the corresponding call sites
+    /// Denotes the NodeIndices of possible jump targets
+    jump_targets: HashMap<Tid, (NodeIndex, NodeIndex)>,
+    /// for each function the list of return addresses of the corresponding call sites
+    return_addresses: HashMap<Tid, Vec<(NodeIndex, NodeIndex)>>,
 }

 impl<'a> GraphBuilder<'a> {
-    /// create a new builder with an amtpy graph
+    /// create a new builder with an empty graph
    pub fn new(program: &'a Term<Program>, extern_subs: HashSet<Tid>) -> GraphBuilder<'a> {
        GraphBuilder {
            program,
@@ -161,7 +207,7 @@ impl<'a> GraphBuilder<'a> {
        let block: &'a Term<Blk> = self.graph[node].get_block();
        let jumps = block.term.jmps.as_slice();
        match jumps {
-            [] => (), // TODO: Decide whether blocks without jumps should be considered hard errors or (silent) dead ends
+            [] => (), // Blocks without jumps are dead ends corresponding to control flow reconstruction errors.
            [jump] => self.add_jump_edge(node, jump, None),
            [if_jump, else_jump] => {
                self.add_jump_edge(node, if_jump, None);
@@ -173,8 +219,8 @@ impl<'a> GraphBuilder<'a> {

    /// For each return instruction and each corresponding call, add the following to the graph:
    /// - a CallReturn node.
-    /// - edges from the callsite and from the returning-from-site to the CallReturn node
-    /// - an edge from the CallReturn node to the return-to-site
+    /// - edges from the callsite and from the returning-from site to the CallReturn node
+    /// - an edge from the CallReturn node to the return-to site
    fn add_call_return_node_and_edges(
        &mut self,
        return_from_sub: &Term<Sub>,
@@ -237,7 +283,7 @@ impl<'a> GraphBuilder<'a> {
    }
 }

-/// This function builds the interprocedural control flow graph for a program term.
+/// Build the interprocedural control flow graph for a program term.
 pub fn get_program_cfg(program: &Term<Program>, extern_subs: HashSet<Tid>) -> Graph {
    let builder = GraphBuilder::new(program, extern_subs);
    builder.build()

--- a/cwe_checker_rs/src/analysis/interprocedural_fixpoint.rs
+++ b/cwe_checker_rs/src/analysis/interprocedural_fixpoint.rs
--- a/cwe_checker_rs/src/analysis/pointer_inference/context.rs
+++ b/cwe_checker_rs/src/analysis/pointer_inference/context.rs
@@ -58,7 +58,7 @@ impl<'a> Context<'a> {
    }
 }

-impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> {
+impl<'a> crate::analysis::interprocedural_fixpoint::Context<'a> for Context<'a> {
    type Value = State;

    fn get_graph(&self) -> &Graph<'a> {
@@ -69,7 +69,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> 
        value1.merge(value2)
    }

-    fn update_def(&self, state: &Self::Value, def: &Term<Def>) -> Self::Value {
+    fn update_def(&self, state: &Self::Value, def: &Term<Def>) -> Option<Self::Value> {
        // first check for use-after-frees
        if state.contains_access_of_dangling_memory(&def.term.rhs) {
            let warning = CweWarning {
@@ -91,7 +91,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> 
            Expression::Store { .. } => {
                let mut state = state.clone();
                self.log_debug(state.handle_store_exp(&def.term.rhs), Some(&def.tid));
-                state
+                Some(state)
            }
            Expression::IfThenElse {
                condition,
@@ -120,14 +120,14 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> 
                match state.eval(condition) {
                    Ok(Data::Value(cond)) if !cond.is_top() => {
                        if cond == Bitvector::from_bit(true).into() {
-                            true_state
+                            Some(true_state)
                        } else if cond == Bitvector::from_bit(false).into() {
-                            false_state
+                            Some(false_state)
                        } else {
                            panic!("IfThenElse with wrong condition bitsize encountered")
                        }
                    }
-                    Ok(_) => true_state.merge(&false_state),
+                    Ok(_) => Some(true_state.merge(&false_state)),
                    Err(err) => panic!("IfThenElse-Condition evaluation failed: {}", err),
                }
            }
@@ -137,7 +137,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> 
                    new_state.handle_register_assign(&def.term.lhs, expression),
                    Some(&def.tid),
                );
-                new_state
+                Some(new_state)
            }
        }
    }
@@ -147,6 +147,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> 
        value: &State,
        _jump: &Term<Jmp>,
        _untaken_conditional: Option<&Term<Jmp>>,
+        _target: &Term<Blk>,
    ) -> Option<State> {
        // TODO: Implement some real specialization of conditionals!
        let mut new_value = value.clone();
@@ -159,7 +160,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> 
        state: &State,
        call_term: &Term<Jmp>,
        _target_node: &crate::analysis::graph::Node,
-    ) -> State {
+    ) -> Option<State> {
        let call = if let JmpKind::Call(ref call) = call_term.term.kind {
            call
        } else {
@@ -220,7 +221,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> 
            callee_state.ids_known_to_caller = callee_state.memory.get_all_object_ids();
            callee_state.ids_known_to_caller.remove(&callee_stack_id);

-            callee_state
+            Some(callee_state)
        } else {
            panic!("Indirect call edges not yet supported.")
            // TODO: Support indirect call edges!
@@ -595,7 +596,7 @@ mod tests {

    #[test]
    fn context_problem_implementation() {
-        use crate::analysis::interprocedural_fixpoint::Problem;
+        use crate::analysis::interprocedural_fixpoint::Context as IpFpContext;
        use crate::analysis::pointer_inference::Data;
        use crate::bil::*;
        use Expression::*;
@@ -632,10 +633,10 @@ mod tests {
        };

        // test update_def
-        state = context.update_def(&state, &def);
+        state = context.update_def(&state, &def).unwrap();
        let stack_pointer = Data::Pointer(PointerDomain::new(new_id("main", "RSP"), bv(-16)));
        assert_eq!(state.eval(&Var(register("RSP"))).unwrap(), stack_pointer);
-        state = context.update_def(&state, &store_term);
+        state = context.update_def(&state, &store_term).unwrap();

        // Test update_call
        let target_block = Term {
@@ -647,7 +648,7 @@ mod tests {
        };
        let target_node = crate::analysis::graph::Node::BlkStart(&target_block);
        let call = call_term("func");
-        let mut callee_state = context.update_call(&state, &call, &target_node);
+        let mut callee_state = context.update_call(&state, &call, &target_node).unwrap();
        assert_eq!(callee_state.stack_id, new_id("func", "RSP"));
        assert_eq!(callee_state.caller_stack_ids.len(), 1);
        assert_eq!(
@@ -763,7 +764,7 @@ mod tests {

    #[test]
    fn update_return() {
-        use crate::analysis::interprocedural_fixpoint::Problem;
+        use crate::analysis::interprocedural_fixpoint::Context as IpFpContext;
        use crate::analysis::pointer_inference::object::ObjectType;
        use crate::analysis::pointer_inference::Data;
        let project = mock_project();
@@ -771,10 +772,12 @@ mod tests {
        let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
        let context = Context::new(&project, cwe_sender, log_sender);
        let state_before_return = State::new(&register("RSP"), Tid::new("callee"));
-        let mut state_before_return = context.update_def(
-            &state_before_return,
-            &reg_add_term("RSP", 8, "stack_offset_on_return_adjustment"),
-        );
+        let mut state_before_return = context
+            .update_def(
+                &state_before_return,
+                &reg_add_term("RSP", 8, "stack_offset_on_return_adjustment"),
+            )
+            .unwrap();

        let callsite_id = new_id("call_callee", "RSP");
        state_before_return.memory.add_abstract_object(
@@ -814,10 +817,12 @@ mod tests {
            .unwrap();

        let state_before_call = State::new(&register("RSP"), Tid::new("original_caller_id"));
-        let mut state_before_call = context.update_def(
-            &state_before_call,
-            &reg_add_term("RSP", -16, "stack_offset_on_call_adjustment"),
-        );
+        let mut state_before_call = context
+            .update_def(
+                &state_before_call,
+                &reg_add_term("RSP", -16, "stack_offset_on_call_adjustment"),
+            )
+            .unwrap();
        let caller_caller_id = new_id("caller_caller", "RSP");
        state_before_call.memory.add_abstract_object(
            caller_caller_id.clone(),