Unverified Commit 83593fac by Melvin Klimke Committed by GitHub

Runtime optimizations for CWE 78 check (#154)

parent 43b56cd3
......@@ -40,7 +40,7 @@
//! - Missing Taints due to lost track of pointer targets
//! - Non tracked function parameters cause incomplete taints that could miss possible dangerous inputs
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use crate::{
analysis::{
......@@ -48,13 +48,16 @@ use crate::{
graph::{self, Edge, Node},
interprocedural_fixpoint_generic::NodeValue,
},
intermediate_representation::{Jmp, Project, Sub},
intermediate_representation::{ExternSymbol, Jmp, Project, Sub},
prelude::*,
utils::log::{CweWarning, LogMessage},
AnalysisResults, CweModule,
};
use petgraph::{graph::NodeIndex, visit::EdgeRef};
use petgraph::{
graph::NodeIndex,
visit::{EdgeRef, IntoNodeReferences},
};
mod state;
use state::*;
......@@ -89,21 +92,25 @@ pub fn check_cwe(
let project = analysis_results.project;
let pointer_inference_results = analysis_results.pointer_inference.unwrap();
let mut cwe_78_graph = analysis_results.control_flow_graph.clone();
cwe_78_graph.reverse();
let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let system_symbols =
crate::utils::symbol_utils::get_symbol_map(project, &config.system_symbols[..]);
let string_symbols =
crate::utils::symbol_utils::get_symbol_map(project, &config.string_symbols[..]);
let user_input_symbols =
crate::utils::symbol_utils::get_symbol_map(project, &config.user_input_symbols[..]);
let symbol_maps: SymbolMaps = SymbolMaps::new(project, &config);
let block_maps = BlockMaps::new(analysis_results);
let general_context = Context::new(
project,
analysis_results.runtime_memory_image,
&pointer_inference_results,
string_symbols,
user_input_symbols,
std::sync::Arc::new(cwe_78_graph),
pointer_inference_results,
std::sync::Arc::new(symbol_maps),
std::sync::Arc::new(block_maps),
cwe_sender,
);
......@@ -203,3 +210,89 @@ fn get_entry_sub_to_entry_node_map(
})
.collect()
}
/// Lookup tables from the TID of an extern symbol to the corresponding extern symbol struct.
///
/// The maps only hold references to the symbols, which have to live for the lifetime `'a`.
pub struct SymbolMaps<'a> {
/// Maps the TID of an extern string related symbol to the corresponding extern symbol struct.
string_symbol_map: HashMap<Tid, &'a ExternSymbol>,
/// Maps the TID of an extern symbol that takes input from the user to the corresponding extern symbol struct.
user_input_symbol_map: HashMap<Tid, &'a ExternSymbol>,
/// Maps the TID of an extern symbol to the extern symbol struct.
extern_symbol_map: HashMap<Tid, &'a ExternSymbol>,
}
impl<'a> SymbolMaps<'a> {
/// Creates a new instance of the symbol maps struct.
pub fn new(project: &'a Project, config: &Config) -> Self {
let mut extern_symbol_map = HashMap::new();
for symbol in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(symbol.tid.clone(), symbol);
}
SymbolMaps {
string_symbol_map: crate::utils::symbol_utils::get_symbol_map(
project,
&config.string_symbols[..],
),
user_input_symbol_map: crate::utils::symbol_utils::get_symbol_map(
project,
&config.user_input_symbols[..],
),
extern_symbol_map,
}
}
}
/// Lookup tables for the blocks of the control flow graph.
///
/// All keys are pairs of the form `(Term-TID, Current-Sub-TID)`.
/// The TID of the current sub is part of the key
/// to distinguish the nodes for blocks contained in more than one function.
pub struct BlockMaps {
/// The set of all `(Def-TID, Current-Sub-TID)` pairs
/// for which the [`Def`] is the first `Def` of its block.
block_first_def_set: HashSet<(Tid, Tid)>,
/// A map to get the node index of the `BlkStart` node
/// of the block that contains a given [`Def`] as its last `Def`.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`.
block_start_last_def_map: HashMap<(Tid, Tid), NodeIndex>,
/// A map to get the node index of the `BlkEnd` node
/// of the block that contains a given [`Jmp`].
/// The keys are of the form `(Jmp-TID, Current-Sub-TID)`.
jmp_to_blk_end_node_map: HashMap<(Tid, Tid), NodeIndex>,
}
impl BlockMaps {
    /// Builds the block maps in a single pass over the nodes of the control flow graph
    /// contained in the analysis results.
    ///
    /// - For each `BlkStart` node whose block has at least one `Def`,
    ///   the first `Def` is added to the first-def set
    ///   and the last `Def` is mapped to the index of the node.
    /// - For each `BlkEnd` node, every jump of the block is mapped to the index of the node.
    /// - All other nodes are ignored.
    pub fn new(analysis_results: &AnalysisResults) -> Self {
        let mut first_defs = HashSet::new();
        let mut last_def_to_blk_start = HashMap::new();
        let mut jmp_to_blk_end = HashMap::new();

        for (node_index, node) in analysis_results.control_flow_graph.node_references() {
            match node {
                Node::BlkStart(block, sub) => {
                    let defs = &block.term.defs;
                    // Blocks without any `Def` contribute nothing to the def-based tables.
                    if let (Some(first_def), Some(last_def)) = (defs.first(), defs.last()) {
                        first_defs.insert((first_def.tid.clone(), sub.tid.clone()));
                        last_def_to_blk_start
                            .insert((last_def.tid.clone(), sub.tid.clone()), node_index);
                    }
                }
                Node::BlkEnd(block, sub) => {
                    jmp_to_blk_end.extend(
                        block
                            .term
                            .jmps
                            .iter()
                            .map(|jmp| ((jmp.tid.clone(), sub.tid.clone()), node_index)),
                    );
                }
                _ => (),
            }
        }

        BlockMaps {
            block_first_def_set: first_defs,
            block_start_last_def_map: last_def_to_blk_start,
            jmp_to_blk_end_node_map: jmp_to_blk_end,
        }
    }
}
......@@ -3,14 +3,13 @@ use std::{
sync::Arc,
};
use petgraph::{graph::NodeIndex, visit::IntoNodeReferences};
use petgraph::graph::NodeIndex;
use super::{state::State, CWE_MODULE};
use super::{state::State, BlockMaps, SymbolMaps, CWE_MODULE};
use crate::{
abstract_domain::AbstractDomain,
analysis::{
forward_interprocedural_fixpoint::Context as PiContext,
graph::{Graph, Node},
forward_interprocedural_fixpoint::Context as PiContext, graph::Graph,
interprocedural_fixpoint_generic::NodeValue,
pointer_inference::PointerInference as PointerInferenceComputation,
pointer_inference::State as PointerInferenceState,
......@@ -27,29 +26,31 @@ pub struct Context<'a> {
/// A pointer to the representation of the runtime memory image.
runtime_memory_image: &'a RuntimeMemoryImage,
/// The reversed control flow graph for the analysis
graph: Graph<'a>,
graph: Arc<Graph<'a>>,
/// A pointer to the results of the pointer inference analysis.
/// They are used to determine the targets of pointers to memory,
/// which in turn is used to keep track of taint on the stack or on the heap.
pub pointer_inference_results: &'a PointerInferenceComputation<'a>,
/// A map to get the node index of the `BlkStart` node containing a given [`Def`] as the last `Def` of the block.
/// - block_first_def_set:
/// - A set containing a given [`Def`] as the first `Def` of the block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
block_start_last_def_map: Arc<HashMap<(Tid, Tid), NodeIndex>>,
/// A set containing a given [`Def`] as the first `Def` of the block.
/// - block_start_last_def_map:
/// - A map to get the node index of the `BlkStart` node containing a given [`Def`] as the last `Def` of the block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
block_first_def_set: Arc<HashSet<(Tid, Tid)>>,
/// Maps the TID of an extern symbol to the extern symbol struct.
extern_symbol_map: Arc<HashMap<Tid, &'a ExternSymbol>>,
/// Maps the TID of an extern string related symbol to the corresponding extern symbol struct.
string_symbol_map: Arc<HashMap<Tid, &'a ExternSymbol>>,
/// Maps the TID of an extern symbol that take input from the user to the corresponding extern symbol struct.
user_input_symbol_map: Arc<HashMap<Tid, &'a ExternSymbol>>,
/// A map to get the node index of the `BlkEnd` node containing a given [`Jmp`].
/// - jmp_to_blk_end_node_map:
/// - A map to get the node index of the `BlkEnd` node containing a given [`Jmp`].
/// The keys are of the form `(Jmp-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
jmp_to_blk_end_node_map: Arc<HashMap<(Tid, Tid), NodeIndex>>,
block_maps: Arc<BlockMaps>,
/// - string_symbols:
/// - Maps the TID of an extern string related symbol to the corresponding extern symbol struct.
/// - user_input_symbols:
/// - Maps the TID of an extern symbol that takes input from the user to the corresponding extern symbol struct.
/// - extern_symbol_map:
/// - Maps the TID of an extern symbol to the extern symbol struct.
symbol_maps: Arc<SymbolMaps<'a>>,
/// The call whose parameter values are the sources for taint for the analysis.
pub taint_source: Option<&'a Term<Jmp>>,
/// The subroutine from which the taint source originates
......@@ -65,53 +66,19 @@ impl<'a> Context<'a> {
pub fn new(
project: &'a Project,
runtime_memory_image: &'a RuntimeMemoryImage,
graph: Arc<Graph<'a>>,
pointer_inference_results: &'a PointerInferenceComputation<'a>,
string_symbols: HashMap<Tid, &'a ExternSymbol>,
user_input_symbols: HashMap<Tid, &'a ExternSymbol>,
symbol_maps: Arc<SymbolMaps<'a>>,
block_maps: Arc<BlockMaps>,
cwe_collector: crossbeam_channel::Sender<CweWarning>,
) -> Self {
let mut block_first_def_set = HashSet::new();
let mut block_start_last_def_map = HashMap::new();
let mut extern_symbol_map = HashMap::new();
for symbol in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(symbol.tid.clone(), symbol);
}
let mut jmp_to_blk_end_node_map = HashMap::new();
let graph = pointer_inference_results.get_graph();
for (node_id, node) in graph.node_references() {
match node {
Node::BlkStart(block, sub) => match block.term.defs.len() {
0 => (),
num_of_defs => {
let first_def = block.term.defs.get(0).unwrap();
let last_def = block.term.defs.get(num_of_defs - 1).unwrap();
block_first_def_set.insert((first_def.tid.clone(), sub.tid.clone()));
block_start_last_def_map
.insert((last_def.tid.clone(), sub.tid.clone()), node_id);
}
},
Node::BlkEnd(block, sub) => {
for jmp in block.term.jmps.iter() {
jmp_to_blk_end_node_map.insert((jmp.tid.clone(), sub.tid.clone()), node_id);
}
}
_ => (),
}
}
let mut cwe_78_graph = graph.clone();
cwe_78_graph.reverse();
Context {
project,
runtime_memory_image,
graph: cwe_78_graph,
graph,
pointer_inference_results,
block_start_last_def_map: Arc::new(block_start_last_def_map),
block_first_def_set: Arc::new(block_first_def_set),
extern_symbol_map: Arc::new(extern_symbol_map),
string_symbol_map: Arc::new(string_symbols),
user_input_symbol_map: Arc::new(user_input_symbols),
jmp_to_blk_end_node_map: Arc::new(jmp_to_blk_end_node_map),
symbol_maps,
block_maps,
taint_source: None,
taint_source_sub: None,
taint_source_name: None,
......@@ -235,7 +202,12 @@ impl<'a> Context<'a> {
let mut new_state = state.clone();
// Check if the extern symbol is a string symbol, since the return register is not tainted for these.
// Instead, it has to be checked whether the first function parameter points to a tainted memory address
if self.string_symbol_map.get(&symbol.tid).is_some() {
if self
.symbol_maps
.string_symbol_map
.get(&symbol.tid)
.is_some()
{
new_state.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
new_state = self.taint_string_function_parameters(&new_state, symbol, call_source_node);
} else {
......@@ -254,7 +226,12 @@ impl<'a> Context<'a> {
.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
// TODO: Parameter detection since targets of input parameters are the return locations
// Taint memory for string inputs
if self.user_input_symbol_map.get(&symbol.tid).is_some() {
if self
.symbol_maps
.user_input_symbol_map
.get(&symbol.tid)
.is_some()
{
self.generate_cwe_warning(
&new_state.get_current_sub().as_ref().unwrap().term.name,
);
......@@ -328,6 +305,7 @@ impl<'a> Context<'a> {
) -> Option<NodeIndex> {
if let Some(sub) = state.get_current_sub() {
if let Some(node) = self
.block_maps
.block_start_last_def_map
.get(&(def.tid.clone(), sub.tid.clone()))
{
......@@ -403,7 +381,7 @@ impl<'a> Context<'a> {
/// Gets the BlkEnd node of an external function call
pub fn get_source_node(&self, state: &State, call_source: &Tid) -> NodeIndex {
let blk_end_node_id = self.jmp_to_blk_end_node_map.get(&(
let blk_end_node_id = self.block_maps.jmp_to_blk_end_node_map.get(&(
call_source.clone(),
state.get_current_sub().as_ref().unwrap().tid.clone(),
));
......@@ -484,6 +462,7 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
// Check whether the current def term is the first of the block and if so, remove
// the pi_def_map for the current state to save memory
if self
.block_maps
.block_first_def_set
.get(&(
def.tid.clone(),
......@@ -592,7 +571,7 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
match &call.term {
Jmp::Call { target, .. } => {
let source_node = self.get_source_node(&new_state, &call.tid);
if let Some(extern_symbol) = self.extern_symbol_map.get(target) {
if let Some(extern_symbol) = self.symbol_maps.extern_symbol_map.get(target) {
new_state = self.taint_generic_function_parameters_and_remove_non_callee_saved(
&new_state,
extern_symbol,
......
use petgraph::visit::IntoNodeReferences;
use super::*;
use crate::analysis::backward_interprocedural_fixpoint::Context as BackwardContext;
use crate::analysis::{backward_interprocedural_fixpoint::Context as BackwardContext, graph::Node};
use crate::{
abstract_domain::{DataDomain, PointerDomain, SizedDomain},
analysis::pointer_inference::{Data, State as PointerInferenceState, ValueDomain},
......@@ -111,12 +113,57 @@ impl<'a> Context<'a> {
mem_image: &'a RuntimeMemoryImage,
) -> Self {
let (cwe_sender, _) = crossbeam_channel::unbounded();
let mut graph = pi_results.get_graph().clone();
graph.reverse();
let mut extern_symbol_map = HashMap::new();
for symbol in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(symbol.tid.clone(), symbol);
}
let mut block_first_def_set: HashSet<(Tid, Tid)> = HashSet::new();
let mut block_start_last_def_map = HashMap::new();
let mut jmp_to_blk_end_node_map = HashMap::new();
for (node_id, node) in graph.node_references() {
match node {
Node::BlkStart(block, sub) => match block.term.defs.len() {
0 => (),
num_of_defs => {
let first_def = block.term.defs.get(0).unwrap();
let last_def = block.term.defs.get(num_of_defs - 1).unwrap();
block_first_def_set.insert((first_def.tid.clone(), sub.tid.clone()));
block_start_last_def_map
.insert((last_def.tid.clone(), sub.tid.clone()), node_id);
}
},
Node::BlkEnd(block, sub) => {
for jmp in block.term.jmps.iter() {
jmp_to_blk_end_node_map.insert((jmp.tid.clone(), sub.tid.clone()), node_id);
}
}
_ => (),
}
}
let block_maps: BlockMaps = BlockMaps {
block_first_def_set,
block_start_last_def_map,
jmp_to_blk_end_node_map,
};
let symbol_maps: SymbolMaps = SymbolMaps {
string_symbol_map: string_symbols,
user_input_symbol_map: HashMap::new(),
extern_symbol_map,
};
Context::new(
project,
mem_image,
std::sync::Arc::new(graph),
pi_results,
string_symbols,
HashMap::new(),
std::sync::Arc::new(symbol_maps),
std::sync::Arc::new(block_maps),
cwe_sender,
)
}
......@@ -164,6 +211,7 @@ fn tainting_string_function_parameters() {
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let node_id = context
.block_maps
.jmp_to_blk_end_node_map
.get(&(Tid::new("call_string"), Tid::new("func")))
.unwrap();
......@@ -253,6 +301,7 @@ fn tainting_generic_function_parameters_and_removing_non_callee_saved() {
string_syms.insert(Tid::new("sprintf"), &setup.string_sym);
let context = Context::mock(&setup.project, string_syms, &pi_results, &mem_image);
let node_id = context
.block_maps
.jmp_to_blk_end_node_map
.get(&(Tid::new("call_string"), Tid::new("func")))
.unwrap();
......@@ -327,6 +376,7 @@ fn tainting_stack_parameters() {
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let call_source_node = context
.block_maps
.jmp_to_blk_end_node_map
.get(&(Tid::new("call_string"), Tid::new("func")))
.unwrap();
......@@ -366,6 +416,7 @@ fn tainting_parameters() {
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let call_source_node = context
.block_maps
.jmp_to_blk_end_node_map
.get(&(Tid::new("call_string"), Tid::new("func")))
.unwrap();
......@@ -407,6 +458,7 @@ fn creating_pi_def_map() {
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let current_sub = setup.project.program.term.subs.get(0).unwrap();
let start_node = context
.block_maps
.block_start_last_def_map
.get(&(def2.clone(), current_sub.tid.clone()))
.unwrap();
......@@ -452,6 +504,7 @@ fn getting_blk_start_node_if_last_def() {
setup.state.set_current_sub(current_sub);
let start_node = context
.block_maps
.block_start_last_def_map
.get(&(def2.tid.clone(), current_sub.tid.clone()))
.unwrap();
......@@ -481,6 +534,7 @@ fn getting_source_node() {
setup.state.set_current_sub(current_sub);
let blk_end_node_id = context
.block_maps
.jmp_to_blk_end_node_map
.get(&(call_tid.clone(), current_sub.tid.clone()))
.unwrap();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment