Unverified Commit e8251916 by Melvin Klimke Committed by GitHub

Abstract string domains (#235)

Adds a string abstraction analysis and includes a rewrite of the CWE-78 check using the new abstract string domains.
parent cfc85702
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "acceptance_tests_ghidra"
version = "0.1.0"
......@@ -160,6 +162,7 @@ dependencies = [
"fnv",
"gcd",
"goblin",
"itertools",
"petgraph",
"regex",
"serde",
......@@ -207,6 +210,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "fixedbitset"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
......@@ -287,6 +296,15 @@ dependencies = [
]
[[package]]
name = "itertools"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
......
......@@ -176,17 +176,38 @@ fn run_with_ghidra(args: &CmdlineArgs) {
&project,
);
let modules_depending_on_pointer_inference = vec!["CWE78", "CWE134", "CWE476", "Memory"];
let pointer_inference_results = if modules
let modules_depending_on_string_abstraction = vec!["CWE78"];
let modules_depending_on_pointer_inference = vec!["CWE134", "CWE476", "Memory"];
let string_abstraction_needed = modules
.iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name))
{
.any(|module| modules_depending_on_string_abstraction.contains(&module.name));
let pi_analysis_needed = string_abstraction_needed
|| modules
.iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name));
let pi_analysis_results = if pi_analysis_needed {
Some(analysis_results.compute_pointer_inference(&config["Memory"], args.statistics))
} else {
None
};
let analysis_results = analysis_results.set_pointer_inference(pi_analysis_results.as_ref());
let string_abstraction_results =
if string_abstraction_needed {
Some(analysis_results.compute_string_abstraction(
&config["StringAbstraction"],
pi_analysis_results.as_ref(),
))
} else {
None
};
let analysis_results =
analysis_results.set_pointer_inference(pointer_inference_results.as_ref());
analysis_results.set_string_abstraction(string_abstraction_results.as_ref());
// Print debug and then return.
// Right now there is only one debug printing function.
......
{
"CWE78": {
"system_symbols": [
"system",
"execl"
],
"string_symbols": [
"sprintf",
"snprintf",
"strcat",
"strncat"
],
"user_input_symbols": [
"scanf",
"__isoc99_scanf",
"sscanf",
"__isoc99_sscanf"
],
"format_string_index": {
"sprintf": 1,
"snprintf": 2,
"scanf": 0,
"__isoc99_scanf": 0,
"sscanf": 1,
"__isoc99_sscanf": 1
}
"system"
]
},
"CWE134": {
"format_string_symbols": [
......@@ -253,5 +232,32 @@
"deallocation_symbols": [
"free"
]
},
"StringAbstraction": {
"string_symbols": [
"sprintf",
"snprintf",
"vsprintf",
"vsnprintf",
"strcat",
"strncat",
"scanf",
"__isoc99_scanf",
"sscanf",
"__isoc99_sscanf",
"memcpy",
"free"
],
"format_string_index": {
"printf": 0,
"sprintf": 1,
"snprintf": 2,
"vsprintf": 1,
"vsnprintf": 2,
"scanf": 0,
"__isoc99_scanf": 0,
"sscanf": 1,
"__isoc99_sscanf": 1
}
}
}
......@@ -17,6 +17,7 @@ crossbeam-channel = "0.4"
derive_more = "0.99"
directories = "3.0"
goblin = "0.2"
itertools = "0.10.0"
gcd = "2.0"
[lib]
......
//! This module contains the Brick structure.
//! The Brick structure represents the set of all strings that can be built
//! through concatenation of a given sequence of strings with upper and lower boundaries.
//!
//! For instance, let \[{"mo", "de"}\]^{1,2} be a Brick. The following set of strings is
//! constructed through the aforementioned Brick:
//! - {mo, de, momo, dede, mode, demo}
use std::collections::BTreeSet;
use crate::prelude::*;
use itertools::Itertools;
/// A single Brick with the set of strings, a minimum and maximum bound.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct Brick {
sequence: BTreeSet<String>,
min: u32,
max: u32,
}
impl Default for Brick {
fn default() -> Self {
Self::new()
}
}
impl Brick {
/// Creates a new instance of the Brick struct.
pub fn new() -> Self {
Brick {
sequence: BTreeSet::new(),
min: 0,
max: 0,
}
}
/// Set the sequence of the Brick.
pub fn set_sequence(&mut self, sequence: BTreeSet<String>) {
self.sequence = sequence;
}
/// Set the minimum bound for the element occurrences in the Brick.
pub fn set_min(&mut self, min: u32) {
self.min = min;
}
/// Set the maximum bound for the element occurrences in the Brick.
pub fn set_max(&mut self, max: u32) {
self.max = max;
}
/// Returns a reference to the string sequence in the brick.
pub fn get_sequence(&self) -> &BTreeSet<String> {
&self.sequence
}
/// Returns the minimum occurrence of the sequences contained in the brick.
pub fn get_min(&self) -> u32 {
self.min
}
/// Returns the maximum occurrence of the sequences contained in the brick.
pub fn get_max(&self) -> u32 {
self.max
}
/// Checks whether a brick represents an empty string (Rule 1)
pub fn is_empty_string(&self) -> bool {
if self.sequence.is_empty() && self.min == 0 && self.max == 0 {
return true;
}
false
}
/// **merge** bricks with the same indices max = 1, min = 1, in a new single brick
/// with the new string set being the concatenation of the former two. e.g. B0 = \[{a,cd}\]^{1,1}
/// and B1 = \[{b,ef}\]^{1,1} become B_new = \[{ab, aef, cdb, cdef}\]^{1,1}.
pub fn merge_bricks_with_bound_one(&self, other: Brick) -> Self {
let product = self
.sequence
.iter()
.cartesian_product(other.sequence.iter())
.collect_vec();
let sequence: BTreeSet<String> = product
.iter()
.map(|&(str1, str2)| str1.clone() + str2)
.collect();
Brick {
sequence,
min: 1,
max: 1,
}
}
/// **transform** a brick in which the number of applications is constant (min = max) into one in which
/// min = max = 1. e.g. B = \[{a,b}\]^{2,2} => B_new = \[{aa, ab, ba, bb}\]^{1,1}.
pub fn transform_brick_with_min_max_equal(&self, length: usize) -> Self {
let permutations: BTreeSet<String> =
Self::generate_permutations_of_fixed_length(length, &self.sequence, Vec::new(), 1)
.into_iter()
.collect();
Brick {
sequence: permutations,
min: 1,
max: 1,
}
}
/// **merge** two bricks in which the set of strings is the same. e.g. B1 = \[S\]^{m1, M1}
/// and B2 = \[S\]^{m2, M2} => B_new = \[S\]^{m1+m2, M1+M2}
pub fn merge_bricks_with_equal_content(&self, other: Brick) -> Self {
Brick {
sequence: self.sequence.clone(),
min: self.min + other.min,
max: self.max + other.max,
}
}
/// **break** a single brick with min >= 1 and max != min into two simpler bricks where B = \[S\]^{min,max} =>
/// B1 = \[S^min\]^{1,1}, B2 = \[S\]^{0, max-min}.
/// e.g. B = \[{a}\]^{2,5} => B1 = \[{aa}\]^{1,1}, B2 = \[{a}\]^{0,3}
pub fn break_single_brick_into_simpler_bricks(&self) -> (Self, Self) {
let brick_1 = self.transform_brick_with_min_max_equal(self.min as usize);
let brick_2 = Brick {
sequence: self.sequence.clone(),
min: 0,
max: self.max - self.min,
};
(brick_1, brick_2)
}
/// Recursive function to generate sequence permutations of fixed length.
/// For instance, \[{a,b}\] with length = 2 becomes \[{aa, ab, ba, bb}\]
/// Note that the length can also be greater or smaller than
/// the number of elements in the sequence.
pub fn generate_permutations_of_fixed_length(
max_length: usize,
sequence: &BTreeSet<String>,
generated: Vec<String>,
current_length: usize,
) -> Vec<String> {
let mut new_gen: Vec<String> = Vec::new();
for s in sequence.iter() {
if generated.is_empty() {
new_gen.push(s.to_string());
} else {
for g in generated.iter() {
new_gen.push(g.clone() + s);
}
}
}
if current_length < max_length {
return Self::generate_permutations_of_fixed_length(
max_length,
sequence,
new_gen,
current_length + 1,
);
}
new_gen
}
}
//! This module implements the widening operator for the BrickDomain and BricksDomain.
//! The exact widening procedure depends on three constants.
//! - The *interval threshold* overapproximates the number of times string sequences can occur in a brick.
//! - The *sequence threshold* overapproximates the number of string sequences in a brick by forcing a *Top* value.
//! - The *length threshold* overapproximates the number of bricks in the BricksDomain and forces a *Top* value.
//! A merge is processed without widening when none of the thresholds are exceeded.
use std::{
cmp::{
max, min,
Ordering::{Equal, Greater, Less},
},
collections::BTreeSet,
};
use crate::abstract_domain::AbstractDomain;
use super::{brick::Brick, BrickDomain, BricksDomain};
pub const INTERVAL_THRESHOLD: usize = 8;
pub const SEQUENCE_THRESHOLD: usize = 8;
pub const LENGTH_THRESHOLD: usize = 32;
impl BricksDomain {
/// The widen function of the BricksDomain widens the values during a merge.
/// If the two BrickDomain lists are not comparable or either list exceeds
/// the length threshold, *Top* is returned.
/// Otherwise, the shorter list is padded and the widen function of the
/// BrickDomain is applied to each element in both lists.
/// If after the widening all BrickDomain values are *Top*, return
/// the *Top* value for the BricksDomain.
pub fn widen(&self, other: &BricksDomain) -> Self {
let self_num_of_bricks = self.unwrap_value().len();
let other_num_of_bricks = other.unwrap_value().len();
let mut new_self = self.clone();
let mut new_other = other.clone();
match self_num_of_bricks.cmp(&other_num_of_bricks) {
Less => {
new_self = self.pad_list(other);
}
Greater => {
new_other = other.pad_list(self);
}
Equal => (),
}
if !new_self.is_less_or_equal(other) && !new_other.is_less_or_equal(self)
|| self_num_of_bricks > LENGTH_THRESHOLD
|| other_num_of_bricks > LENGTH_THRESHOLD
{
return BricksDomain::Top;
}
let mut widened_brick_domain_list: Vec<BrickDomain> = Vec::new();
for (self_brick, other_brick) in new_self
.unwrap_value()
.iter()
.zip(new_other.unwrap_value().iter())
{
widened_brick_domain_list.push(self_brick.merge(other_brick));
}
if BricksDomain::all_bricks_are_top(&widened_brick_domain_list) {
return BricksDomain::Top;
}
BricksDomain::Value(widened_brick_domain_list)
}
/// Checks whether all bricks of the BricksDomain are *Top* values.
/// If so, the BricksDomain itself should be converted into a *Top* value.
pub fn all_bricks_are_top(bricks: &[BrickDomain]) -> bool {
bricks.iter().all(|brick| matches!(brick, BrickDomain::Top))
}
/// Checks whether the current BricksDomain is less or equal than the other BricksDomain
/// by definition of the partial order.
pub fn is_less_or_equal(&self, other: &BricksDomain) -> bool {
self.unwrap_value()
.iter()
.zip(other.unwrap_value().iter())
.all(|(self_brick, other_brick)| self_brick.is_less_or_equal(other_brick))
}
}
impl BrickDomain {
/// The widen function of the BrickDomain takes the union of both
/// BrickDomains and returns *Top* if the number of sequences exceeds
/// a certain threshold.
/// If neither of the domains are *Top*, the minimum and maximum
/// of the interval bounds are taken and it is checked whether
/// their difference exceeds a certain threshold.
/// If so *min* is set to 0 and *max* is set to infinity (here Max value of 32 bits).
/// Otherwise, their values are taken as new bounds for the merged domain.
pub fn widen(&self, other: &BrickDomain) -> Self {
let self_brick = self.unwrap_value();
let other_brick = other.unwrap_value();
let merged_sequence = self_brick
.get_sequence()
.union(other_brick.get_sequence())
.cloned()
.collect::<BTreeSet<String>>();
if merged_sequence.len() > SEQUENCE_THRESHOLD {
return BrickDomain::Top;
}
let mut widened_brick = Brick::new();
let min_bound = min(self_brick.get_min(), other_brick.get_min());
let max_bound = max(self_brick.get_max(), other_brick.get_max());
if max_bound - min_bound > INTERVAL_THRESHOLD as u32 {
widened_brick.set_min(0);
widened_brick.set_max(u32::MAX);
} else {
widened_brick.set_min(min_bound);
widened_brick.set_max(max_bound);
}
widened_brick.set_sequence(merged_sequence);
BrickDomain::Value(widened_brick)
}
/// Checks whether the current BrickDomain is less or equal than the other BrickDomain
/// by definition of the partial order.
/// Empty strings are ignored for order comparisons.
pub fn is_less_or_equal(&self, other: &BrickDomain) -> bool {
match (self.is_top(), other.is_top()) {
(false, false) => {
let self_brick = self.unwrap_value();
let other_brick = other.unwrap_value();
if self_brick.is_empty_string() || other_brick.is_empty_string() {
return true;
}
if self_brick
.get_sequence()
.is_subset(other_brick.get_sequence())
&& self_brick.get_min() >= other_brick.get_min()
&& self_brick.get_max() <= other_brick.get_max()
{
return true;
}
false
}
(true, false) => false,
(false, true) | (true, true) => true,
}
}
}
......@@ -27,7 +27,7 @@ mod trait_impl;
///
/// The domain also contains a flag to indicate that it includes `Top` values,
/// i.e. values of fully unknown origin and offset.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct DataDomain<T: RegisterDomain> {
/// The byte size of the represented values.
size: ByteSize,
......@@ -234,6 +234,10 @@ mod tests {
contains_top_values: false,
}
}
pub fn insert_relative_value(&mut self, id: AbstractIdentifier, offset: T) {
self.relative_values.insert(id, offset);
}
}
fn bv(value: i64) -> BitvectorDomain {
......
......@@ -11,6 +11,11 @@ impl IntervalDomain {
IntervalDomain::new(Bitvector::from_i8(start), Bitvector::from_i8(end))
}
/// Return a new interval domain of 4-byte integers.
pub fn mock_i32(start: i32, end: i32) -> IntervalDomain {
IntervalDomain::new(Bitvector::from_i32(start), Bitvector::from_i32(end))
}
pub fn mock_with_bounds(
lower_bound: Option<i64>,
start: i64,
......
......@@ -19,6 +19,15 @@ pub use mem_region::*;
mod interval;
pub use interval::*;
mod bricks;
pub use bricks::*;
mod character_inclusion;
pub use character_inclusion::*;
mod strings;
pub use strings::*;
mod domain_map;
pub use domain_map::*;
......
/// A set of functions that all abstract string domains should implement.
pub trait DomainInsertion {
/// Inserts a string domain at a certain position if order is considered.
fn append_string_domain(&self, string_domain: &Self) -> Self;
/// Creates a string domain with characters that usually appear in an integer value.
fn create_integer_domain() -> Self;
/// Creates a string domain with characters that usually appear in a char value.
fn create_char_domain() -> Self;
/// Creates a string domain with characters that usually appear in a float value.
fn create_float_value_domain() -> Self;
/// Creates a string domain with characters that usually appear in a String value.
fn create_pointer_value_domain() -> Self;
/// Creates a top value of the currently used domain.
fn create_top_value_domain() -> Self;
/// Creates an empty string domain.
fn create_empty_string_domain() -> Self;
}
......@@ -8,3 +8,4 @@ pub mod forward_interprocedural_fixpoint;
pub mod graph;
pub mod interprocedural_fixpoint_generic;
pub mod pointer_inference;
pub mod string_abstraction;
//! This module contains the Context Object for the String Abstraction.
//! It holds all necessary information that stays unchanged during the analysis.
use std::{
collections::{HashMap, HashSet},
marker::PhantomData,
};
use petgraph::{graph::NodeIndex, visit::IntoNodeReferences};
use crate::{
abstract_domain::{AbstractDomain, DomainInsertion, HasTop},
analysis::{
forward_interprocedural_fixpoint::Context as _, graph::Node,
interprocedural_fixpoint_generic::NodeValue,
pointer_inference::PointerInference as PointerInferenceComputation,
pointer_inference::State as PointerInferenceState,
},
intermediate_representation::{Def, ExternSymbol, Project, Term, Tid},
utils::binary::RuntimeMemoryImage,
};
use super::{state::State, Config};
pub mod symbol_calls;
mod trait_impls;
/// Contains all context information needed for the string abstract fixpoint computation.
///
/// The struct also implements the `interprocedural_fixpoint::Context` trait to enable the fixpoint computation.
pub struct Context<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> {
/// A reference to the `Project` object representing the binary
pub project: &'a Project,
/// The runtime memory image for reading global read-only variables.
/// Note that values of writeable global memory segments are not tracked.
pub runtime_memory_image: &'a RuntimeMemoryImage,
/// A pointer to the results of the pointer inference analysis.
/// They are used to determine the targets of pointers to memory,
/// which in turn is used to keep track of taint on the stack or on the heap.
pub pointer_inference_results: &'a PointerInferenceComputation<'a>,
/// Maps the TIDs of functions that shall be treated as string extern symbols to the `ExternSymbol` object representing it.
pub string_symbol_map: HashMap<Tid, &'a ExternSymbol>,
/// Maps the TIDs of functions that shall be treated as general extern symbols to the `ExternSymbol` object representing it.
pub extern_symbol_map: HashMap<Tid, &'a ExternSymbol>,
/// Maps string symbols to their corresponding format string parameter index.
pub format_string_index_map: HashMap<String, usize>,
/// A map to get the node index of the `BlkStart` node containing a given [`Def`] as the first `Def` of the block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub block_start_node_map: HashMap<(Tid, Tid), NodeIndex>,
/// A set containing a given [`Def`](crate::intermediate_representation::Def) as the first `Def` of the block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub block_first_def_set: HashSet<(Tid, Tid)>,
/// A map to get the node index of the `BlkEnd` node containing a given [`Jmp`].
/// The keys are of the form `(Jmp-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub jmp_to_blk_end_node_map: HashMap<(Tid, Tid), NodeIndex>,
_phantom_string_domain: PhantomData<T>,
}
impl<'a, T: AbstractDomain + HasTop + Eq + From<String> + DomainInsertion> Context<'a, T> {
/// Create a new context object for a given project.
pub fn new(
project: &'a Project,
runtime_memory_image: &'a RuntimeMemoryImage,
pointer_inference_results: &'a PointerInferenceComputation<'a>,
config: Config,
) -> Context<'a, T> {
let string_symbol_map =
crate::utils::symbol_utils::get_symbol_map(project, &config.string_symbols[..]);
let mut extern_symbol_map = HashMap::new();
for (tid, symbol) in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(tid.clone(), symbol);
}
let mut block_start_node_map: HashMap<(Tid, Tid), NodeIndex> = HashMap::new();
let mut block_first_def_set = HashSet::new();
let mut jmp_to_blk_end_node_map = HashMap::new();
for (node_id, node) in pointer_inference_results.get_graph().node_references() {
match node {
Node::BlkStart(block, sub) => {
if let Some(def) = block.term.defs.get(0) {
block_start_node_map.insert((def.tid.clone(), sub.tid.clone()), node_id);
block_first_def_set.insert((def.tid.clone(), sub.tid.clone()));
}
}
Node::BlkEnd(block, sub) => {
for jmp in block.term.jmps.iter() {
jmp_to_blk_end_node_map.insert((jmp.tid.clone(), sub.tid.clone()), node_id);
}
}
_ => (),
}
}
Context {
project,
runtime_memory_image,
pointer_inference_results,
format_string_index_map: config.format_string_index.into_iter().collect(),
string_symbol_map,
extern_symbol_map,
block_start_node_map,
block_first_def_set,
jmp_to_blk_end_node_map,
_phantom_string_domain: PhantomData,
}
}
/// Get the current pointer inference state (if one can be found) for the given state.
fn get_current_pointer_inference_state(
&self,
state: &State<T>,
tid: &Tid,
) -> Option<PointerInferenceState> {
if let Some(pi_state) = state.get_pointer_inference_state() {
Some(pi_state.clone())
} else if let Some(node_id) = self
.block_start_node_map
.get(&(tid.clone(), state.get_current_sub().unwrap().tid.clone()))
{
match self.pointer_inference_results.get_node_value(*node_id) {
Some(NodeValue::Value(val)) => Some(val.clone()),
_ => None,
}
} else {
None
}
}
/// Update the pointer inference state contained in the given taint state
/// according to the effect of the given `Def` term.
fn update_pointer_inference_state(&self, state: &mut State<T>, def: &Term<Def>) {
if let Some(pi_state) = self.get_current_pointer_inference_state(state, &def.tid) {
let pi_context = self.pointer_inference_results.get_context();
let new_pi_state = pi_context.update_def(&pi_state, def);
state.set_pointer_inference_state(new_pi_state);
}
}
}
#[cfg(test)]
mod tests;
//! This module handles the string processing at external symbol calls.
use regex::Regex;
use std::collections::BTreeMap;
use crate::abstract_domain::{
AbstractIdentifier, DomainInsertion, HasTop, IntervalDomain, TryToBitvec,
};
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::intermediate_representation::{Bitvector, Datatype};
use crate::{abstract_domain::AbstractDomain, intermediate_representation::ExternSymbol};
use super::super::state::State;
use super::Context;
mod memcpy;
mod scanf;
mod sprintf;
mod strcat;
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Context<'a, T> {
/// Handles generic symbol calls by deleting all non callee saved pointer entries.
pub fn handle_generic_symbol_calls(
&self,
extern_symbol: &ExternSymbol,
state: &State<T>,
) -> State<T> {
let mut new_state = state.clone();
new_state.remove_non_callee_saved_pointer_entries_for_external_symbol(
self.project,
extern_symbol,
);
new_state
}
/// Handles calls to external symbols for which no ExternSymbol object is known.
pub fn handle_unknown_symbol_calls(&self, state: &mut State<T>) {
if let Some(standard_cconv) = self.project.get_standard_calling_convention() {
let mut filtered_map = state.get_variable_to_pointer_map().clone();
for (register, _) in state.get_variable_to_pointer_map().clone().iter() {
if !standard_cconv
.callee_saved_register
.contains(&register.name)
{
filtered_map.remove(register);
}
}
state.set_variable_to_pointer_map(filtered_map);
}
}
/// The output of a string symbol is added to the map of abstract strings.
/// If the symbol returns a format string, the string is approximated
/// as good as possible by checking the input parameters.
pub fn handle_string_symbol_calls(
&self,
extern_symbol: &ExternSymbol,
state: &State<T>,
) -> State<T> {
let mut new_state = match extern_symbol.name.as_str() {
"scanf" | "__isoc99_scanf" => self.handle_scanf_calls(state, extern_symbol),
"sscanf" | "__isoc99_sscanf" => self.handle_sscanf_calls(state, extern_symbol),
"sprintf" | "snprintf" | "vsprintf" | "vsnprintf" => {
self.handle_sprintf_and_snprintf_calls(state, extern_symbol)
}
"strcat" | "strncat" => self.handle_strcat_and_strncat_calls(state, extern_symbol),
"memcpy" => self.handle_memcpy_calls(state, extern_symbol),
"free" => self.handle_free(state, extern_symbol),
_ => panic!("Unexpected Extern Symbol."),
};
new_state.remove_non_callee_saved_pointer_entries_for_external_symbol(
self.project,
extern_symbol,
);
new_state
}
/// Takes the pointer target if there is only one and checks whether the target
/// is inside the current stack frame. If so, the string domain is added to the
/// analysis.
pub fn add_new_string_abstract_domain(
state: &mut State<T>,
pi_state: &PointerInferenceState,
pointer: &BTreeMap<AbstractIdentifier, IntervalDomain>,
domain_input_string: T,
) {
for (target, offset) in pointer.iter() {
if pi_state.caller_stack_ids.contains(target) || pi_state.stack_id == *target {
if let Ok(offset_value) = offset.try_to_offset() {
state.add_new_stack_offset_to_string_entry(
offset_value,
domain_input_string.clone(),
);
}
} else {
state.add_new_heap_to_string_entry(target.clone(), domain_input_string.clone());
}
}
}
/// Regex that filters format specifier from a format string.
pub fn re_format_specifier() -> Regex {
Regex::new(r#"%\d{0,2}([c,C,d,i,o,u,x,X,e,E,f,F,g,G,a,A,n,p,s,S]|hi|hd|hu|li|ld|lu|lli|lld|llu|lf|lg|le|la|lF|lG|lE|lA|Lf|Lg|Le|La|LF|LG|LE|LA)"#).expect("No valid regex!")
}
/// Merges domains from multiple pointer targets. The merged domain serves as input to a format string.
/// If one of the targets does not contain a domain or the offset of a stack target cannot be parsed,
/// a *Top* value is returned as no assumption can be made about the input.
pub fn merge_domains_from_multiple_pointer_targets(
state: &State<T>,
pi_state: &PointerInferenceState,
pointer: &BTreeMap<AbstractIdentifier, IntervalDomain>,
) -> T {
let mut domains: Vec<T> = Vec::new();
for (target, offset) in pointer.iter() {
// Check the stack offset map if the target points to a stack position.
if pi_state.caller_stack_ids.contains(target) || pi_state.stack_id == *target {
if let Ok(offset_value) = offset.try_to_offset() {
if let Some(domain) = state.get_stack_offset_to_string_map().get(&offset_value)
{
domains.push(domain.clone());
} else {
return T::create_top_value_domain();
}
} else {
return T::create_top_value_domain();
}
} else {
// Check the heap map if the target points to a heap position.
if let Some(domain) = state.get_heap_to_string_map().get(target) {
domains.push(domain.clone());
} else {
return T::create_top_value_domain();
}
}
}
let mut init_domain = domains.first().unwrap().clone();
domains.remove(0);
for remaining_domain in domains.iter() {
init_domain = init_domain.merge(remaining_domain);
}
init_domain
}
/// Calls the appropriate data type approximator.
pub fn approximate_string_domain_from_datatype(specifier: String) -> T {
match Datatype::from(specifier) {
Datatype::Char => T::create_char_domain(),
Datatype::Integer => T::create_integer_domain(),
Datatype::Pointer => T::create_pointer_value_domain(),
Datatype::Double | Datatype::Long | Datatype::LongDouble | Datatype::LongLong => {
T::create_float_value_domain()
}
_ => panic!("Invalid data type specifier from format string."),
}
}
/// Inserts an integer constant into the format string.
pub fn get_constant_integer_domain(constant: Bitvector) -> Option<T> {
if let Ok(integer) = constant.try_to_i64() {
return Some(T::from(integer.to_string()));
}
None
}
/// Inserts a char constant into the format string.
pub fn get_constant_char_domain(&self, constant: Bitvector) -> Option<T> {
if let Ok(Some(char_code)) = self.runtime_memory_image.read(
&constant,
self.project
.datatype_properties
.get_size_from_data_type(Datatype::Char),
) {
if let Some(c_char) = Context::<T>::parse_bitvec_to_char(char_code) {
return Some(T::from(c_char.to_string()));
}
} else if let Some(c_char) = Context::<T>::parse_bitvec_to_char(constant.clone()) {
return Some(T::from(c_char.to_string()));
}
None
}
/// Parses a bitvector to a char if possible.
pub fn parse_bitvec_to_char(char_code: Bitvector) -> Option<char> {
if let Ok(code) = char_code.try_to_u32() {
if let Some(c_char) = std::char::from_u32(code) {
return Some(c_char);
}
}
None
}
/// Inserts a string constant into the format string.
pub fn get_constant_string_domain(&self, constant: Bitvector) -> Option<T> {
if let Ok(string) = self
.runtime_memory_image
.read_string_until_null_terminator(&constant)
{
if !string.is_empty() {
return Some(T::from(string.to_string()));
}
}
None
}
/// Deletes string entries in the heap to string map if the corresponding pointer is used
/// to free memory space.
pub fn handle_free(&self, state: &State<T>, extern_symbol: &ExternSymbol) -> State<T> {
let mut new_state = state.clone();
if let Some(dest_arg) = extern_symbol.parameters.first() {
if let Some(pi_state) = state.get_pointer_inference_state() {
if let Ok(pointer) = pi_state.eval_parameter_arg(
dest_arg,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
let heap_to_string_map = state.get_heap_to_string_map();
for (target, _) in pointer.get_relative_values().iter() {
if heap_to_string_map.contains_key(target) {
new_state.remove_heap_to_string_entry(target);
}
}
}
}
}
new_state
}
}
#[cfg(test)]
pub mod tests;
use super::*;
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Context<'a, T> {
pub fn mock(
project: &'a Project,
string_symbols: HashMap<Tid, &'a ExternSymbol>,
format_string_index: HashMap<String, usize>,
pointer_inference_results: &'a PointerInferenceComputation<'a>,
runtime_memory_image: &'a RuntimeMemoryImage,
) -> Self {
let mut extern_symbol_map = HashMap::new();
for (tid, symbol) in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(tid.clone(), symbol);
}
let mut block_start_node_map = HashMap::new();
let mut block_first_def_set = HashSet::new();
let mut jmp_to_blk_end_node_map = HashMap::new();
for (node_id, node) in pointer_inference_results.get_graph().node_references() {
match node {
Node::BlkStart(block, sub) => {
if let Some(def) = block.term.defs.get(0) {
block_start_node_map.insert((def.tid.clone(), sub.tid.clone()), node_id);
block_first_def_set.insert((def.tid.clone(), sub.tid.clone()));
}
}
Node::BlkEnd(block, sub) => {
for jmp in block.term.jmps.iter() {
jmp_to_blk_end_node_map.insert((jmp.tid.clone(), sub.tid.clone()), node_id);
}
}
_ => (),
}
}
Context {
project,
runtime_memory_image,
pointer_inference_results,
string_symbol_map: string_symbols,
extern_symbol_map,
format_string_index_map: format_string_index,
block_start_node_map,
block_first_def_set,
jmp_to_blk_end_node_map: jmp_to_blk_end_node_map,
_phantom_string_domain: PhantomData,
}
}
}
use crate::{
abstract_domain::{AbstractDomain, DomainInsertion, HasTop},
analysis::string_abstraction::state::State,
intermediate_representation::{Blk, Def, Expression, Jmp, Term},
};
use super::Context;
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>>
crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Context<'a, T>
{
type Value = State<T>;
/// Get the underlying graph on which the analysis operates.
fn get_graph(&self) -> &crate::analysis::graph::Graph<'a> {
self.pointer_inference_results.get_graph()
}
/// Merge two state values.
fn merge(&self, state1: &Self::Value, state2: &Self::Value) -> State<T> {
state1.merge(state2)
}
fn update_def(&self, state: &State<T>, def: &Term<Def>) -> Option<State<T>> {
let mut new_state = state.clone();
if state.get_pointer_inference_state().is_none() {
if self.block_first_def_set.contains(&(
def.tid.clone(),
state.get_current_sub().unwrap().tid.clone(),
)) {
if let Some(pi_state) = self.get_current_pointer_inference_state(state, &def.tid) {
new_state.set_pointer_inference_state(Some(pi_state));
} else {
return None;
}
} else {
return None;
}
}
self.update_pointer_inference_state(&mut new_state, def);
match &def.term {
Def::Assign {
var: output,
value: input,
} => {
new_state.handle_assign_and_load(
output,
input,
self.runtime_memory_image,
&self.block_first_def_set,
true,
);
}
Def::Load {
var: output,
address: input,
} => {
new_state.handle_assign_and_load(
output,
input,
self.runtime_memory_image,
&self.block_first_def_set,
false,
);
}
Def::Store { address, value } => new_state.handle_store(
address,
value,
self.runtime_memory_image,
&self.block_first_def_set,
),
}
Some(new_state)
}
fn update_jump(
&self,
state: &State<T>,
_jump: &Term<Jmp>,
_untaken_conditional: Option<&Term<Jmp>>,
_target: &Term<Blk>,
) -> Option<State<T>> {
let mut new_state = state.clone();
new_state.set_pointer_inference_state(None);
Some(new_state)
}
fn update_call(
&self,
_state: &State<T>,
_call: &Term<Jmp>,
_target: &crate::analysis::graph::Node,
) -> Option<State<T>> {
None
}
fn update_return(
&self,
_state: Option<&State<T>>,
state_before_call: Option<&State<T>>,
_call_term: &Term<Jmp>,
_return_term: &Term<Jmp>,
) -> Option<State<T>> {
if let Some(state) = state_before_call {
let mut new_state = state.clone();
self.handle_unknown_symbol_calls(&mut new_state);
new_state.set_pointer_inference_state(None);
return Some(new_state);
}
None
}
fn update_call_stub(&self, state: &State<T>, call: &Term<Jmp>) -> Option<State<T>> {
let mut new_state = state.clone();
match &call.term {
Jmp::Call { target, .. } => match self.extern_symbol_map.get(target) {
Some(symbol) => {
if let Some(string_symbol) = self.string_symbol_map.get(target) {
new_state = self.handle_string_symbol_calls(string_symbol, &new_state);
} else {
new_state = self.handle_generic_symbol_calls(symbol, &new_state);
}
}
None => panic!("Extern symbol not found."),
},
Jmp::CallInd { .. } => self.handle_unknown_symbol_calls(&mut new_state),
_ => panic!("Malformed control flow graph encountered."),
}
new_state.set_pointer_inference_state(None);
Some(new_state)
}
fn specialize_conditional(
&self,
state: &State<T>,
_condition: &Expression,
_block_before_condition: &Term<Blk>,
_is_true: bool,
) -> Option<State<T>> {
Some(state.clone())
}
}
#[cfg(test)]
mod tests;
use std::collections::HashSet;
use crate::{
abstract_domain::{
AbstractIdentifier, AbstractLocation, CharacterInclusionDomain, DataDomain, IntervalDomain,
},
analysis::pointer_inference::PointerInference as PointerInferenceComputation,
analysis::{
forward_interprocedural_fixpoint::Context,
string_abstraction::{
context::symbol_calls::tests::Setup,
tests::mock_project_with_intraprocedural_control_flow, tests::Setup as ProjectSetup,
},
},
intermediate_representation::{Bitvector, Blk, ByteSize, ExternSymbol, Jmp, Tid, Variable},
utils::binary::RuntimeMemoryImage,
};
#[test]
fn test_update_def() {
let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
let project = mock_project_with_intraprocedural_control_flow(
vec![(memcpy_symbol.clone(), vec![true])],
"func",
);
let mem_image = RuntimeMemoryImage::mock();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
pi_results.compute();
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
setup.context.block_first_def_set = HashSet::new();
let project_setup = ProjectSetup::new();
let assign_def = project_setup.string_input_constant("assign_def", "r1", 0x7000);
let load_def = project_setup.load_var_content_from_temp_var("load_def", "r5", "r2");
let store_def = project_setup.store_var_content_at_temp_var("store_def", "r0", "r5");
let new_state = setup
.context
.update_def(&setup.state_before_call, &assign_def)
.unwrap();
let absolute_target = DataDomain::from(Bitvector::from_i32(0x7000));
assert_eq!(
absolute_target,
*new_state
.get_variable_to_pointer_map()
.get(&Variable::mock("r1", 4))
.unwrap()
);
let stack_id = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
);
let loaded_pointer = DataDomain::from_target(stack_id.clone(), IntervalDomain::mock_i32(4, 4));
let pointer_to_pointer =
DataDomain::from_target(stack_id.clone(), IntervalDomain::mock_i32(8, 8));
let _ = setup.pi_state_before_symbol_call.store_value(
&pointer_to_pointer,
&loaded_pointer,
&mem_image,
);
let r2_reg = Variable {
name: String::from("r2"),
size: ByteSize::new(4),
is_temp: true,
};
setup
.pi_state_before_symbol_call
.set_register(&r2_reg, pointer_to_pointer);
setup
.state_before_call
.set_pointer_inference_state(Some(setup.pi_state_before_symbol_call.clone()));
setup
.state_before_call
.add_new_variable_to_pointer_entry(Variable::mock("r3", 4), loaded_pointer.clone());
let new_state = setup
.context
.update_def(&setup.state_before_call, &load_def)
.unwrap();
assert_eq!(
loaded_pointer,
*new_state
.get_variable_to_pointer_map()
.get(&Variable::mock("r5", 4))
.unwrap()
);
let store_target = DataDomain::from_target(stack_id, IntervalDomain::mock_i32(12, 12));
let r0_reg = Variable {
name: String::from("r0"),
size: ByteSize::new(4),
is_temp: true,
};
setup
.pi_state_before_symbol_call
.set_register(&r0_reg, store_target);
setup
.pi_state_before_symbol_call
.set_register(&Variable::mock("r5", 4), absolute_target.clone());
setup
.state_before_call
.set_pointer_inference_state(Some(setup.pi_state_before_symbol_call));
let new_state = setup
.context
.update_def(&setup.state_before_call, &store_def)
.unwrap();
assert_eq!(
absolute_target,
*new_state
.get_stack_offset_to_pointer_map()
.get(&12)
.unwrap()
);
}
#[test]
fn test_update_jump() {
let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
let project = mock_project_with_intraprocedural_control_flow(
vec![(memcpy_symbol.clone(), vec![true])],
"func",
);
let mem_image = RuntimeMemoryImage::mock();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
pi_results.compute();
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
let new_state = setup
.context
.update_jump(
&setup.state_before_call,
&Jmp::branch("start1", "end1"),
Some(&Jmp::branch("start2", "end2")),
&Blk::mock(),
)
.unwrap();
assert_eq!(None, new_state.get_pointer_inference_state());
}
#[test]
fn test_update_return() {
let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
let project = mock_project_with_intraprocedural_control_flow(
vec![(memcpy_symbol.clone(), vec![true])],
"func",
);
let mem_image = RuntimeMemoryImage::mock();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
pi_results.compute();
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
let pointer = DataDomain::from(Bitvector::from_i32(0x6000));
let callee_saved_reg = Variable::mock("r11", 4);
let non_callee_saved_reg = Variable::mock("r0", 4);
setup
.state_before_call
.add_new_variable_to_pointer_entry(callee_saved_reg.clone(), pointer.clone());
setup
.state_before_call
.add_new_variable_to_pointer_entry(non_callee_saved_reg.clone(), pointer.clone());
let new_state = setup.context.update_return(
None,
None,
&Jmp::branch("start1", "end1"),
&Jmp::branch("start2", "end2"),
);
assert_eq!(None, new_state);
let new_state = setup
.context
.update_return(
Some(&setup.state_before_call),
Some(&setup.state_before_call),
&Jmp::branch("start1", "end1"),
&Jmp::branch("start2", "end2"),
)
.unwrap();
assert_eq!(None, new_state.get_pointer_inference_state());
assert_eq!(1, new_state.get_variable_to_pointer_map().len());
assert_eq!(
pointer,
*new_state
.get_variable_to_pointer_map()
.get(&callee_saved_reg)
.unwrap()
);
}
#[test]
fn test_update_call_stub() {
let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
let project = mock_project_with_intraprocedural_control_flow(
vec![(memcpy_symbol.clone(), vec![true])],
"func",
);
let mem_image = RuntimeMemoryImage::mock();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
pi_results.compute();
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
let call_to_memcpy = Jmp::call("jmp1", "memcpy", Some("blk1"));
let new_state = setup
.context
.update_call_stub(&setup.state_before_call, &call_to_memcpy)
.unwrap();
assert_eq!(
CharacterInclusionDomain::ci("str1 str2 str3 str4"),
*new_state
.get_stack_offset_to_string_map()
.get(&-60)
.unwrap()
);
}
//! A fixpoint analysis that abstracts strings in the program using various string abstract domains.
//! These include the Character Inclusion Domain and Bricks Domain among others.
use std::{
collections::{BTreeMap, HashMap},
fmt::Debug,
};
use crate::{
abstract_domain::{AbstractDomain, DomainInsertion, HasTop},
intermediate_representation::Project,
prelude::*,
utils::binary::RuntimeMemoryImage,
};
use self::state::State;
use super::{
fixpoint::Computation, forward_interprocedural_fixpoint::GeneralizedContext, graph::Graph,
interprocedural_fixpoint_generic::NodeValue,
pointer_inference::PointerInference as PointerInferenceComputation,
};
pub mod context;
pub mod state;
use context::*;
use petgraph::graph::NodeIndex;
/// Configurable parameters for the analysis.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Config {
/// Names of extern functions that manipulate strings
/// or could introduce new strings (e.g. scanf).
pub string_symbols: Vec<String>,
/// The index of the format string parameter in the function signature
/// of an external symbol.
pub format_string_index: BTreeMap<String, usize>,
}
/// A wrapper struct for the string abstraction computation object.
pub struct StringAbstraction<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> {
computation: Computation<GeneralizedContext<'a, Context<'a, T>>>,
}
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>>
StringAbstraction<'a, T>
{
/// Generate a new string abstraction computation for a project.
pub fn new(
project: &'a Project,
runtime_memory_image: &'a RuntimeMemoryImage,
control_flow_graph: &'a Graph<'a>,
pointer_inference_results: &'a PointerInferenceComputation<'a>,
config: Config,
) -> StringAbstraction<'a, T> {
let context = Context::new(
project,
runtime_memory_image,
pointer_inference_results,
config,
);
let mut sub_to_entry_blocks_map = HashMap::new();
for sub in project.program.term.subs.iter() {
if let Some(entry_block) = sub.term.blocks.get(0) {
sub_to_entry_blocks_map.insert(sub.tid.clone(), entry_block.tid.clone());
}
}
let mut tid_to_graph_indices_map = HashMap::new();
for node in control_flow_graph.node_indices() {
if let super::graph::Node::BlkStart(block, sub) = control_flow_graph[node] {
tid_to_graph_indices_map.insert((block.tid.clone(), sub.tid.clone()), node);
}
}
let sub_to_entry_node_map: HashMap<Tid, NodeIndex> = sub_to_entry_blocks_map
.into_iter()
.filter_map(|(sub_tid, block_tid)| {
tid_to_graph_indices_map
.get(&(block_tid, sub_tid.clone()))
.map(|start_node_index| (sub_tid, *start_node_index))
})
.collect();
let mut fixpoint_computation =
super::forward_interprocedural_fixpoint::create_computation(context, None);
for (_, start_node_index) in sub_to_entry_node_map.into_iter() {
fixpoint_computation.set_node_value(
start_node_index,
super::interprocedural_fixpoint_generic::NodeValue::Value(State::new(
start_node_index,
pointer_inference_results,
)),
);
}
StringAbstraction {
computation: fixpoint_computation,
}
}
/// Compute the fixpoint of the string abstraction analysis.
/// Has a `max_steps` bound for the fixpoint algorithm to prevent infinite loops.
pub fn compute(&mut self) {
self.computation.compute_with_max_steps(100); // TODO: make max_steps configurable!
}
/// Get the string abstraction computation.
pub fn get_computation(&self) -> &Computation<GeneralizedContext<'a, Context<'a, T>>> {
&self.computation
}
/// Get the underlying graph of the computation.
pub fn get_graph(&self) -> &Graph {
self.computation.get_graph()
}
/// Get the context object of the computation.
pub fn get_context(&self) -> &Context<'a, T> {
self.computation.get_context().get_context()
}
/// Get the value associated to a node in the computed fixpoint
/// (or intermediate state of the algorithm if the fixpoint has not been reached yet).
/// Returns `None` if no value is associated to the Node.
pub fn get_node_value(&self, node_id: NodeIndex) -> Option<&NodeValue<State<T>>> {
self.computation.get_node_value(node_id)
}
}
/// Compute the string abstraction and return its results.
pub fn run<'a, T: AbstractDomain + HasTop + Eq + From<String> + DomainInsertion>(
project: &'a Project,
runtime_memory_image: &'a RuntimeMemoryImage,
control_flow_graph: &'a Graph<'a>,
pointer_inference: &'a PointerInferenceComputation<'a>,
config: Config,
) -> StringAbstraction<'a, T> {
let mut string_abstraction = StringAbstraction::new(
project,
runtime_memory_image,
control_flow_graph,
pointer_inference,
config,
);
string_abstraction.compute();
string_abstraction
}
#[cfg(test)]
pub mod tests;
......@@ -171,6 +171,27 @@ mod tests {
data_type: None,
}
}
pub fn mock_register_with_data_type(
name: impl ToString,
size_in_bytes: impl Into<ByteSize>,
data_type: Option<Datatype>,
) -> Arg {
Arg::Register {
var: Variable::mock(name.to_string(), size_in_bytes),
data_type,
}
}
pub fn mock_pointer_register(
name: impl ToString,
size_in_bytes: impl Into<ByteSize>,
) -> Arg {
Arg::Register {
var: Variable::mock(name.to_string(), size_in_bytes),
data_type: Some(Datatype::Pointer),
}
}
}
impl ExternSymbol {
......@@ -186,5 +207,18 @@ mod tests {
has_var_args: false,
}
}
pub fn mock_string() -> Self {
ExternSymbol {
tid: Tid::new("sprintf"),
addresses: vec!["UNKNOWN".to_string()],
name: "sprintf".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("RDI", 8), Arg::mock_register("RSI", 8)],
return_values: vec![Arg::mock_register("RAX", 8)],
no_return: false,
has_var_args: true,
}
}
}
}
......@@ -60,8 +60,11 @@ You can find out more information about each check, including known false positi
by reading the check-specific module documentation in the [`checkers`] module.
*/
use abstract_domain::BricksDomain;
use crate::analysis::graph::Graph;
use crate::analysis::pointer_inference::PointerInference;
use crate::analysis::string_abstraction::StringAbstraction;
use crate::intermediate_representation::Project;
use crate::utils::binary::RuntimeMemoryImage;
use crate::utils::log::{CweWarning, LogMessage};
......@@ -140,6 +143,8 @@ pub struct AnalysisResults<'a> {
pub project: &'a Project,
/// The result of the pointer inference analysis if already computed.
pub pointer_inference: Option<&'a PointerInference<'a>>,
/// The result of the string abstraction if already computed.
pub string_abstraction: Option<&'a StringAbstraction<'a, BricksDomain>>,
}
impl<'a> AnalysisResults<'a> {
......@@ -156,6 +161,7 @@ impl<'a> AnalysisResults<'a> {
control_flow_graph,
project,
pointer_inference: None,
string_abstraction: None,
}
}
......@@ -186,4 +192,33 @@ impl<'a> AnalysisResults<'a> {
..self
}
}
/// Compute the string abstraction.
/// As the string abstraction depends on the pointer inference, the
/// pointer inference is also computed and put into the `AnalysisResults` struct.
/// The result gets returned, but not saved to the `AnalysisResults` struct itself.
pub fn compute_string_abstraction(
&'a self,
config: &serde_json::Value,
pi_results: Option<&'a PointerInference<'a>>,
) -> StringAbstraction<BricksDomain> {
crate::analysis::string_abstraction::run(
self.project,
self.runtime_memory_image,
self.control_flow_graph,
pi_results.unwrap(),
serde_json::from_value(config.clone()).unwrap(),
)
}
/// Create a new `AnalysisResults` struct containing the given string abstraction results.
pub fn set_string_abstraction<'b: 'a>(
self,
string_abstraction: Option<&'b StringAbstraction<'a, BricksDomain>>,
) -> AnalysisResults<'b> {
AnalysisResults {
string_abstraction,
..self
}
}
}
......@@ -565,8 +565,8 @@ impl ExternSymbol {
calling_convention: self.calling_convention,
parameters,
return_values,
no_return: self.no_return,
has_var_args: self.has_var_args,
no_return: symbol.no_return,
has_var_args: symbol.has_var_args,
}
}
}
......
......@@ -118,7 +118,7 @@ pub fn parse_format_string_parameters(
Ok(datatype_map)
}
/// Returns an argument vector of detected variable parameters if they are of type string.
/// Returns an argument vector of detected variable parameters.
pub fn get_variable_parameters(
project: &Project,
pi_state: &PointerInferenceState,
......@@ -140,14 +140,19 @@ pub fn get_variable_parameters(
);
if let Ok(format_string) = format_string_results.as_ref() {
if let Ok(parameters) =
parse_format_string_parameters(format_string, &project.datatype_properties)
{
return Ok(calculate_parameter_locations(
parameters,
extern_symbol.get_calling_convention(project),
format_string_index,
));
let parameter_result =
parse_format_string_parameters(format_string, &project.datatype_properties);
match parameter_result {
Ok(parameters) => {
return Ok(calculate_parameter_locations(
parameters,
extern_symbol.get_calling_convention(project),
format_string_index,
));
}
Err(e) => {
return Err(anyhow!("Could not parse variable parameters: {}", e));
}
}
}
......
......@@ -216,6 +216,7 @@ mod tests {
mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_skipped(&mut tests, "x86", "gcc");
mark_skipped(&mut tests, "x86", "clang"); // Return value detection insufficient for x86
mark_skipped(&mut tests, "arm", "clang"); // Loss of stack pointer position
mark_skipped(&mut tests, "aarch64", "clang"); // Loss of stack pointer position
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment