Unverified Commit e8251916 by Melvin Klimke Committed by GitHub

Abstract string domains (#235)

Adds a string abstraction analysis and includes a rewrite of the CWE-78 check using the new abstract string domains.
parent cfc85702
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "acceptance_tests_ghidra"
version = "0.1.0"
......@@ -160,6 +162,7 @@ dependencies = [
"fnv",
"gcd",
"goblin",
"itertools",
"petgraph",
"regex",
"serde",
......@@ -207,6 +210,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "fixedbitset"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
......@@ -287,6 +296,15 @@ dependencies = [
]
[[package]]
name = "itertools"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
......
......@@ -176,17 +176,38 @@ fn run_with_ghidra(args: &CmdlineArgs) {
&project,
);
let modules_depending_on_pointer_inference = vec!["CWE78", "CWE134", "CWE476", "Memory"];
let pointer_inference_results = if modules
let modules_depending_on_string_abstraction = vec!["CWE78"];
let modules_depending_on_pointer_inference = vec!["CWE134", "CWE476", "Memory"];
let string_abstraction_needed = modules
.iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name))
{
.any(|module| modules_depending_on_string_abstraction.contains(&module.name));
let pi_analysis_needed = string_abstraction_needed
|| modules
.iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name));
let pi_analysis_results = if pi_analysis_needed {
Some(analysis_results.compute_pointer_inference(&config["Memory"], args.statistics))
} else {
None
};
let analysis_results = analysis_results.set_pointer_inference(pi_analysis_results.as_ref());
let string_abstraction_results =
if string_abstraction_needed {
Some(analysis_results.compute_string_abstraction(
&config["StringAbstraction"],
pi_analysis_results.as_ref(),
))
} else {
None
};
let analysis_results =
analysis_results.set_pointer_inference(pointer_inference_results.as_ref());
analysis_results.set_string_abstraction(string_abstraction_results.as_ref());
// Print debug and then return.
// Right now there is only one debug printing function.
......
{
"CWE78": {
"system_symbols": [
"system",
"execl"
],
"string_symbols": [
"sprintf",
"snprintf",
"strcat",
"strncat"
],
"user_input_symbols": [
"scanf",
"__isoc99_scanf",
"sscanf",
"__isoc99_sscanf"
],
"format_string_index": {
"sprintf": 1,
"snprintf": 2,
"scanf": 0,
"__isoc99_scanf": 0,
"sscanf": 1,
"__isoc99_sscanf": 1
}
"system"
]
},
"CWE134": {
"format_string_symbols": [
......@@ -253,5 +232,32 @@
"deallocation_symbols": [
"free"
]
},
"StringAbstraction": {
"string_symbols": [
"sprintf",
"snprintf",
"vsprintf",
"vsnprintf",
"strcat",
"strncat",
"scanf",
"__isoc99_scanf",
"sscanf",
"__isoc99_sscanf",
"memcpy",
"free"
],
"format_string_index": {
"printf": 0,
"sprintf": 1,
"snprintf": 2,
"vsprintf": 1,
"vsnprintf": 2,
"scanf": 0,
"__isoc99_scanf": 0,
"sscanf": 1,
"__isoc99_sscanf": 1
}
}
}
......@@ -17,6 +17,7 @@ crossbeam-channel = "0.4"
derive_more = "0.99"
directories = "3.0"
goblin = "0.2"
itertools = "0.10.0"
gcd = "2.0"
[lib]
......
//! This module contains the BricksDomain and BrickDomain.
//!
//! The BricksDomain contains a sorted list of normalized BrickDomains.
//! It represents the composition of a string through sub sequences.
//! When a string is assigned to the BricksDomain, it is defined as a single brick whose
//! sequence occurs at least and at most one time, which is represented by a min and max value in the
//! BrickDomain. e.g. "cwe" => \[\[{"cwe"}\]^{1,1}\]
//!
//! If two strings are concatenated, their brick sequences are concatenated.
//! e.g. B1 = \[\[{"a"}\]^{1,1}\], B2 = \[\[{"b"}\]^{1,1}\] => B_new = \[\[{"a"}\]^{1,1}, \[{"b"}\]^{1,1}\]
//!
//! A set of strings can be built from multiple configurations of bricks
//! e.g. \[{"abc"}\]^{1,1} <=> \[{"a"}\]^{1,1}\[{"b"}\]^{1,1}\[{"c"}\]^{1,1}
//!
//! Introducing a normalized form \[T\]^{1,1} or \[T\]^{0, max>0}
//! will keep string representations unambiguous.
//!
//! Widening is applied for merges, so that the domains do not become too big.
//! Certain thresholds are defined which cause the domains to be widened if exceeded.
//! These thresholds are:
//! - the *interval threshold* which overapproximates the number of times string sequences can occur in a brick.
//! - the *sequence threshold* which overapproximates the number of string sequences in a brick by forcing a *Top* value.
//! - the *length threshold* which overapproximates the number of bricks in the BricksDomain and forces a *Top* value.
use std::{collections::BTreeSet, fmt};
use super::{AbstractDomain, DomainInsertion, HasTop};
use crate::prelude::*;
use std::fmt::Debug;
mod brick;
use brick::Brick;
mod widening;
/// The BricksDomain contains a sorted list of single normalized BrickDomains.
/// It represents the composition of a string through sub sequences.
///
/// Concatenating two strings concatenates their brick lists
/// (see the module-level documentation for examples).
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub enum BricksDomain {
    /// The *Top* value represents an invalid sequence.
    Top,
    /// This value represents a sequence of string subsequences,
    /// stored as a list of single bricks.
    Value(Vec<BrickDomain>),
}
impl BricksDomain {
    /// Brings the list of bricks into its normalized form.
    ///
    /// A set of strings can be built from multiple configurations of bricks,
    /// e.g. \[{abc}\]^{1,1} <=> \[{a}\]^{1,1}\[{b}\]^{1,1}\[{c}\]^{1,1}.
    /// Restricting bricks to the normalized forms \[T\]^{1,1} or \[T\]^{0, max>0}
    /// keeps string representations unambiguous.
    ///
    /// Normalizing is the fixpoint of 5 rules that are applied
    /// to the list of bricks until the list stays unchanged:
    /// 1. **remove** bricks of the form \[{}\]^{0,0} (empty string).
    /// 2. **merge** successive bricks with the same indices max = 1, min = 1, in a new single brick.
    ///    The new string set is the concatenation of the former two. e.g. B0 = \[{a,cd}\]^{1,1}
    ///    and B1 = \[{b,ef}\]^{1,1} become B_new = \[{ab, aef, cdb, cdef}\]^{1,1}.
    /// 3. **transform** a brick in which the number of applications is constant (min = max) into one in which
    ///    min = max = 1. e.g. B = \[{a,b}\]^{2,2} => B_new = \[{aa, ab, ba, bb}\]^{1,1}.
    /// 4. **merge** two successive bricks in which the set of strings is the same. e.g. B1 = \[S\]^{m1, M1}
    ///    and B2 = \[S\]^{m2, M2} => B_new = \[S\]^{m1+m2, M1+M2}
    /// 5. **break** a single brick with min >= 1 and max != min into two simpler bricks where B = \[S\]^{min,max} =>
    ///    B1 = \[S^min\]^{1,1}, B2 = \[S\]^{0, max-min}.
    ///    e.g. B = \[{a}\]^{2,5} => B1 = \[{aa}\]^{1,1}, B2 = \[{a}\]^{0,3}
    ///
    /// Rule 4 is *not* applied to a pair \[S\]^{1,1}\[S\]^{0,k} with k > 0:
    /// merging it yields \[S\]^{1,k+1}, which rule 5 immediately breaks into the very same pair again,
    /// so the rewriting would never reach a fixpoint. The pair already consists of normalized bricks.
    ///
    /// Since normalization is rather expensive w.r.t. runtime and since it could entail a precision loss,
    /// it is only computed after a merge or widening operation.
    ///
    /// # Panics
    ///
    /// Panics if `self` is *Top*.
    pub fn normalize(&self) -> Self {
        let mut normalized = self.unwrap_value();
        // Snapshot of the list at the start of each pass. It is iterated and used for lookups
        // while `normalized` is rewritten.
        let mut lookup = normalized.clone();
        loop {
            // Every pass applies at most one rewrite and then restarts,
            // so each index taken from `lookup` is still valid for `normalized`.
            for (index, brick_domain) in lookup.iter().enumerate() {
                // *Top* bricks are never rewritten.
                let current_brick = match brick_domain {
                    BrickDomain::Top => continue,
                    BrickDomain::Value(brick) => brick,
                };
                let (min, max) = (current_brick.get_min(), current_brick.get_max());

                // --Step 1-- A brick that only represents the empty string is removed.
                if current_brick.is_empty_string() {
                    normalized.remove(index);
                    break;
                }
                // --Step 3-- A constant repetition count greater than one is unrolled
                // into all concatenations of that length with bounds (1,1).
                if min == max && min > 1 {
                    normalized[index] = BrickDomain::Value(
                        current_brick.transform_brick_with_min_max_equal(min as usize),
                    );
                    break;
                }
                // --Step 5-- min >= 1 and max > min: split off the mandatory part.
                if min >= 1 && max > min {
                    let (mandatory, optional) =
                        current_brick.break_single_brick_into_simpler_bricks();
                    normalized[index] = BrickDomain::Value(mandatory);
                    normalized.insert(index + 1, BrickDomain::Value(optional));
                    break;
                }

                // The remaining rules need a non-Top successor.
                let next_brick = match lookup.get(index + 1) {
                    Some(BrickDomain::Value(brick)) => brick,
                    _ => continue,
                };
                let (next_min, next_max) = (next_brick.get_min(), next_brick.get_max());

                // --Step 2-- Two successive bricks with bounds (1,1) are merged
                // by taking the cartesian product of their sequences.
                if (min, max, next_min, next_max) == (1, 1, 1, 1) {
                    normalized[index] = BrickDomain::Value(
                        current_brick.merge_bricks_with_bound_one(next_brick.clone()),
                    );
                    normalized.remove(index + 1);
                    break;
                }
                // --Step 4-- Two successive bricks with equal content are merged
                // by adding up their min and max values.
                if current_brick.get_sequence() == next_brick.get_sequence() {
                    // [S]^{1,1}[S]^{0,k} is exactly what step 5 produces from [S]^{1,k+1}.
                    // Merging it would make steps 4 and 5 alternate forever.
                    let is_result_of_step_5 = (min, max) == (1, 1) && next_min == 0 && next_max > 0;
                    if is_result_of_step_5 {
                        continue;
                    }
                    normalized[index] = BrickDomain::Value(
                        current_brick.merge_bricks_with_equal_content(next_brick.clone()),
                    );
                    normalized.remove(index + 1);
                    break;
                }
            }
            // Fixpoint reached: the last pass did not rewrite anything.
            if lookup == normalized {
                break;
            }
            lookup = normalized.clone();
        }

        BricksDomain::Value(normalized)
    }

    /// Before merging two BrickDomain lists, the shorter one has to be padded
    /// with empty string bricks. To achieve higher positional
    /// correspondence, empty string bricks will be added in a way that
    /// equal bricks have the same indices in both lists.
    ///
    /// `self` is the shorter list and `other` the longer one; the returned list
    /// has the length of `other`.
    ///
    /// # Panics
    ///
    /// Panics if either domain is *Top*. If `self` is longer than `other`
    /// the length difference underflows (a panic in debug builds).
    fn pad_list(&self, other: &BricksDomain) -> Self {
        let long_list = other.unwrap_value();
        let short_list = self.unwrap_value();
        let len_diff = long_list.len() - short_list.len();

        // Consume the short list front to back instead of repeatedly removing index 0.
        let mut remaining = short_list.into_iter().peekable();
        let mut padded: Vec<BrickDomain> = Vec::with_capacity(long_list.len());
        let mut empty_bricks_added = 0;

        for long_brick in long_list.iter() {
            // Pad while padding is still owed and the next short brick
            // does not line up with the brick at this position of the long list.
            let needs_padding = empty_bricks_added < len_diff
                && remaining
                    .peek()
                    .map_or(true, |short_brick| short_brick != long_brick);
            if needs_padding {
                padded.push(BrickDomain::get_empty_brick_domain());
                empty_bricks_added += 1;
            } else {
                padded.push(remaining.next().unwrap());
            }
        }

        BricksDomain::Value(padded)
    }

    /// Returns a copy of the contained list of BrickDomains.
    ///
    /// # Panics
    ///
    /// Panics if the domain is *Top*.
    fn unwrap_value(&self) -> Vec<BrickDomain> {
        if let BricksDomain::Value(bricks) = self {
            return bricks.clone();
        }
        panic!("Unexpected Brick Domain type.")
    }
}
impl fmt::Display for BricksDomain {
    /// Prints `Top` for the *Top* value, otherwise the prefix `Bricks: `
    /// followed by every brick, each terminated by a single space.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let brick_domains = match self {
            BricksDomain::Top => return write!(f, "Top"),
            BricksDomain::Value(brick_domains) => brick_domains,
        };
        write!(f, "Bricks: ")?;
        brick_domains
            .iter()
            .try_for_each(|brick_domain| write!(f, "{} ", brick_domain))
    }
}
impl DomainInsertion for BricksDomain {
    /// Appends the bricks of `string_domain` to the bricks of `self`.
    /// Used to insert format specifier in sprintf calls and for strcat call.
    ///
    /// A *Top* operand contributes a single *Top* brick to the result,
    /// unless both operands are *Top*, in which case the result is *Top* itself.
    fn append_string_domain(&self, string_domain: &Self) -> Self {
        let (mut new_bricks, appended) = match (self, string_domain) {
            (BricksDomain::Top, BricksDomain::Top) => return BricksDomain::Top,
            (BricksDomain::Top, BricksDomain::Value(bricks)) => {
                (vec![BrickDomain::Top], bricks.clone())
            }
            (BricksDomain::Value(bricks), BricksDomain::Top) => {
                (bricks.clone(), vec![BrickDomain::Top])
            }
            (BricksDomain::Value(bricks), BricksDomain::Value(other_bricks)) => {
                (bricks.clone(), other_bricks.clone())
            }
        };
        new_bricks.extend(appended);

        BricksDomain::Value(new_bricks)
    }

    /// Create a string domain that approximates float values
    /// by a fixed placeholder string.
    fn create_float_value_domain() -> Self {
        Self::from(String::from("[float inserted]"))
    }

    /// Create a string domain that approximates char values
    /// by a fixed placeholder string.
    fn create_char_domain() -> Self {
        Self::from(String::from("[char inserted]"))
    }

    /// Create a string domain that approximates integer values
    /// by a fixed placeholder string.
    fn create_integer_domain() -> Self {
        Self::from(String::from("[integer inserted]"))
    }

    /// Create a string domain that approximates pointer values.
    /// Pointer values are represented by *Top*.
    fn create_pointer_value_domain() -> Self {
        Self::Top
    }

    /// Creates a top value of the domain.
    fn create_top_value_domain() -> Self {
        Self::Top
    }

    /// Create a string domain that represents an empty string.
    fn create_empty_string_domain() -> Self {
        Self::from(String::new())
    }
}
impl AbstractDomain for BricksDomain {
    /// Merges two lists of bricks.
    ///
    /// The result is *Top* if either operand is *Top*. Equal operands are returned unchanged.
    /// Otherwise the lists are widened and, unless widening yields *Top*, normalized afterwards.
    fn merge(&self, other: &Self) -> Self {
        if self.is_top() || other.is_top() {
            return Self::Top;
        }
        if self == other {
            return self.clone();
        }
        match self.widen(other) {
            Self::Top => Self::Top,
            widened => widened.normalize(),
        }
    }

    /// Check if the value is *Top*.
    fn is_top(&self) -> bool {
        match self {
            Self::Top => true,
            Self::Value(_) => false,
        }
    }
}
impl HasTop for BricksDomain {
/// Return a *Top* value
fn top(&self) -> Self {
Self::Top
}
}
/// The single brick domain that represents a set of character sequences
/// as well as the minimum and maximum of the sum of their occurrences.
///
/// e.g. \[{"mo", "de"}\]^{1,2} represents the following set of strings:
/// {mo, de, momo, dede, mode, demo}.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub enum BrickDomain {
    /// The *Top* value represents the powerset over the alphabet
    /// of allowed characters with a minimum of 0 and a maximum of positive infinity.
    Top,
    /// The set of character sequences as well as the minimum and maximum of the sum of their occurrences,
    /// stored in a [`Brick`].
    Value(Brick),
}
impl BrickDomain {
    /// Returns a brick domain that represents exactly the given string:
    /// a single-element string set that occurs exactly once (min = max = 1).
    pub fn new(string: String) -> Self {
        let sequence: BTreeSet<String> = std::iter::once(string).collect();
        let mut brick = Brick::new();
        brick.set_min(1);
        brick.set_max(1);
        brick.set_sequence(sequence);

        BrickDomain::Value(brick)
    }

    /// Returns the brick that represents the empty string
    /// (an empty string set with both bounds equal to 0).
    fn get_empty_brick_domain() -> Self {
        Self::Value(Brick::new())
    }

    /// Returns a copy of the contained brick and panics if the domain is *Top*.
    fn unwrap_value(&self) -> Brick {
        if let BrickDomain::Value(brick) = self {
            brick.clone()
        } else {
            panic!("Unexpected Brick Domain type.")
        }
    }
}
impl From<String> for BricksDomain {
    /// Builds a bricks domain that consists of one brick representing exactly the given string.
    fn from(string: String) -> Self {
        let single_brick = BrickDomain::new(string);
        Self::Value(vec![single_brick])
    }
}
impl AbstractDomain for BrickDomain {
    /// Merges two single bricks.
    ///
    /// If either brick is *Top* the result is *Top*.
    /// Otherwise the merge is delegated to the widening operator of the brick domain.
    fn merge(&self, other: &Self) -> Self {
        match (self, other) {
            (Self::Value(_), Self::Value(_)) => self.widen(other),
            _ => Self::Top,
        }
    }

    /// Check if the value is *Top*.
    fn is_top(&self) -> bool {
        match self {
            Self::Top => true,
            Self::Value(_) => false,
        }
    }
}
impl fmt::Display for BrickDomain {
    /// Prints `[T]` for the *Top* value, otherwise the debug representation
    /// of the string set followed by the bounds as `^(min,max)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let BrickDomain::Value(brick) = self {
            let (sequence, min, max) = (brick.get_sequence(), brick.get_min(), brick.get_max());
            write!(f, "{:?}^({},{})", sequence, min, max)
        } else {
            write!(f, "[T]")
        }
    }
}
#[cfg(test)]
mod tests;
//! This module contains the Brick structure.
//! The Brick structure represents the set of all strings that can be built
//! through concatenation of a given sequence of strings with upper and lower boundaries.
//!
//! For instance, let \[{"mo", "de"}\]^{1,2} be a Brick. The following set of strings is
//! constructed through the aforementioned Brick:
//! - {mo, de, momo, dede, mode, demo}
use std::collections::BTreeSet;
use crate::prelude::*;
use itertools::Itertools;
/// A single Brick with the set of strings, a minimum and maximum bound.
///
/// The brick represents all strings that can be built by concatenating
/// between `min` and `max` elements of `sequence`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct Brick {
    /// The set of strings that can be concatenated.
    sequence: BTreeSet<String>,
    /// The minimum bound for the element occurrences.
    min: u32,
    /// The maximum bound for the element occurrences.
    max: u32,
}
impl Default for Brick {
fn default() -> Self {
Self::new()
}
}
impl Brick {
    /// Creates a new instance of the Brick struct with an empty string set
    /// and both bounds set to 0, i.e. the brick that represents the empty string.
    pub fn new() -> Self {
        Brick {
            sequence: BTreeSet::new(),
            min: 0,
            max: 0,
        }
    }

    /// Set the sequence of the Brick.
    pub fn set_sequence(&mut self, sequence: BTreeSet<String>) {
        self.sequence = sequence;
    }

    /// Set the minimum bound for the element occurrences in the Brick.
    pub fn set_min(&mut self, min: u32) {
        self.min = min;
    }

    /// Set the maximum bound for the element occurrences in the Brick.
    pub fn set_max(&mut self, max: u32) {
        self.max = max;
    }

    /// Returns a reference to the string sequence in the brick.
    pub fn get_sequence(&self) -> &BTreeSet<String> {
        &self.sequence
    }

    /// Returns the minimum occurrence of the sequences contained in the brick.
    pub fn get_min(&self) -> u32 {
        self.min
    }

    /// Returns the maximum occurrence of the sequences contained in the brick.
    pub fn get_max(&self) -> u32 {
        self.max
    }

    /// Checks whether a brick represents an empty string (Rule 1),
    /// i.e. whether the string set is empty and both bounds are 0.
    pub fn is_empty_string(&self) -> bool {
        self.sequence.is_empty() && self.min == 0 && self.max == 0
    }

    /// **merge** bricks with the same indices max = 1, min = 1, in a new single brick
    /// with the new string set being the concatenation of the former two. e.g. B0 = \[{a,cd}\]^{1,1}
    /// and B1 = \[{b,ef}\]^{1,1} become B_new = \[{ab, aef, cdb, cdef}\]^{1,1}.
    ///
    /// The bounds of the operands are not checked; the result always has the bounds (1,1).
    pub fn merge_bricks_with_bound_one(&self, other: Brick) -> Self {
        let sequence: BTreeSet<String> = self
            .sequence
            .iter()
            .cartesian_product(other.sequence.iter())
            .map(|(prefix, suffix)| prefix.clone() + suffix)
            .collect();

        Brick {
            sequence,
            min: 1,
            max: 1,
        }
    }

    /// **transform** a brick in which the number of applications is constant (min = max) into one in which
    /// min = max = 1. e.g. B = \[{a,b}\]^{2,2} => B_new = \[{aa, ab, ba, bb}\]^{1,1}.
    ///
    /// `length` is the number of elements that are concatenated for each new string.
    pub fn transform_brick_with_min_max_equal(&self, length: usize) -> Self {
        let concatenations =
            Self::generate_permutations_of_fixed_length(length, &self.sequence, Vec::new(), 1);

        Brick {
            sequence: concatenations.into_iter().collect(),
            min: 1,
            max: 1,
        }
    }

    /// **merge** two bricks in which the set of strings is the same. e.g. B1 = \[S\]^{m1, M1}
    /// and B2 = \[S\]^{m2, M2} => B_new = \[S\]^{m1+m2, M1+M2}
    ///
    /// The string set of the result is taken from `self`; equality of the two sets is not checked.
    /// The bounds are added with saturation: the widening tests of this module expect `u32::MAX`
    /// as a widened upper bound, and adding anything to such a bound must neither panic nor wrap around.
    pub fn merge_bricks_with_equal_content(&self, other: Brick) -> Self {
        Brick {
            sequence: self.sequence.clone(),
            min: self.min.saturating_add(other.min),
            max: self.max.saturating_add(other.max),
        }
    }

    /// **break** a single brick with min >= 1 and max != min into two simpler bricks where B = \[S\]^{min,max} =>
    /// B1 = \[S^min\]^{1,1}, B2 = \[S\]^{0, max-min}.
    /// e.g. B = \[{a}\]^{2,5} => B1 = \[{aa}\]^{1,1}, B2 = \[{a}\]^{0,3}
    ///
    /// The caller has to ensure `max >= min`; otherwise the subtraction underflows
    /// (a panic in debug builds).
    pub fn break_single_brick_into_simpler_bricks(&self) -> (Self, Self) {
        let mandatory = self.transform_brick_with_min_max_equal(self.min as usize);
        let optional = Brick {
            sequence: self.sequence.clone(),
            min: 0,
            max: self.max - self.min,
        };

        (mandatory, optional)
    }

    /// Generates all concatenations of a fixed number of elements of the sequence.
    /// For instance, \[{a,b}\] with length = 2 becomes \[{aa, ab, ba, bb}\]
    /// Note that the length can also be greater or smaller than
    /// the number of elements in the sequence.
    ///
    /// - `max_length`: number of elements per generated string.
    /// - `sequence`: the strings to choose from.
    /// - `generated`: strings generated so far; pass an empty vector to start.
    /// - `current_length`: number of elements per string after the next extension round; pass 1 to start.
    ///
    /// At least one extension round is always performed,
    /// so a `max_length` of 0 yields the same result as a `max_length` of 1.
    pub fn generate_permutations_of_fixed_length(
        max_length: usize,
        sequence: &BTreeSet<String>,
        generated: Vec<String>,
        current_length: usize,
    ) -> Vec<String> {
        let mut generated = generated;
        let mut current_length = current_length;
        loop {
            // Extend every string generated so far by every element of the sequence.
            // The first round just starts with the elements themselves.
            let extended: Vec<String> = if generated.is_empty() {
                sequence.iter().cloned().collect()
            } else {
                sequence
                    .iter()
                    .flat_map(|element| generated.iter().map(move |prefix| prefix.clone() + element))
                    .collect()
            };
            if current_length >= max_length {
                return extended;
            }
            generated = extended;
            current_length += 1;
        }
    }
}
use std::collections::BTreeSet;
use crate::abstract_domain::bricks::widening::{
INTERVAL_THRESHOLD, LENGTH_THRESHOLD, SEQUENCE_THRESHOLD,
};
use super::*;
impl Brick {
    /// Test helper that builds a brick from a list of strings and the two bounds.
    /// Duplicate strings collapse, since the brick stores its strings as a set.
    fn mock_brick(sequence: Vec<String>, min: u32, max: u32) -> Brick {
        let string_set: BTreeSet<String> = sequence.into_iter().collect();
        let mut brick = Brick::new();
        brick.set_min(min);
        brick.set_max(max);
        brick.set_sequence(string_set);
        brick
    }
}
/// Collection of bricks that are shared between the test cases.
struct Setup {
    /// {a, b}^{2,2}
    brick0: BrickDomain,
    /// {a, cd}^{1,1}
    brick1: BrickDomain,
    /// {b, ef}^{1,1}
    brick2: BrickDomain,
    /// {a, b}^{2,3}
    brick3: BrickDomain,
    /// {a, b}^{0,1}
    brick4: BrickDomain,
    /// {a}^{1,1}
    brick5: BrickDomain,
}
impl Setup {
    /// Creates the bricks used throughout the tests.
    fn new() -> Self {
        // Builds a brick domain value from string literals and the two bounds.
        let brick = |strings: &[&str], min: u32, max: u32| {
            let sequence = strings.iter().map(|s| String::from(*s)).collect();
            BrickDomain::Value(Brick::mock_brick(sequence, min, max))
        };

        Setup {
            brick0: brick(&["a", "b"], 2, 2),
            brick1: brick(&["a", "cd"], 1, 1),
            brick2: brick(&["b", "ef"], 1, 1),
            brick3: brick(&["a", "b"], 2, 3),
            brick4: brick(&["a", "b"], 0, 1),
            brick5: brick(&["a"], 1, 1),
        }
    }
}
#[test]
fn test_merge_brick_domain() {
    let setup = Setup::new();
    // Merging {a,b}^{2,2} with {a,b}^{0,1} keeps the strings and spans both intervals.
    let expected = BrickDomain::Value(Brick::mock_brick(
        vec!["a".to_string(), "b".to_string()],
        0,
        2,
    ));

    assert_eq!(setup.brick0.merge(&setup.brick4), expected);
}
#[test]
fn test_brick_is_less_or_equal() {
    let setup = Setup::new();
    let top = BrickDomain::Top;
    let empty = BrickDomain::get_empty_brick_domain();

    // Test Case 1: brick0 = {a,b}^[2,2] is less than brick3 = {a,b}^[2,3]
    assert!(setup.brick0.is_less_or_equal(&setup.brick3));
    // Test Case 2: brick0 = {a,b}^[2,2] is less than Top
    assert!(setup.brick0.is_less_or_equal(&top));
    // Test Case 3: Top is not less than brick0 = {a,b}^[2,2]
    assert!(!top.is_less_or_equal(&setup.brick0));
    // Test Case 4: Top is less than Top
    assert!(top.is_less_or_equal(&top));
    // Test Case 5: self represents an empty string and the other is a 'normal' brick.
    assert!(empty.is_less_or_equal(&setup.brick0));
    // Test Case 6: other represents an empty string and self is a 'normal' brick.
    assert!(setup.brick0.is_less_or_equal(&empty));
}
#[test]
fn test_brick_widen() {
    let setup = Setup::new();
    let a_and_b = || vec!["a".to_string(), "b".to_string()];

    // Test Case 1: No widening is applied since no thresholds are exceeded.
    let expected = BrickDomain::Value(Brick::mock_brick(a_and_b(), 0, 2));
    assert_eq!(setup.brick0.widen(&setup.brick4), expected);

    // Test Case 2: Widening is applied since the sequence threshold is exceeded.
    let large_sequence: Vec<String> = (0..SEQUENCE_THRESHOLD)
        .map(|number| number.to_string())
        .collect();
    let large_brick = BrickDomain::Value(Brick::mock_brick(large_sequence, 0, 1));
    assert_eq!(setup.brick0.widen(&large_brick), BrickDomain::Top);

    // Test Case 3: Widening is applied since the interval threshold is exceeded.
    let high_bounded_brick = BrickDomain::Value(Brick::mock_brick(
        a_and_b(),
        0,
        (INTERVAL_THRESHOLD + 1) as u32,
    ));
    let expected = BrickDomain::Value(Brick::mock_brick(a_and_b(), 0, u32::MAX));
    assert_eq!(setup.brick0.widen(&high_bounded_brick), expected);
}
#[test]
fn test_merge_bricks_domain() {
    let setup = Setup::new();
    let first_bricks = BricksDomain::Value(vec![setup.brick0.clone()]);
    let second_bricks = BricksDomain::Value(vec![setup.brick0.clone(), setup.brick1.clone()]);

    // brick0 = {a,b}^{2,2} is expected to be normalized into all strings of length two.
    let normalized_brick = BrickDomain::Value(Brick::mock_brick(
        vec!["aa", "ab", "ba", "bb"]
            .into_iter()
            .map(String::from)
            .collect(),
        1,
        1,
    ));
    // brick1 = {a,cd}^{1,1} is expected to be merged with an empty string brick.
    let merged_with_empty = BrickDomain::Value(Brick::mock_brick(
        vec!["a".to_string(), "cd".to_string()],
        0,
        1,
    ));
    let expected = BricksDomain::Value(vec![normalized_brick, merged_with_empty]);

    assert_eq!(first_bricks.merge(&second_bricks), expected);
}
#[test]
fn test_bricks_is_less_or_equal() {
    let Setup {
        brick2,
        brick3,
        brick4,
        brick5,
        ..
    } = Setup::new();
    // Wraps both lists into bricks domains and compares them.
    let less_or_equal = |lhs: &[BrickDomain], rhs: &[BrickDomain]| {
        BricksDomain::Value(lhs.to_vec()).is_less_or_equal(&BricksDomain::Value(rhs.to_vec()))
    };
    let strings = |list: &[&str]| list.iter().map(|s| String::from(*s)).collect::<Vec<String>>();

    let mut bricks1 = vec![
        brick3,
        BrickDomain::Value(Brick::mock_brick(strings(&["c", "d"]), 4, 5)),
    ];
    let mut bricks2 = vec![
        BrickDomain::Value(Brick::mock_brick(strings(&["a", "b"]), 1, 4)),
        BrickDomain::Value(Brick::mock_brick(strings(&["c", "d", "e"]), 4, 5)),
    ];

    // Test Case 1: bricks1 is less or equal to bricks2
    assert!(less_or_equal(&bricks1, &bricks2));

    // Test Case 2: bricks1 is shorter than bricks2 and is extended with an empty string brick.
    // This does not change the outcome.
    bricks1.push(BrickDomain::get_empty_brick_domain());
    bricks2.push(brick5);
    assert!(less_or_equal(&bricks1, &bricks2));

    // Test Case 3: Top value in bricks1 and Top value in bricks2
    bricks1.push(BrickDomain::Top);
    bricks2.push(BrickDomain::Top);
    assert!(less_or_equal(&bricks1, &bricks2));

    // Test Case 4: some value in bricks1 and Top value in bricks2
    bricks1.push(brick4);
    bricks2.push(BrickDomain::Top);
    assert!(less_or_equal(&bricks1, &bricks2));

    // Test Case 5: Top value in bricks1 and some value in bricks2
    bricks1.push(BrickDomain::Top);
    bricks2.push(brick2);
    assert!(!less_or_equal(&bricks1, &bricks2));
}
#[test]
fn test_bricks_widen() {
    let Setup { brick2, brick3, .. } = Setup::new();
    // Wraps both lists into bricks domains and widens the first with the second.
    let widen = |lhs: &[BrickDomain], rhs: &[BrickDomain]| {
        BricksDomain::Value(lhs.to_vec()).widen(&BricksDomain::Value(rhs.to_vec()))
    };
    let strings = |list: &[&str]| list.iter().map(|s| String::from(*s)).collect::<Vec<String>>();

    let mut bricks1 = vec![
        brick3,
        BrickDomain::Value(Brick::mock_brick(strings(&["c", "d"]), 4, 5)),
    ];
    let mut bricks2 = vec![
        BrickDomain::Value(Brick::mock_brick(strings(&["a", "b"]), 1, 4)),
        BrickDomain::Value(Brick::mock_brick(strings(&["c", "d", "e"]), 4, 5)),
    ];

    // Test Case 1: The less or equal relation holds and no threshold is exceeded.
    // Equivalent to normal merge.
    assert_eq!(
        widen(&bricks1, &bricks2),
        BricksDomain::Value(bricks2.clone())
    );

    // Test Case 2: The first BricksDomain exceeds the length threshold.
    let extended_bricks: Vec<BrickDomain> = bricks1
        .iter()
        .cloned()
        .chain(std::iter::repeat_with(BrickDomain::get_empty_brick_domain).take(LENGTH_THRESHOLD))
        .collect();
    assert_eq!(widen(&extended_bricks, &bricks2), BricksDomain::Top);

    // Test Case 3: The less or equal relation does not hold.
    bricks1.push(BrickDomain::Top);
    bricks2.push(brick2);
    assert_eq!(widen(&bricks1, &bricks2), BricksDomain::Top);
}
#[test]
fn test_brick_list_is_less_or_equal() {
    let setup = Setup::new();
    // The empty string brick in second position is compared against brick1.
    let with_empty_brick = vec![
        setup.brick0.clone(),
        BrickDomain::get_empty_brick_domain(),
    ];
    let with_brick1 = vec![setup.brick0.clone(), setup.brick1.clone()];

    let first_bricks = BricksDomain::Value(with_empty_brick);
    let second_bricks = BricksDomain::Value(with_brick1);

    assert!(first_bricks.is_less_or_equal(&second_bricks));
}
#[test]
fn test_normalize() {
    let setup = Setup::new();
    let strings = |list: &[&str]| list.iter().map(|s| String::from(*s)).collect::<Vec<String>>();

    // Input: ["a"]^{1,1}["a", "b"]^{2,3}["a", "b"]^{0,1}
    let to_normalize = BricksDomain::Value(vec![setup.brick5, setup.brick3, setup.brick4]);

    let expected = BricksDomain::Value(vec![
        BrickDomain::Value(Brick::mock_brick(
            strings(&["aaa", "aab", "aba", "abb"]),
            1,
            1,
        )),
        BrickDomain::Value(Brick::mock_brick(strings(&["a", "b"]), 0, 2)),
    ]);

    assert_eq!(to_normalize.normalize(), expected);
}
#[test]
fn test_generate_permutations_of_fixed_length() {
    let sequence: BTreeSet<String> = vec!["a_", "b_", "c_"]
        .into_iter()
        .map(String::from)
        .collect();
    // The result is ordered by the appended element first and by the prefix second.
    let expected: Vec<String> = vec![
        "a_a_", "b_a_", "c_a_", "a_b_", "b_b_", "c_b_", "a_c_", "b_c_", "c_c_",
    ]
    .into_iter()
    .map(String::from)
    .collect();

    let result = Brick::generate_permutations_of_fixed_length(2, &sequence, Vec::new(), 1);

    assert_eq!(result, expected);
}
#[test]
fn test_break_single_brick_into_simpler_bricks() {
    let setup = Setup::new();
    let strings = |list: &[&str]| list.iter().map(|s| String::from(*s)).collect::<Vec<String>>();

    // ["a", "b"]^{2,3} is split into a mandatory part of length two and an optional rest.
    let complex_brick = setup.brick3.unwrap_value();
    let (mandatory, optional) = complex_brick.break_single_brick_into_simpler_bricks();

    assert_eq!(
        mandatory,
        Brick::mock_brick(strings(&["aa", "ba", "ab", "bb"]), 1, 1)
    );
    assert_eq!(optional, Brick::mock_brick(strings(&["a", "b"]), 0, 1));
}
#[test]
fn test_merge_bricks_with_equal_content() {
    let setup = Setup::new();
    // {a,b}^{2,2} merged with {a,b}^{0,1} has to equal brick3 = {a,b}^{2,3}.
    let first = setup.brick0.unwrap_value();
    let second = setup.brick4.unwrap_value();

    assert_eq!(
        first.merge_bricks_with_equal_content(second),
        setup.brick3.unwrap_value()
    );
}
#[test]
fn test_transform_brick_with_min_max_equal() {
    let setup = Setup::new();
    // brick0 = {a,b}^{2,2} becomes the set of all strings built from two elements.
    let not_normalized = setup.brick0.unwrap_value();
    let length = not_normalized.get_min() as usize;
    let expected_sequence: Vec<String> = vec!["aa", "ba", "ab", "bb"]
        .into_iter()
        .map(String::from)
        .collect();

    assert_eq!(
        not_normalized.transform_brick_with_min_max_equal(length),
        Brick::mock_brick(expected_sequence, 1, 1)
    );
}
#[test]
fn test_merge_bricks_with_bound_one() {
    let setup = Setup::new();
    // {a,cd}^{1,1} followed by {b,ef}^{1,1} yields all pairwise concatenations.
    let prefix_brick = setup.brick1.unwrap_value();
    let suffix_brick = setup.brick2.unwrap_value();
    let expected_sequence: Vec<String> = vec!["ab", "aef", "cdb", "cdef"]
        .into_iter()
        .map(String::from)
        .collect();

    assert_eq!(
        prefix_brick.merge_bricks_with_bound_one(suffix_brick),
        Brick::mock_brick(expected_sequence, 1, 1)
    );
}
#[test]
fn test_empty_string() {
    let setup = Setup::new();
    // brick5 = {a}^{1,1} is a regular brick, the empty brick domain is not.
    let regular_brick = setup.brick5.unwrap_value();
    let empty_brick = BrickDomain::get_empty_brick_domain().unwrap_value();

    assert!(empty_brick.is_empty_string());
    assert!(!regular_brick.is_empty_string());
}
#[test]
fn test_pad_list() {
    let Setup {
        brick0,
        brick1,
        brick2,
        brick3,
        brick4,
        brick5,
    } = Setup::new();
    let empty_brick = BrickDomain::get_empty_brick_domain();

    let short_list = BricksDomain::Value(vec![brick0.clone(), brick1.clone(), brick2.clone()]);
    let long_list = BricksDomain::Value(vec![
        brick3,
        brick0.clone(),
        brick1.clone(),
        brick4,
        brick5,
    ]);

    // brick0 and brick1 keep the positions they have in the long list,
    // the two missing positions are filled with empty string bricks.
    let expected_list = BricksDomain::Value(vec![
        empty_brick.clone(),
        brick0,
        brick1,
        empty_brick,
        brick2,
    ]);

    assert_eq!(short_list.pad_list(&long_list), expected_list);
}
#[test]
fn test_append_string_domain() {
    // Builds a brick that represents exactly the given string.
    let literal = |s: &str| BrickDomain::Value(Brick::mock_brick(vec![s.to_string()], 1, 1));

    let bricks_one = BricksDomain::Value(vec![literal("cat ")]);
    let bricks_two = BricksDomain::Value(vec![literal("bash.sh")]);
    let top_bricks = BricksDomain::Top;

    // Top + Top stays Top.
    assert_eq!(
        BricksDomain::Top,
        top_bricks.append_string_domain(&top_bricks)
    );

    // Value + Top appends a single Top brick.
    let expected_bricks = BricksDomain::Value(vec![literal("cat "), BrickDomain::Top]);
    assert_eq!(
        expected_bricks,
        bricks_one.append_string_domain(&top_bricks)
    );

    // Top + Value prepends a single Top brick.
    let expected_bricks = BricksDomain::Value(vec![BrickDomain::Top, literal("bash.sh")]);
    assert_eq!(
        expected_bricks,
        top_bricks.append_string_domain(&bricks_two)
    );

    // Value + Value concatenates both brick lists.
    let expected_bricks = BricksDomain::Value(vec![literal("cat "), literal("bash.sh")]);
    assert_eq!(
        expected_bricks,
        bricks_one.append_string_domain(&bricks_two)
    );
}
//! This module implements the widening operator for the BrickDomain and BricksDomain.
//! The exact widening procedure depends on three constants.
//! - The *interval threshold* overapproximates the number of times string sequences can occur in a brick.
//! - The *sequence threshold* overapproximates the number of string sequences in a brick by forcing a *Top* value.
//! - The *length threshold* overapproximates the number of bricks in the BricksDomain and forces a *Top* value.
//! A merge is processed without widening when none of the thresholds are exceeded.
use std::{
cmp::{
max, min,
Ordering::{Equal, Greater, Less},
},
collections::BTreeSet,
};
use crate::abstract_domain::AbstractDomain;
use super::{brick::Brick, BrickDomain, BricksDomain};
pub const INTERVAL_THRESHOLD: usize = 8;
pub const SEQUENCE_THRESHOLD: usize = 8;
pub const LENGTH_THRESHOLD: usize = 32;
impl BricksDomain {
    /// Widening operator of the BricksDomain, applied when two values are merged.
    ///
    /// The list with fewer bricks is padded via `pad_list` so that both lists can be
    /// processed pairwise. *Top* is returned if
    /// - neither padded list is less or equal to the other input list, or
    /// - the brick list of `self` or `other` is longer than `LENGTH_THRESHOLD`, or
    /// - every brick of the pairwise merged list is *Top*.
    ///
    /// Otherwise the pairwise merged brick list is returned as a `Value`.
    ///
    /// Both inputs are unwrapped with `unwrap_value`, so callers are presumably
    /// expected to handle *Top* inputs beforehand.
    pub fn widen(&self, other: &BricksDomain) -> Self {
        let self_len = self.unwrap_value().len();
        let other_len = other.unwrap_value().len();

        // Only the shorter list gets padded; the longer one is taken over unchanged.
        let (padded_self, padded_other) = match self_len.cmp(&other_len) {
            Less => (self.pad_list(other), other.clone()),
            Greater => (self.clone(), other.pad_list(self)),
            Equal => (self.clone(), other.clone()),
        };

        let incomparable =
            !padded_self.is_less_or_equal(other) && !padded_other.is_less_or_equal(self);
        let exceeds_length = self_len > LENGTH_THRESHOLD || other_len > LENGTH_THRESHOLD;
        if incomparable || exceeds_length {
            return BricksDomain::Top;
        }

        let widened_bricks: Vec<BrickDomain> = padded_self
            .unwrap_value()
            .iter()
            .zip(padded_other.unwrap_value().iter())
            .map(|(own_brick, other_brick)| own_brick.merge(other_brick))
            .collect();

        if BricksDomain::all_bricks_are_top(&widened_bricks) {
            BricksDomain::Top
        } else {
            BricksDomain::Value(widened_bricks)
        }
    }

    /// Returns `true` if no brick in the given slice is a non-*Top* value.
    /// In that case the BricksDomain itself should be converted into a *Top* value.
    pub fn all_bricks_are_top(bricks: &[BrickDomain]) -> bool {
        !bricks
            .iter()
            .any(|brick| !matches!(brick, BrickDomain::Top))
    }

    /// Checks whether `self` is less or equal than `other` with respect to the partial order.
    ///
    /// The bricks are compared pairwise by position; if the lists differ in length,
    /// only the common prefix is compared.
    pub fn is_less_or_equal(&self, other: &BricksDomain) -> bool {
        let own_bricks = self.unwrap_value();
        let other_bricks = other.unwrap_value();
        for (own_brick, other_brick) in own_bricks.iter().zip(other_bricks.iter()) {
            if !own_brick.is_less_or_equal(other_brick) {
                return false;
            }
        }
        true
    }
}
impl BrickDomain {
    /// Widening operator of the BrickDomain.
    ///
    /// The string sequences of both bricks are united. If the united set contains
    /// more than `SEQUENCE_THRESHOLD` strings, *Top* is returned.
    /// Otherwise the smaller of both minima and the larger of both maxima are determined.
    /// If their difference exceeds `INTERVAL_THRESHOLD`, the bounds are widened to
    /// `0` and `u32::MAX` (standing in for infinity); else they are used as the new bounds.
    ///
    /// Both inputs are unwrapped with `unwrap_value`, so callers are presumably
    /// expected to handle *Top* inputs beforehand.
    pub fn widen(&self, other: &BrickDomain) -> Self {
        let (own_brick, other_brick) = (self.unwrap_value(), other.unwrap_value());

        let united_sequence: BTreeSet<String> = own_brick
            .get_sequence()
            .union(other_brick.get_sequence())
            .cloned()
            .collect();
        if united_sequence.len() > SEQUENCE_THRESHOLD {
            return BrickDomain::Top;
        }

        let lowest = min(own_brick.get_min(), other_brick.get_min());
        let highest = max(own_brick.get_max(), other_brick.get_max());
        // An interval that grew too wide is overapproximated by the full range.
        let (new_min, new_max) = if highest - lowest > INTERVAL_THRESHOLD as u32 {
            (0, u32::MAX)
        } else {
            (lowest, highest)
        };

        let mut widened = Brick::new();
        widened.set_min(new_min);
        widened.set_max(new_max);
        widened.set_sequence(united_sequence);
        BrickDomain::Value(widened)
    }

    /// Checks whether `self` is less or equal than `other` with respect to the partial order.
    ///
    /// - Everything is less or equal to *Top*.
    /// - *Top* is not less or equal to any non-*Top* value.
    /// - Empty strings are ignored for order comparisons: if either brick is the
    ///   empty string, `true` is returned.
    /// - Otherwise the sequence of `self` must be a subset of the sequence of `other`
    ///   and the interval of `self` must lie within the interval of `other`.
    pub fn is_less_or_equal(&self, other: &BrickDomain) -> bool {
        if other.is_top() {
            return true;
        }
        if self.is_top() {
            return false;
        }

        let own_brick = self.unwrap_value();
        let other_brick = other.unwrap_value();
        if own_brick.is_empty_string() || other_brick.is_empty_string() {
            return true;
        }

        own_brick
            .get_sequence()
            .is_subset(other_brick.get_sequence())
            && own_brick.get_min() >= other_brick.get_min()
            && own_brick.get_max() <= other_brick.get_max()
    }
}
//! This module contains the Character Inclusion Domain (CI).
//!
//! This domain considers the characters of a string and distinguishes
//! between two scenarios which are stored in different HashSets.
//! - The first set contains characters that are certainly contained in
//! the string.
//! - The second set contains characters that may be in the string.
//!
//! This distinction is made when two CI domains merge.
//! Furthermore, the CI domain does not preserve information about the order of characters.
//! The *Top* value of the CI domain stands for an empty set of certainly
//! contained characters and the whole alphabet of allowed characters for the possibly contained characters.
//!
//! The following presents an example which shows how the CI domain works:
//! 1. When a string is assigned to the CI domain its unique characters are stored in both
//! sets. e.g. "Hello, World!" => ({H,e,l,o,',',' ',W,r,d,!}, {H,e,l,o,',',' ',W,r,d,!})
//! 2. When two strings are concatenated, the union of the two sets of the two domains is taken.
//! e.g. "Hello, " + "World" => ({H,e,l,o,',',' '} v {W,o,r,l,d}, {H,e,l,o,',',' '} v {W,o,r,l,d})
//! 3. When two domains are merged, the intersection of the certain sets and the union of possible sets are taken.
//! e.g. ({H,e,l,o,',',' '}, {H,e,l,o,',',' '}) v ({W,o,r,l,d}, {W,o,r,l,d}) => ({l,o}, {H,e,l,o,',',' ',W,r,d})
use std::{collections::BTreeSet, fmt};
use crate::prelude::*;
use std::fmt::Debug;
use super::{AbstractDomain, DomainInsertion, HasTop};
/// The `CharacterInclusionDomain` is an abstract domain describing the characters a string certainly has
/// and the characters a string may have.
///
/// The value consists of a set of certainly contained characters and a set of possibly contained characters,
/// while the *Top* value does not carry any data. However, the *Top* value stands for an empty set of certainly
/// contained characters and the whole alphabet of allowed characters for the possibly contained characters.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub enum CharacterInclusionDomain {
/// The *Top* value stands for an empty set of certainly contained characters and
/// the whole alphabet of allowed characters for the possibly contained characters.
Top,
/// The pair `(certain, possible)`: the set of certainly contained characters
/// followed by the set of possibly contained characters.
Value((CharacterSet, CharacterSet)),
}
impl CharacterInclusionDomain {
    /// Returns a copy of the `(certain, possible)` character sets of the domain.
    ///
    /// # Panics
    ///
    /// Panics if the domain is *Top*.
    pub fn unwrap_value(&self) -> (CharacterSet, CharacterSet) {
        if let CharacterInclusionDomain::Value(character_sets) = self {
            character_sets.clone()
        } else {
            panic!("Unexpected Character Inclusion type.")
        }
    }
}
impl DomainInsertion for CharacterInclusionDomain {
/// Append string domain as part of a concatenation. (different to merge)
fn append_string_domain(&self, string_domain: &Self) -> CharacterInclusionDomain {
match self {
CharacterInclusionDomain::Value((self_certain, self_possible)) => match string_domain {
CharacterInclusionDomain::Value((other_certain, other_possible)) => {
CharacterInclusionDomain::Value((
self_certain.union(other_certain.clone()),
self_possible.union(other_possible.clone()),
))
}
CharacterInclusionDomain::Top => {
CharacterInclusionDomain::Value((self_certain.clone(), CharacterSet::Top))
}
},
CharacterInclusionDomain::Top => match string_domain {
CharacterInclusionDomain::Value((other_certain, _)) => {
CharacterInclusionDomain::Value((other_certain.clone(), CharacterSet::Top))
}
CharacterInclusionDomain::Top => CharacterInclusionDomain::Top,
},
}
}
/// Create a string domain that approximates float values.
fn create_float_value_domain() -> Self {
let float_character_set: BTreeSet<char> = vec![
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', '-', 'a', 'i', 'n', 'f', 'e',
'E',
]
.into_iter()
.collect();
CharacterInclusionDomain::Value((
CharacterSet::Value(vec![].into_iter().collect()),
CharacterSet::Value(float_character_set),
))
}
/// Create a string domain that approximates char values.
fn create_char_domain() -> Self {
CharacterInclusionDomain::Top
}
/// Create a string domain that approximates integer values.
fn create_integer_domain() -> Self {
let integer_character_set: BTreeSet<char> =
vec!['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-']
.into_iter()
.collect();
CharacterInclusionDomain::Value((
CharacterSet::Value(vec![].into_iter().collect()),
CharacterSet::Value(integer_character_set),
))
}
/// Create a string domain that approximates pointer values.
fn create_pointer_value_domain() -> Self {
CharacterInclusionDomain::Top
}
/// Creates a top value of the domain.
fn create_top_value_domain() -> Self {
CharacterInclusionDomain::Top
}
/// Create a string domain that represents an empty string.
fn create_empty_string_domain() -> Self {
CharacterInclusionDomain::from("".to_string())
}
}
impl From<String> for CharacterInclusionDomain {
    /// Builds the domain of a concrete string: its unique characters form
    /// both the certain and the possible character set.
    fn from(string: String) -> Self {
        let possible: BTreeSet<char> = string.chars().collect();
        let certain = possible.clone();

        CharacterInclusionDomain::Value((
            CharacterSet::Value(certain),
            CharacterSet::Value(possible),
        ))
    }
}
impl AbstractDomain for CharacterInclusionDomain {
    /// Merges two values.
    ///
    /// Returns *Top* if either domain is *Top* and a copy of `self` if both are equal.
    /// Otherwise the intersection of the certainly contained characters
    /// and the union of the possibly contained characters are taken.
    fn merge(&self, other: &Self) -> Self {
        match (self, other) {
            (Self::Top, _) | (_, Self::Top) => Self::Top,
            _ if self == other => self.clone(),
            (Self::Value((own_certain, own_possible)), Self::Value((other_certain, other_possible))) => {
                Self::Value((
                    own_certain.intersection(other_certain.clone()),
                    own_possible.union(other_possible.clone()),
                ))
            }
        }
    }

    /// Returns `true` if the value is *Top*.
    fn is_top(&self) -> bool {
        *self == Self::Top
    }
}
impl HasTop for CharacterInclusionDomain {
    /// Returns the *Top* value of the domain, independent of the value of `self`.
    fn top(&self) -> Self {
        Self::Top
    }
}
impl fmt::Display for CharacterInclusionDomain {
    /// Prints `Top` for the *Top* value and both character sets otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let CharacterInclusionDomain::Value((certain_set, possible_set)) = self {
            write!(f, "Certain: {}, Possible: {}", certain_set, possible_set)
        } else {
            write!(f, "Top")
        }
    }
}
/// A domain that represents character sets.
///
/// It is used for both components of a `CharacterInclusionDomain::Value`.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub enum CharacterSet {
/// The *Top* value represents a character set of all allowed characters.
Top,
/// An explicitly enumerated set of characters, i.e. a subset of all allowed characters.
Value(BTreeSet<char>),
}
impl CharacterSet {
    /// Returns a copy of the contained character set.
    ///
    /// # Panics
    ///
    /// Panics if the character set is *Top*.
    pub fn unwrap_value(&self) -> BTreeSet<char> {
        if let CharacterSet::Value(characters) = self {
            characters.clone()
        } else {
            panic!("Unexpected CharacterSet type.")
        }
    }

    /// Takes the intersection of two character sets.
    ///
    /// # Panics
    ///
    /// Panics if either set is *Top*. Neither set should be *Top*, since otherwise
    /// the whole CharacterInclusionDomain would be *Top*, which is checked beforehand.
    pub fn intersection(&self, other: Self) -> Self {
        if self.is_top() || other.is_top() {
            panic!("Unexpected Top Value for CharacterSet intersection.")
        }

        let other_characters = other.unwrap_value();
        let shared_characters = self
            .unwrap_value()
            .into_iter()
            .filter(|character| other_characters.contains(character))
            .collect();
        CharacterSet::Value(shared_characters)
    }

    /// Takes the union of two character sets.
    /// If either of them is *Top*, the union is *Top*.
    /// Otherwise the standard set union is taken.
    pub fn union(&self, other: Self) -> Self {
        if self.is_top() || other.is_top() {
            return CharacterSet::Top;
        }

        let mut combined_characters = self.unwrap_value();
        combined_characters.extend(other.unwrap_value());
        CharacterSet::Value(combined_characters)
    }

    /// Returns `true` if the value is *Top*.
    fn is_top(&self) -> bool {
        *self == Self::Top
    }
}
impl HasTop for CharacterSet {
    /// Returns the *Top* value of the character set, independent of the value of `self`.
    fn top(&self) -> Self {
        Self::Top
    }
}
impl fmt::Display for CharacterSet {
    /// Prints `Top` for the *Top* value and the debug representation of the set otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let CharacterSet::Value(char_set) = self {
            write!(f, "{:?}", char_set)
        } else {
            write!(f, "Top")
        }
    }
}
#[cfg(test)]
pub mod tests {
    use super::*;

    impl CharacterInclusionDomain {
        /// Mocks a domain whose certain and possible sets both consist of
        /// the characters of the given concrete string.
        pub fn ci(concrete: &str) -> CharacterInclusionDomain {
            let characters: BTreeSet<char> = concrete.chars().collect();
            CharacterInclusionDomain::Value((
                CharacterSet::Value(characters.clone()),
                CharacterSet::Value(characters),
            ))
        }
    }

    #[test]
    fn merging() {
        // Builds a character set from the characters of a string.
        let char_set = |characters: &str| CharacterSet::Value(characters.chars().collect());

        let abc = CharacterInclusionDomain::ci("abc");
        let def = CharacterInclusionDomain::ci("def");
        let dabc = CharacterInclusionDomain::ci("dabc");

        // Disjoint strings share no certain characters.
        assert_eq!(
            abc.merge(&def),
            CharacterInclusionDomain::Value((char_set(""), char_set("abcdef")))
        );
        // Only the common character stays certain.
        assert_eq!(
            dabc.merge(&def),
            CharacterInclusionDomain::Value((char_set("d"), char_set("abcdef")))
        );
        // Merging with Top always yields Top.
        assert_eq!(
            abc.merge(&CharacterInclusionDomain::Top),
            CharacterInclusionDomain::Top
        );
        assert_eq!(
            CharacterInclusionDomain::Top.merge(&CharacterInclusionDomain::Top),
            CharacterInclusionDomain::Top
        );
    }
}
......@@ -27,7 +27,7 @@ mod trait_impl;
///
/// The domain also contains a flag to indicate that it includes `Top` values,
/// i.e. values of fully unknown origin and offset.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct DataDomain<T: RegisterDomain> {
/// The byte size of the represented values.
size: ByteSize,
......@@ -234,6 +234,10 @@ mod tests {
contains_top_values: false,
}
}
/// Insert `offset` as the relative value for the abstract identifier `id`
/// into the `relative_values` map of the domain.
pub fn insert_relative_value(&mut self, id: AbstractIdentifier, offset: T) {
self.relative_values.insert(id, offset);
}
}
fn bv(value: i64) -> BitvectorDomain {
......
......@@ -11,6 +11,11 @@ impl IntervalDomain {
IntervalDomain::new(Bitvector::from_i8(start), Bitvector::from_i8(end))
}
/// Return a new interval domain of 4-byte integers.
/// `start` and `end` are converted to `i32` bitvectors and passed on to `IntervalDomain::new`.
pub fn mock_i32(start: i32, end: i32) -> IntervalDomain {
IntervalDomain::new(Bitvector::from_i32(start), Bitvector::from_i32(end))
}
pub fn mock_with_bounds(
lower_bound: Option<i64>,
start: i64,
......
......@@ -19,6 +19,15 @@ pub use mem_region::*;
mod interval;
pub use interval::*;
mod bricks;
pub use bricks::*;
mod character_inclusion;
pub use character_inclusion::*;
mod strings;
pub use strings::*;
mod domain_map;
pub use domain_map::*;
......
/// A set of functions that all abstract string domains should implement.
pub trait DomainInsertion {
/// Appends the given string domain to `self` as part of a concatenation
/// and returns the resulting domain.
fn append_string_domain(&self, string_domain: &Self) -> Self;
/// Creates a string domain with characters that usually appear in an integer value.
fn create_integer_domain() -> Self;
/// Creates a string domain with characters that usually appear in a char value.
fn create_char_domain() -> Self;
/// Creates a string domain with characters that usually appear in a float value.
fn create_float_value_domain() -> Self;
/// Creates a string domain that approximates pointer values.
fn create_pointer_value_domain() -> Self;
/// Creates a top value of the currently used domain.
fn create_top_value_domain() -> Self;
/// Creates a string domain that represents the empty string.
fn create_empty_string_domain() -> Self;
}
......@@ -8,3 +8,4 @@ pub mod forward_interprocedural_fixpoint;
pub mod graph;
pub mod interprocedural_fixpoint_generic;
pub mod pointer_inference;
pub mod string_abstraction;
//! This module contains the Context Object for the String Abstraction.
//! It holds all necessary information that stays unchanged during the analysis.
use std::{
collections::{HashMap, HashSet},
marker::PhantomData,
};
use petgraph::{graph::NodeIndex, visit::IntoNodeReferences};
use crate::{
abstract_domain::{AbstractDomain, DomainInsertion, HasTop},
analysis::{
forward_interprocedural_fixpoint::Context as _, graph::Node,
interprocedural_fixpoint_generic::NodeValue,
pointer_inference::PointerInference as PointerInferenceComputation,
pointer_inference::State as PointerInferenceState,
},
intermediate_representation::{Def, ExternSymbol, Project, Term, Tid},
utils::binary::RuntimeMemoryImage,
};
use super::{state::State, Config};
pub mod symbol_calls;
mod trait_impls;
/// Contains all context information needed for the string abstraction fixpoint computation.
///
/// The struct also implements the `interprocedural_fixpoint::Context` trait to enable the fixpoint computation.
/// The type parameter `T` is the abstract string domain used by the analysis.
pub struct Context<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> {
/// A reference to the `Project` object representing the binary
pub project: &'a Project,
/// The runtime memory image for reading global read-only variables.
/// Note that values of writeable global memory segments are not tracked.
pub runtime_memory_image: &'a RuntimeMemoryImage,
/// A pointer to the results of the pointer inference analysis.
/// They are used to determine the targets of pointers to memory,
/// which in turn is used to keep track of string domains on the stack or on the heap.
pub pointer_inference_results: &'a PointerInferenceComputation<'a>,
/// Maps the TIDs of functions that shall be treated as string extern symbols to the `ExternSymbol` object representing it.
pub string_symbol_map: HashMap<Tid, &'a ExternSymbol>,
/// Maps the TIDs of functions that shall be treated as general extern symbols to the `ExternSymbol` object representing it.
pub extern_symbol_map: HashMap<Tid, &'a ExternSymbol>,
/// Maps string symbols to their corresponding format string parameter index.
pub format_string_index_map: HashMap<String, usize>,
/// A map to get the node index of the `BlkStart` node containing a given [`Def`] as the first `Def` of the block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub block_start_node_map: HashMap<(Tid, Tid), NodeIndex>,
/// A set containing a given [`Def`](crate::intermediate_representation::Def) as the first `Def` of the block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub block_first_def_set: HashSet<(Tid, Tid)>,
/// A map to get the node index of the `BlkEnd` node containing a given `Jmp`.
/// The keys are of the form `(Jmp-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub jmp_to_blk_end_node_map: HashMap<(Tid, Tid), NodeIndex>,
// Marker for the string domain type `T`, which is not stored in any other field.
_phantom_string_domain: PhantomData<T>,
}
impl<'a, T: AbstractDomain + HasTop + Eq + From<String> + DomainInsertion> Context<'a, T> {
/// Create a new context object for a given project.
pub fn new(
project: &'a Project,
runtime_memory_image: &'a RuntimeMemoryImage,
pointer_inference_results: &'a PointerInferenceComputation<'a>,
config: Config,
) -> Context<'a, T> {
let string_symbol_map =
crate::utils::symbol_utils::get_symbol_map(project, &config.string_symbols[..]);
let mut extern_symbol_map = HashMap::new();
for (tid, symbol) in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(tid.clone(), symbol);
}
let mut block_start_node_map: HashMap<(Tid, Tid), NodeIndex> = HashMap::new();
let mut block_first_def_set = HashSet::new();
let mut jmp_to_blk_end_node_map = HashMap::new();
for (node_id, node) in pointer_inference_results.get_graph().node_references() {
match node {
Node::BlkStart(block, sub) => {
if let Some(def) = block.term.defs.get(0) {
block_start_node_map.insert((def.tid.clone(), sub.tid.clone()), node_id);
block_first_def_set.insert((def.tid.clone(), sub.tid.clone()));
}
}
Node::BlkEnd(block, sub) => {
for jmp in block.term.jmps.iter() {
jmp_to_blk_end_node_map.insert((jmp.tid.clone(), sub.tid.clone()), node_id);
}
}
_ => (),
}
}
Context {
project,
runtime_memory_image,
pointer_inference_results,
format_string_index_map: config.format_string_index.into_iter().collect(),
string_symbol_map,
extern_symbol_map,
block_start_node_map,
block_first_def_set,
jmp_to_blk_end_node_map,
_phantom_string_domain: PhantomData,
}
}
/// Get the current pointer inference state (if one can be found) for the given state.
///
/// A pointer inference state stored in `state` takes precedence.
/// Otherwise `tid` is looked up together with the TID of the current sub of `state`
/// in the `block_start_node_map`, and the node value of the pointer inference results
/// at that node is returned, if there is one.
///
/// # Panics
///
/// Panics if `state` contains neither a pointer inference state nor a current sub.
fn get_current_pointer_inference_state(
    &self,
    state: &State<T>,
    tid: &Tid,
) -> Option<PointerInferenceState> {
    if let Some(pi_state) = state.get_pointer_inference_state() {
        return Some(pi_state.clone());
    }

    let current_sub_tid = state.get_current_sub().unwrap().tid.clone();
    let node_id = self
        .block_start_node_map
        .get(&(tid.clone(), current_sub_tid))?;

    match self.pointer_inference_results.get_node_value(*node_id) {
        Some(NodeValue::Value(val)) => Some(val.clone()),
        _ => None,
    }
}
/// Update the pointer inference state contained in the given state
/// according to the effect of the given `Def` term.
///
/// Nothing happens if no current pointer inference state can be determined.
fn update_pointer_inference_state(&self, state: &mut State<T>, def: &Term<Def>) {
    let current_pi_state = match self.get_current_pointer_inference_state(state, &def.tid) {
        Some(pi_state) => pi_state,
        None => return,
    };

    let updated_pi_state = self
        .pointer_inference_results
        .get_context()
        .update_def(&current_pi_state, def);
    state.set_pointer_inference_state(updated_pi_state);
}
}
#[cfg(test)]
mod tests;
//! This module handles the string processing at external symbol calls.
use regex::Regex;
use std::collections::BTreeMap;
use crate::abstract_domain::{
AbstractIdentifier, DomainInsertion, HasTop, IntervalDomain, TryToBitvec,
};
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::intermediate_representation::{Bitvector, Datatype};
use crate::{abstract_domain::AbstractDomain, intermediate_representation::ExternSymbol};
use super::super::state::State;
use super::Context;
mod memcpy;
mod scanf;
mod sprintf;
mod strcat;
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Context<'a, T> {
/// Handles generic symbol calls: returns a copy of `state` from which all
/// non callee saved pointer entries for the given extern symbol are removed.
pub fn handle_generic_symbol_calls(
    &self,
    extern_symbol: &ExternSymbol,
    state: &State<T>,
) -> State<T> {
    let mut state_after_call = state.clone();
    state_after_call
        .remove_non_callee_saved_pointer_entries_for_external_symbol(self.project, extern_symbol);

    state_after_call
}
/// Handles calls to external symbols for which no ExternSymbol object is known.
///
/// Only the entries of the variable to pointer map whose register is callee saved
/// in the standard calling convention are kept.
/// The state is left untouched if the project has no standard calling convention.
pub fn handle_unknown_symbol_calls(&self, state: &mut State<T>) {
    let standard_cconv = match self.project.get_standard_calling_convention() {
        Some(cconv) => cconv,
        None => return,
    };

    let callee_saved_entries = state
        .get_variable_to_pointer_map()
        .iter()
        .filter(|(register, _)| {
            standard_cconv
                .callee_saved_register
                .contains(&register.name)
        })
        .map(|(register, pointer)| (register.clone(), pointer.clone()))
        .collect();

    state.set_variable_to_pointer_map(callee_saved_entries);
}
/// The output of a string symbol is added to the map of abstract strings.
/// If the symbol returns a format string, the string is approximated
/// as good as possible by checking the input parameters.
///
/// The call is dispatched by symbol name to the matching handler. Afterwards all
/// non callee saved pointer entries for the extern symbol are removed from the result.
///
/// # Panics
///
/// Panics if the name of `extern_symbol` is not one of the handled string symbols.
pub fn handle_string_symbol_calls(
    &self,
    extern_symbol: &ExternSymbol,
    state: &State<T>,
) -> State<T> {
    let symbol_name = extern_symbol.name.as_str();

    let mut state_after_call = match symbol_name {
        "scanf" | "__isoc99_scanf" => self.handle_scanf_calls(state, extern_symbol),
        "sscanf" | "__isoc99_sscanf" => self.handle_sscanf_calls(state, extern_symbol),
        "sprintf" | "snprintf" | "vsprintf" | "vsnprintf" => {
            self.handle_sprintf_and_snprintf_calls(state, extern_symbol)
        }
        "strcat" | "strncat" => self.handle_strcat_and_strncat_calls(state, extern_symbol),
        "memcpy" => self.handle_memcpy_calls(state, extern_symbol),
        "free" => self.handle_free(state, extern_symbol),
        _ => panic!("Unexpected Extern Symbol."),
    };

    state_after_call
        .remove_non_callee_saved_pointer_entries_for_external_symbol(self.project, extern_symbol);

    state_after_call
}
/// Adds the given string domain to the state for every target of the pointer.
///
/// - If the target is the current stack frame or one of the caller stack frames,
///   the domain is stored at the stack offset, provided the offset can be
///   converted into a concrete value. Otherwise the target is skipped.
/// - All other targets are treated as heap objects and the domain is
///   stored in the heap to string map.
pub fn add_new_string_abstract_domain(
    state: &mut State<T>,
    pi_state: &PointerInferenceState,
    pointer: &BTreeMap<AbstractIdentifier, IntervalDomain>,
    domain_input_string: T,
) {
    for (target, offset) in pointer.iter() {
        let is_stack_target =
            pi_state.stack_id == *target || pi_state.caller_stack_ids.contains(target);

        if !is_stack_target {
            state.add_new_heap_to_string_entry(target.clone(), domain_input_string.clone());
            continue;
        }

        if let Ok(stack_offset) = offset.try_to_offset() {
            state.add_new_stack_offset_to_string_entry(stack_offset, domain_input_string.clone());
        }
    }
}
/// Regex that filters format specifiers from a format string.
///
/// A specifier consists of `%`, an optional width of up to two digits and
/// a conversion, optionally with a length modifier (e.g. `%d`, `%10s`, `%lld`, `%Lf`).
/// The single character conversions are listed in a character class without
/// separators, since a comma inside a character class is matched literally
/// and would make `%,` count as a format specifier.
///
/// # Panics
///
/// Panics if the regex fails to compile, which cannot happen for the constant pattern.
pub fn re_format_specifier() -> Regex {
    Regex::new(r#"%\d{0,2}([cCdiouxXeEfFgGaAnpsS]|hi|hd|hu|li|ld|lu|lli|lld|llu|lf|lg|le|la|lF|lG|lE|lA|Lf|Lg|Le|La|LF|LG|LE|LA)"#).expect("No valid regex!")
}
/// Merges domains from multiple pointer targets. The merged domain serves as input to a format string.
///
/// For each target the tracked string domain is looked up:
/// - in the stack offset map, if the target is the current stack frame or a caller stack frame,
/// - in the heap map otherwise.
///
/// If one of the targets does not contain a domain or the offset of a stack target cannot be parsed,
/// a *Top* value is returned as no assumption can be made about the input.
/// A *Top* value is also returned if `pointer` has no targets at all.
pub fn merge_domains_from_multiple_pointer_targets(
    state: &State<T>,
    pi_state: &PointerInferenceState,
    pointer: &BTreeMap<AbstractIdentifier, IntervalDomain>,
) -> T {
    let mut merged_domain: Option<T> = None;

    for (target, offset) in pointer.iter() {
        let is_stack_target =
            pi_state.caller_stack_ids.contains(target) || pi_state.stack_id == *target;

        let tracked_domain: Option<T> = if is_stack_target {
            // Check the stack offset map if the target points to a stack position.
            match offset.try_to_offset() {
                Ok(offset_value) => state
                    .get_stack_offset_to_string_map()
                    .get(&offset_value)
                    .cloned(),
                Err(_) => None,
            }
        } else {
            // Check the heap map if the target points to a heap position.
            state.get_heap_to_string_map().get(target).cloned()
        };

        merged_domain = match (merged_domain, tracked_domain) {
            (Some(merged), Some(domain)) => Some(merged.merge(&domain)),
            (None, Some(domain)) => Some(domain),
            // An untracked target makes the whole input unknown.
            (_, None) => return T::create_top_value_domain(),
        };
    }

    // Without any target there is nothing to merge, so nothing is known about the input.
    merged_domain.unwrap_or_else(T::create_top_value_domain)
}
/// Calls the appropriate data type approximator for a format specifier.
///
/// The specifier is converted into a `Datatype`, which selects the char, integer,
/// pointer or float value domain. Note that `Long` and `LongLong` are mapped to
/// the float value domain together with `Double` and `LongDouble`.
///
/// # Panics
///
/// Panics if the specifier is converted into any other data type.
pub fn approximate_string_domain_from_datatype(specifier: String) -> T {
    let datatype = Datatype::from(specifier);

    match datatype {
        Datatype::Integer => T::create_integer_domain(),
        Datatype::Char => T::create_char_domain(),
        Datatype::Pointer => T::create_pointer_value_domain(),
        Datatype::Double | Datatype::LongDouble | Datatype::Long | Datatype::LongLong => {
            T::create_float_value_domain()
        }
        _ => panic!("Invalid data type specifier from format string."),
    }
}
/// Creates the string domain of an integer constant that is inserted into the format string.
///
/// Returns `None` if the constant cannot be converted into an `i64`.
/// Otherwise the domain is built from the decimal representation of the integer.
pub fn get_constant_integer_domain(constant: Bitvector) -> Option<T> {
    constant
        .try_to_i64()
        .ok()
        .map(|integer| T::from(integer.to_string()))
}
/// Creates the string domain of a char constant that is inserted into the format string.
///
/// The constant is first treated as an address: if a value of char size can be read
/// from the runtime memory image at that address, the read value is used as char code.
/// Otherwise the constant itself is used as char code.
/// Returns `None` if the char code cannot be parsed into a `char`.
pub fn get_constant_char_domain(&self, constant: Bitvector) -> Option<T> {
    let char_size = self
        .project
        .datatype_properties
        .get_size_from_data_type(Datatype::Char);

    let char_code = match self.runtime_memory_image.read(&constant, char_size) {
        Ok(Some(code_in_memory)) => code_in_memory,
        _ => constant.clone(),
    };

    Context::<T>::parse_bitvec_to_char(char_code).map(|c_char| T::from(c_char.to_string()))
}
/// Parses a bitvector to a char if possible.
///
/// Returns `None` if the bitvector cannot be converted into a `u32`
/// or if the resulting number is not a valid `char`.
pub fn parse_bitvec_to_char(char_code: Bitvector) -> Option<char> {
    char_code
        .try_to_u32()
        .ok()
        .and_then(std::char::from_u32)
}
/// Creates the string domain of a string constant that is inserted into the format string.
///
/// The constant is used as the address of a null terminated string in the runtime memory image.
/// Returns `None` if the string cannot be read or if it is empty.
pub fn get_constant_string_domain(&self, constant: Bitvector) -> Option<T> {
    match self
        .runtime_memory_image
        .read_string_until_null_terminator(&constant)
    {
        Ok(string) if !string.is_empty() => Some(T::from(string.to_string())),
        _ => None,
    }
}
/// Deletes string entries in the heap to string map if the corresponding pointer is used
/// to free memory space.
///
/// An unchanged copy of `state` is returned if the symbol has no parameter, if the state
/// contains no pointer inference state or if the first parameter cannot be evaluated.
pub fn handle_free(&self, state: &State<T>, extern_symbol: &ExternSymbol) -> State<T> {
    let mut new_state = state.clone();

    let dest_arg = match extern_symbol.parameters.first() {
        Some(arg) => arg,
        None => return new_state,
    };
    let pi_state = match state.get_pointer_inference_state() {
        Some(pi_state) => pi_state,
        None => return new_state,
    };
    let freed_pointer = match pi_state.eval_parameter_arg(
        dest_arg,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    ) {
        Ok(pointer) => pointer,
        Err(_) => return new_state,
    };

    // Only targets that are actually tracked have to be removed.
    let tracked_heap_strings = state.get_heap_to_string_map();
    for target in freed_pointer.get_relative_values().keys() {
        if tracked_heap_strings.contains_key(target) {
            new_state.remove_heap_to_string_entry(target);
        }
    }

    new_state
}
}
#[cfg(test)]
pub mod tests;
use anyhow::Error;
use crate::abstract_domain::AbstractIdentifier;
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::{
abstract_domain::{
AbstractDomain, DataDomain, DomainInsertion, HasTop, IntervalDomain, TryToBitvec,
},
analysis::string_abstraction::{context::Context, state::State},
intermediate_representation::ExternSymbol,
};
use std::collections::BTreeMap;
use crate::prelude::*;
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Context<'a, T> {
/// Handles the detection of string parameters to memcpy calls.
///
/// If the state contains a pointer inference state and the destination parameter
/// has at least one relative target, the string domains of the destination targets
/// are updated. The source parameter is optional: it is passed on as `None`
/// if it cannot be evaluated. In all other cases an unchanged copy of `state` is returned.
pub fn handle_memcpy_calls(&self, state: &State<T>, extern_symbol: &ExternSymbol) -> State<T> {
    let mut new_state = state.clone();

    if let Some(pi_state) = state.get_pointer_inference_state() {
        if let Ok(return_target) = self.has_return_target(extern_symbol, pi_state) {
            let input_target = self.has_input_target(extern_symbol, pi_state).ok();

            self.process_domains_for_memcpy_calls(
                &mut new_state,
                pi_state,
                return_target,
                input_target,
            );
        }
    }

    new_state
}
/// Checks whether the first input parameter contains a return target.
///
/// Returns the evaluated data of the first parameter if it has at least one relative value.
///
/// # Errors
///
/// Returns an error if the symbol has no parameter, if the parameter cannot be
/// evaluated or if the evaluated data has no relative values.
pub fn has_return_target(
    &self,
    extern_symbol: &ExternSymbol,
    pi_state: &PointerInferenceState,
) -> Result<DataDomain<IntervalDomain>, Error> {
    extern_symbol
        .parameters
        .first()
        .and_then(|return_arg| {
            pi_state
                .eval_parameter_arg(
                    return_arg,
                    &self.project.stack_pointer_register,
                    self.runtime_memory_image,
                )
                .ok()
        })
        .filter(|return_data| !return_data.get_relative_values().is_empty())
        .ok_or_else(|| anyhow!("No return value"))
}
/// Checks whether the second input parameter contains a source target.
///
/// Returns the evaluated data of the second parameter.
///
/// # Errors
///
/// Returns an error if the symbol has no second parameter
/// or if the parameter cannot be evaluated.
pub fn has_input_target(
    &self,
    extern_symbol: &ExternSymbol,
    pi_state: &PointerInferenceState,
) -> Result<DataDomain<IntervalDomain>, Error> {
    match extern_symbol.parameters.get(1) {
        Some(input_arg) => pi_state.eval_parameter_arg(
            input_arg,
            &self.project.stack_pointer_register,
            self.runtime_memory_image,
        ),
        None => Err(anyhow!("No input values")),
    }
}
/// Processes string domains in memcpy calls on a case by case basis.
///
/// - **Case 1**: Both the destination pointer domain and the source pointer domain have multiple
/// targets. In this case all targets of the destination pointer receive *Top* values as
/// it is unclear which source target corresponds to which destination target due to
/// path insensitivity.
///
/// - **Case 2**: Only the destination pointer domain has multiple targets. In this case
/// it is checked whether a string domain is tracked at the corresponding source position. If so,
/// a new map entry is created for the string domain at all destination targets.
/// Otherwise, a *Top* value is created.
///
/// - **Case 3**: Both pointer domains have unique targets. In this case a potential string domain
/// is simply copied to the destination target.
///
/// If no source data is given at all, all destination targets receive *Top* values.
///
/// Note that it is assumed that a memcpy input is always a string as it is part of the *string.h*
/// C header file.
pub fn process_domains_for_memcpy_calls(
    &self,
    state: &mut State<T>,
    pi_state: &PointerInferenceState,
    return_data: DataDomain<IntervalDomain>,
    input_data: Option<DataDomain<IntervalDomain>>,
) {
    let relative_return_targets = return_data.get_relative_values();

    // The source is only usable if source and destination are not both ambiguous.
    let usable_source = input_data.as_ref().filter(|&input_target| {
        relative_return_targets.len() <= 1 || !Context::<T>::has_multiple_targets(input_target)
    });

    match usable_source {
        Some(input_target) => self.process_domains_for_memcpy_calls_with_one_unique_input(
            state,
            pi_state,
            input_target,
            relative_return_targets,
        ),
        None => Context::<T>::add_new_string_abstract_domain(
            state,
            pi_state,
            relative_return_targets,
            T::create_top_value_domain(),
        ),
    }
}
/// Processes domains for memcpy calls where at least one of
/// the parameters contains a unique target.
///
/// The domain written to the destination targets is assembled from two optional parts:
/// the constant string found at the absolute (global) source address and the merged
/// domains of all relative source targets. If both parts exist they are merged;
/// if neither exists, the state is left untouched.
pub fn process_domains_for_memcpy_calls_with_one_unique_input(
    &self,
    state: &mut State<T>,
    pi_state: &PointerInferenceState,
    input_target: &DataDomain<IntervalDomain>,
    relative_return_targets: &BTreeMap<AbstractIdentifier, IntervalDomain>,
) {
    let constant_domain: Option<T> = self.get_constant_target(input_target);
    let source_targets = input_target.get_relative_values();
    let tracked_domain: Option<T> = if source_targets.is_empty() {
        None
    } else {
        Some(Context::<T>::merge_domains_from_multiple_pointer_targets(
            state,
            pi_state,
            source_targets,
        ))
    };
    let copied_domain: Option<T> = match (constant_domain, tracked_domain) {
        (Some(constant), Some(tracked)) => Some(constant.merge(&tracked)),
        // At most one side is present here, so simply take whichever exists.
        (constant, tracked) => constant.or(tracked),
    };
    if let Some(domain) = copied_domain {
        Context::<T>::add_new_string_abstract_domain(
            state,
            pi_state,
            relative_return_targets,
            domain,
        );
    }
}
/// Returns the string domain of a global memory target if there is one.
///
/// `None` is returned if the data domain has no absolute value, if the absolute
/// value cannot be converted to a single bitvector, or if no constant string
/// domain can be obtained for the resulting address.
pub fn get_constant_target(&self, input_target: &DataDomain<IntervalDomain>) -> Option<T> {
    let global_address = input_target.get_absolute_value()?;
    let address_value = global_address.try_to_bitvec().ok()?;
    self.get_constant_string_domain(address_value)
}
/// Checks whether a data domain has multiple targets.
///
/// A data domain counts as having multiple targets if
/// - its absolute value is a single address and there is at least one relative target,
/// - its absolute value cannot be converted to a single address (multiple global targets), or
/// - it has no absolute value but more than one relative target.
pub fn has_multiple_targets(data: &DataDomain<IntervalDomain>) -> bool {
    let relative_target_count = data.get_relative_values().len();
    match data.get_absolute_value() {
        // One global target + at least one relative target.
        Some(global_address) if global_address.try_to_bitvec().is_ok() => {
            relative_target_count >= 1
        }
        // Multiple global targets.
        Some(_) => true,
        // More than one relative target and no global targets.
        None => relative_target_count > 1,
    }
}
}
#[cfg(test)]
mod tests {
use std::collections::{BTreeMap, BTreeSet, HashSet};
use crate::{
abstract_domain::{
AbstractIdentifier, AbstractLocation, CharacterInclusionDomain, CharacterSet,
},
analysis::pointer_inference::PointerInference as PointerInferenceComputation,
analysis::string_abstraction::{
context::symbol_calls::tests::Setup,
tests::mock_project_with_intraprocedural_control_flow,
},
intermediate_representation::{Bitvector, Tid, Variable},
utils::binary::RuntimeMemoryImage,
};
use super::*;
#[test]
fn test_handle_memcpy_calls_with_multiple_source_targets() {
    let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );
    let heap_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("r5", 4)).unwrap(),
    );

    // Let the source register r1 point to a stack target and a heap target.
    let mut source_pointer: DataDomain<IntervalDomain> =
        DataDomain::from_target(stack_id, Bitvector::from_i32(4).into());
    source_pointer.insert_relative_value(heap_id.clone(), Bitvector::from_i32(0).into());
    setup
        .pi_state_before_symbol_call
        .set_register(&Variable::mock("r1", 4), source_pointer);

    // Track a distinct string at each of the two source targets.
    setup
        .state_before_call
        .add_new_stack_offset_to_string_entry(
            4,
            CharacterInclusionDomain::from("a".to_string()),
        );
    setup
        .state_before_call
        .add_new_heap_to_string_entry(heap_id, CharacterInclusionDomain::from("b".to_string()));
    setup
        .state_before_call
        .set_pointer_inference_state(Some(setup.pi_state_before_symbol_call));

    // Test Case: source pointer has multiple targets and destination pointer has a unique target.
    let new_state = setup
        .context
        .handle_memcpy_calls(&setup.state_before_call, &memcpy_symbol);

    let expected_domain = CharacterInclusionDomain::Value((
        CharacterSet::Value(BTreeSet::new()),
        CharacterSet::Value(vec!['a', 'b'].into_iter().collect()),
    ));
    assert_eq!(
        Some(&expected_domain),
        new_state.get_stack_offset_to_string_map().get(&-60)
    );
}
#[test]
fn test_handle_memcpy_calls_with_unique_pointers() {
    let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // Test Case: destination and source pointer have unique targets and source is global constant
    let state_after_call = setup
        .context
        .handle_memcpy_calls(&setup.state_before_call, &memcpy_symbol);

    let expected_domain = CharacterInclusionDomain::from("str1 str2 str3 str4".to_string());
    assert_eq!(
        Some(&expected_domain),
        state_after_call.get_stack_offset_to_string_map().get(&-60)
    );
}
#[test]
fn test_has_return_target() {
    let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // The destination pointer is expected to target the stack at offset -60.
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );
    let expected_data: DataDomain<IntervalDomain> =
        DataDomain::from_target(stack_id, IntervalDomain::mock_i32(-60, -60));

    let return_target = setup
        .context
        .has_return_target(&memcpy_symbol, &setup.pi_state_before_symbol_call)
        .unwrap();
    assert_eq!(expected_data, return_target);
}
#[test]
fn test_has_input_target() {
    let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // The source pointer is expected to be the global address 0x7000.
    let expected_data: DataDomain<IntervalDomain> =
        DataDomain::from(Bitvector::from_i32(0x7000));

    let input_target = setup
        .context
        .has_input_target(&memcpy_symbol, &setup.pi_state_before_symbol_call)
        .unwrap();
    assert_eq!(expected_data, input_target);
}
#[test]
fn test_process_domains_for_memcpy_calls() {
    let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // Destination pointer taken from r0, source pointer reduced to the absolute value of r1.
    let destination_pointer = setup
        .pi_state_before_symbol_call
        .get_register_by_name("r0")
        .unwrap();
    let source_register = setup
        .pi_state_before_symbol_call
        .get_register_by_name("r1")
        .unwrap();
    let source_pointer: DataDomain<IntervalDomain> =
        DataDomain::from(source_register.get_absolute_value().unwrap().clone());

    setup.context.process_domains_for_memcpy_calls(
        &mut setup.state_before_call,
        &setup.pi_state_before_symbol_call,
        destination_pointer,
        Some(source_pointer),
    );

    let expected_domain = CharacterInclusionDomain::ci("str1 str2 str3 str4");
    assert_eq!(
        Some(&expected_domain),
        setup
            .state_before_call
            .get_stack_offset_to_string_map()
            .get(&-60)
    );
}
#[test]
fn test_process_domains_for_memcpy_calls_with_one_unique_input() {
    let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // Relative targets of the destination register r0.
    let destination_targets = setup
        .pi_state_before_symbol_call
        .get_register_by_name("r0")
        .unwrap()
        .get_relative_values()
        .clone();
    // Source pointer consisting only of the absolute value of r1.
    let source_register = setup
        .pi_state_before_symbol_call
        .get_register_by_name("r1")
        .unwrap();
    let source_pointer: DataDomain<IntervalDomain> =
        DataDomain::from(source_register.get_absolute_value().unwrap().clone());

    setup
        .context
        .process_domains_for_memcpy_calls_with_one_unique_input(
            &mut setup.state_before_call,
            &setup.pi_state_before_symbol_call,
            &source_pointer,
            &destination_targets,
        );

    let expected_domain = CharacterInclusionDomain::ci("str1 str2 str3 str4");
    assert_eq!(
        Some(&expected_domain),
        setup
            .state_before_call
            .get_stack_offset_to_string_map()
            .get(&-60)
    );
}
#[test]
fn test_get_constant_target() {
    let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // A pointer holding only the global address 0x7000.
    let global_pointer = DataDomain::from(Bitvector::from_i32(0x7000));
    let constant_domain = setup.context.get_constant_target(&global_pointer).unwrap();
    assert_eq!(
        CharacterInclusionDomain::ci("str1 str2 str3 str4"),
        constant_domain
    );
}
#[test]
fn test_has_multiple_targets() {
    // Shorthand for the function under test.
    let has_multiple = |data: &DataDomain<IntervalDomain>| {
        Context::<CharacterInclusionDomain>::has_multiple_targets(data)
    };
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );
    let heap_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("r5", 4)).unwrap(),
    );

    // Test Case 1: Only one relative target.
    let mut data: DataDomain<IntervalDomain> = DataDomain::mock_from_target_map(
        vec![(stack_id.clone(), IntervalDomain::mock_i32(8, 8))]
            .into_iter()
            .collect(),
    );
    assert!(!has_multiple(&data));

    // Test Case 2: One absolute value and at least one relative target.
    data.set_absolute_value(Some(IntervalDomain::mock_i32(0x7000, 0x7000)));
    assert!(has_multiple(&data));

    // Test Case 3: Only an absolute value.
    data.set_relative_values(BTreeMap::new());
    assert!(!has_multiple(&data));

    // Test Case 4: Multiple relative targets.
    data.set_absolute_value(None);
    data.insert_relative_value(stack_id, IntervalDomain::mock_i32(8, 8));
    data.insert_relative_value(heap_id, IntervalDomain::mock_i32(0, 0));
    assert!(has_multiple(&data));

    // Test Case 5: Multiple absolute values.
    data.set_absolute_value(Some(IntervalDomain::mock_i32(0x7000, 0x7008)));
    data.set_relative_values(BTreeMap::new());
    assert!(has_multiple(&data));
}
}
use std::collections::HashMap;
use anyhow::Error;
use itertools::izip;
use crate::abstract_domain::TryToBitvec;
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::prelude::*;
use crate::{
abstract_domain::{AbstractDomain, DataDomain, DomainInsertion, HasTop, IntervalDomain},
analysis::string_abstraction::{context::Context, state::State},
intermediate_representation::{Arg, Datatype, ExternSymbol},
utils::arguments::get_variable_parameters,
};
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Context<'a, T> {
/// Handles the detection of string parameters to scanf calls.
/// Adds new string abstract domains to the current state.
pub fn handle_scanf_calls(&self, state: &State<T>, extern_symbol: &ExternSymbol) -> State<T> {
let mut new_state = state.clone();
if let Some(pi_state) = state.get_pointer_inference_state() {
// Check whether the format string parameters can be parsed.
if let Ok(return_values) = get_variable_parameters(
self.project,
pi_state,
extern_symbol,
&self.format_string_index_map,
self.runtime_memory_image,
) {
self.create_abstract_domain_entries_for_function_return_values(
pi_state,
&mut new_state,
return_values.into_iter().map(|arg| (arg, None)).collect(),
);
}
}
new_state
}
/// Creates string abstract domains for return values of (s)scanf calls.
///
/// Only arguments whose data type is known to be a pointer are considered.
/// Arguments with another data type, or with no data type at all, are skipped.
/// For each remaining argument the pointer is evaluated in the pointer inference
/// state; if it has at least one relative target, the given value (or *Top* if
/// there is none) is added to the state at the pointer's targets.
pub fn create_abstract_domain_entries_for_function_return_values(
    &self,
    pi_state: &PointerInferenceState,
    state: &mut State<T>,
    arg_to_value_map: HashMap<Arg, Option<String>>,
) {
    for (argument, value) in arg_to_value_map {
        // Do not unwrap the data type: an argument without type information
        // must be ignored instead of aborting the analysis.
        if !matches!(argument.get_data_type(), Some(Datatype::Pointer)) {
            continue;
        }
        if let Ok(data) = pi_state.eval_parameter_arg(
            &argument,
            &self.project.stack_pointer_register,
            self.runtime_memory_image,
        ) {
            if !data.get_relative_values().is_empty() {
                Context::add_constant_or_top_value_to_return_locations(
                    state, pi_state, data, value,
                );
            }
        }
    }
}
/// Adds constant or *Top* value to return location given a pointer and a potential value.
///
/// If a value is given, a domain built from that string is added at all relative
/// targets of the pointer; otherwise a *Top* value domain is added. In both cases
/// the pointer is afterwards registered as an unassigned return pointer.
pub fn add_constant_or_top_value_to_return_locations(
    state: &mut State<T>,
    pi_state: &PointerInferenceState,
    return_target: DataDomain<IntervalDomain>,
    value: Option<String>,
) {
    let new_domain = match value {
        Some(string) => T::from(string),
        None => T::create_top_value_domain(),
    };
    Context::add_new_string_abstract_domain(
        state,
        pi_state,
        return_target.get_relative_values(),
        new_domain,
    );
    state.add_unassigned_return_pointer(return_target);
}
/// Handles calls to sscanf. If the source string is known, it is split by spaces
/// and for each substring a string abstract domain is generated at its corresponding location.
///
/// If the source string (first parameter) cannot be mapped to the return locations,
/// the call is handled like a scanf call. Without a pointer inference state
/// an unchanged copy of the state is returned.
pub fn handle_sscanf_calls(&self, state: &State<T>, extern_symbol: &ExternSymbol) -> State<T> {
    let mut new_state = state.clone();
    let pi_state = match state.get_pointer_inference_state() {
        Some(pi_state) => pi_state,
        None => return new_state,
    };
    let source_pointer = extern_symbol.parameters.first().and_then(|source_string_arg| {
        pi_state
            .eval_parameter_arg(
                source_string_arg,
                &self.project.stack_pointer_register,
                self.runtime_memory_image,
            )
            .ok()
    });
    let source_was_mapped = match source_pointer {
        Some(pointer) => self.source_string_mapped_to_return_locations(
            pi_state,
            &mut new_state,
            &pointer,
            extern_symbol,
        ),
        None => false,
    };
    if source_was_mapped {
        new_state
    } else {
        self.handle_scanf_calls(&new_state, extern_symbol)
    }
}
/// Maps the source string to the return locations of the call and returns a boolean
/// which indicates whether the operation was successful.
///
/// The operation fails (returning `false` and leaving the state untouched) if
/// - the source pointer has no absolute value,
/// - the absolute value is not a single exact address,
/// - no null-terminated string can be read from the runtime memory image at that address, or
/// - the source string cannot be mapped to the return parameters of the call.
pub fn source_string_mapped_to_return_locations(
    &self,
    pi_state: &PointerInferenceState,
    state: &mut State<T>,
    source_string: &DataDomain<IntervalDomain>,
    extern_symbol: &ExternSymbol,
) -> bool {
    let global_address = match source_string.get_absolute_value() {
        Some(global_address) => global_address,
        None => return false,
    };
    // An absolute value that is not exactly one address does not identify a single
    // source string. Report failure instead of panicking so that the caller can
    // fall back to its generic handling.
    let address = match global_address.try_to_bitvec() {
        Ok(address) => address,
        Err(_) => return false,
    };
    let source_string = match self
        .runtime_memory_image
        .read_string_until_null_terminator(&address)
    {
        Ok(source_string) => source_string,
        Err(_) => return false,
    };
    match self.map_source_string_parameters_to_return_arguments(
        pi_state,
        extern_symbol,
        source_string,
    ) {
        Ok(source_return_string_map) => {
            self.create_abstract_domain_entries_for_function_return_values(
                pi_state,
                state,
                source_return_string_map,
            );
            true
        }
        Err(_) => false,
    }
}
/// Maps source string parameters to return arguments for sscanf calls.
///
/// The source string is split at single space characters and the resulting
/// substrings are paired, in order, with the variable parameters of the call.
/// Only pairs whose parameter is a pointer are kept. An error is returned if
/// the variable parameters of the call cannot be determined.
pub fn map_source_string_parameters_to_return_arguments(
    &self,
    pi_state: &PointerInferenceState,
    extern_symbol: &ExternSymbol,
    source_string: &str,
) -> Result<HashMap<Arg, Option<String>>, Error> {
    match get_variable_parameters(
        self.project,
        pi_state,
        extern_symbol,
        &self.format_string_index_map,
        self.runtime_memory_image,
    ) {
        Ok(all_parameters) => {
            let substrings: Vec<String> = source_string.split(' ').map(String::from).collect();
            Ok(Context::<T>::filter_out_all_non_string_args(
                all_parameters,
                substrings,
            ))
        }
        Err(_) => Err(anyhow!("Could not map source string to return parameters.")),
    }
}
/// Filters out all parameters that are not of type string.
///
/// Parameters and values are paired up by position. A pair is kept only if
/// the parameter's data type is a pointer; the value is then wrapped in `Some`.
pub fn filter_out_all_non_string_args(
    all_parameters: Vec<Arg>,
    return_values: Vec<String>,
) -> HashMap<Arg, Option<String>> {
    izip!(all_parameters, return_values)
        .filter(|(param, _)| matches!(param.get_data_type(), Some(Datatype::Pointer)))
        .map(|(param, value)| (param, Some(value)))
        .collect()
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use crate::abstract_domain::{AbstractIdentifier, AbstractLocation, CharacterInclusionDomain};
use crate::analysis::pointer_inference::PointerInference as PointerInferenceComputation;
use crate::analysis::string_abstraction::tests::mock_project_with_intraprocedural_control_flow;
use crate::intermediate_representation::Variable;
use crate::utils::binary::RuntimeMemoryImage;
use super::super::tests::*;
use super::*;
#[test]
fn test_handle_scanf_calls() {
    let scanf_symbol = ExternSymbol::mock_scanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(scanf_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    let new_state = setup
        .context
        .handle_scanf_calls(&setup.state_before_call, &scanf_symbol);

    let top_value = CharacterInclusionDomain::from("".to_string()).top();
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );

    // Every expected stack offset must be registered as a return pointer
    // and must hold a Top value.
    let expected_offsets = [-0x74_i32, -0x5e, -0x4c, -0x38];
    for &offset in expected_offsets.iter() {
        assert!(new_state
            .get_unassigned_return_pointer()
            .contains(&DataDomain::from_target(
                stack_id.clone(),
                Bitvector::from_i32(offset).into(),
            )));
    }
    for &offset in expected_offsets.iter() {
        assert_eq!(
            *new_state
                .get_stack_offset_to_string_map()
                .get(&i64::from(offset))
                .unwrap(),
            top_value,
        );
    }
}
#[test]
fn test_create_abstract_domain_entries_for_function_return_values_with_known_values() {
    let sscanf_symbol = ExternSymbol::mock_sscanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sscanf_symbol.clone(), vec![true, true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );

    // One pointer argument passed in register r2 and one passed on the stack,
    // each with a known string value.
    let register_arg = Arg::Register {
        var: Variable::mock("r2", 4),
        data_type: Some(Datatype::Pointer),
    };
    let stack_arg = Arg::Stack {
        offset: 0,
        size: ByteSize::new(4),
        data_type: Some(Datatype::Pointer),
    };
    let mut arg_to_value_map: HashMap<Arg, Option<String>> = HashMap::new();
    arg_to_value_map.insert(register_arg, Some("a".to_string()));
    arg_to_value_map.insert(stack_arg, Some("b".to_string()));

    setup
        .context
        .create_abstract_domain_entries_for_function_return_values(
            &setup.pi_state_before_symbol_call,
            &mut setup.state_before_call,
            arg_to_value_map,
        );

    let expected_entries = [(-0x7c_i32, "a"), (-0x92, "b")];
    for &(offset, value) in expected_entries.iter() {
        assert_eq!(
            *setup
                .state_before_call
                .get_stack_offset_to_string_map()
                .get(&i64::from(offset))
                .unwrap(),
            CharacterInclusionDomain::from(value.to_string())
        );
    }
    for &(offset, _) in expected_entries.iter() {
        assert!(setup
            .state_before_call
            .get_unassigned_return_pointer()
            .contains(&DataDomain::from_target(
                stack_id.clone(),
                Bitvector::from_i32(offset).into(),
            )));
    }
}
#[test]
fn test_create_abstract_domain_entries_for_function_return_values_with_unknown_values() {
    let scanf_symbol = ExternSymbol::mock_scanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(scanf_symbol.clone(), vec![false])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );

    // One pointer argument passed in register r1 and one passed on the stack,
    // both without a known value.
    let register_arg = Arg::Register {
        var: Variable::mock("r1", 4),
        data_type: Some(Datatype::Pointer),
    };
    let stack_arg = Arg::Stack {
        offset: 0,
        size: ByteSize::new(4),
        data_type: Some(Datatype::Pointer),
    };
    let mut arg_to_value_map: HashMap<Arg, Option<String>> = HashMap::new();
    arg_to_value_map.insert(register_arg, None);
    arg_to_value_map.insert(stack_arg, None);

    setup
        .context
        .create_abstract_domain_entries_for_function_return_values(
            &setup.pi_state_before_symbol_call,
            &mut setup.state_before_call,
            arg_to_value_map,
        );

    let expected_offsets = [-0x74_i32, -0x38];
    for &offset in expected_offsets.iter() {
        assert_eq!(
            *setup
                .state_before_call
                .get_stack_offset_to_string_map()
                .get(&i64::from(offset))
                .unwrap(),
            CharacterInclusionDomain::Top
        );
    }
    for &offset in expected_offsets.iter() {
        assert!(setup
            .state_before_call
            .get_unassigned_return_pointer()
            .contains(&DataDomain::from_target(
                stack_id.clone(),
                Bitvector::from_i32(offset).into(),
            )));
    }
}
#[test]
fn test_add_constant_or_top_value_to_return_locations() {
    let sscanf_symbol = ExternSymbol::mock_sscanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sscanf_symbol.clone(), vec![false, false])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // A pointer to stack offset -124 that receives the constant "str1".
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );
    let return_pointer: DataDomain<IntervalDomain> =
        DataDomain::from_target(stack_id, IntervalDomain::mock(-124, -124));

    Context::<CharacterInclusionDomain>::add_constant_or_top_value_to_return_locations(
        &mut setup.state_before_call,
        &setup.pi_state_before_symbol_call,
        return_pointer.clone(),
        Some("str1".to_string()),
    );

    let state_after = &setup.state_before_call;
    assert_eq!(
        *state_after
            .get_stack_offset_to_string_map()
            .get(&-124)
            .unwrap(),
        CharacterInclusionDomain::ci("str1")
    );
    assert!(state_after
        .get_unassigned_return_pointer()
        .contains(&return_pointer));
}
#[test]
fn test_handle_sscanf_calls_unknown_source_unknown_format() {
    let sscanf_symbol = ExternSymbol::mock_sscanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sscanf_symbol.clone(), vec![false, false])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    let state_after_call = setup
        .context
        .handle_sscanf_calls(&setup.state_before_call, &sscanf_symbol);

    // Nothing can be derived if neither source nor format string is known.
    assert!(state_after_call
        .get_stack_offset_to_string_map()
        .is_empty());
    assert!(state_after_call.get_unassigned_return_pointer().is_empty());
}
#[test]
fn test_handle_sscanf_calls_known_source_unknown_format() {
    let sscanf_symbol = ExternSymbol::mock_sscanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sscanf_symbol.clone(), vec![true, false])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    let state_after_call = setup
        .context
        .handle_sscanf_calls(&setup.state_before_call, &sscanf_symbol);

    // A known source string alone is not enough: no entries are expected.
    assert!(state_after_call.get_unassigned_return_pointer().is_empty());
    assert!(state_after_call
        .get_stack_offset_to_string_map()
        .is_empty());
}
#[test]
fn test_handle_sscanf_calls_unknown_source_known_format() {
    let sscanf_symbol = ExternSymbol::mock_sscanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sscanf_symbol.clone(), vec![false, true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );

    let new_state = setup
        .context
        .handle_sscanf_calls(&setup.state_before_call, &sscanf_symbol);

    // With an unknown source string the checked return locations hold Top values.
    let expected_offsets = [-0x7c_i32, -0x92];
    for &offset in expected_offsets.iter() {
        assert_eq!(
            *new_state
                .get_stack_offset_to_string_map()
                .get(&i64::from(offset))
                .unwrap(),
            CharacterInclusionDomain::Top
        );
    }
    for &offset in expected_offsets.iter() {
        assert!(new_state
            .get_unassigned_return_pointer()
            .contains(&DataDomain::from_target(
                stack_id.clone(),
                Bitvector::from_i32(offset).into(),
            )));
    }
}
#[test]
fn test_handle_sscanf_calls_known_source_known_format() {
    let sscanf_symbol = ExternSymbol::mock_sscanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sscanf_symbol.clone(), vec![true, true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );

    let new_state = setup
        .context
        .handle_sscanf_calls(&setup.state_before_call, &sscanf_symbol);

    // Each substring of the source string is expected at its own stack offset.
    let expected_entries = [
        (-0x7c_i32, "str1"),
        (-0x68, "str2"),
        (-0x92, "str3"),
        (-0x84, "str4"),
    ];
    for &(offset, substring) in expected_entries.iter() {
        assert_eq!(
            *new_state
                .get_stack_offset_to_string_map()
                .get(&i64::from(offset))
                .unwrap(),
            CharacterInclusionDomain::from(substring.to_string())
        );
    }
    for &(offset, _) in expected_entries.iter() {
        assert!(new_state
            .get_unassigned_return_pointer()
            .contains(&DataDomain::from_target(
                stack_id.clone(),
                Bitvector::from_i32(offset).into(),
            )));
    }
}
#[test]
fn test_source_string_mapped_to_return_locations() {
    // The source string pointer is the global address 0x7000.
    let source_string: DataDomain<IntervalDomain> =
        DataDomain::from(Bitvector::from_i32(0x7000));
    let sscanf_symbol = ExternSymbol::mock_sscanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sscanf_symbol.clone(), vec![true, true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // The function reports success through its return value, so check it as well.
    assert!(setup.context.source_string_mapped_to_return_locations(
        &setup.pi_state_before_symbol_call,
        &mut setup.state_before_call,
        &source_string,
        &sscanf_symbol,
    ));

    let expected_entries: [(i64, &str); 4] = [
        (-124, "str1"),
        (-104, "str2"),
        (-132, "str4"),
        (-146, "str3"),
    ];
    for &(offset, substring) in expected_entries.iter() {
        assert_eq!(
            CharacterInclusionDomain::ci(substring),
            *setup
                .state_before_call
                .get_stack_offset_to_string_map()
                .get(&offset)
                .unwrap()
        );
    }
}
#[test]
fn test_map_source_string_parameters_to_return_arguments() {
    let source_string = "str1 str2 str3 str4";
    let sscanf_symbol = ExternSymbol::mock_sscanf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sscanf_symbol.clone(), vec![true, true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // Two register arguments followed by two stack arguments, each paired
    // with the substring at the same position in the source string.
    let stack_pointer_arg = |offset| Arg::Stack {
        offset,
        size: ByteSize::new(4),
        data_type: Some(Datatype::Pointer),
    };
    let mut expected_result: HashMap<Arg, Option<String>> = HashMap::new();
    expected_result.insert(
        Arg::mock_pointer_register("r2", 4),
        Some("str1".to_string()),
    );
    expected_result.insert(
        Arg::mock_pointer_register("r3", 4),
        Some("str2".to_string()),
    );
    expected_result.insert(stack_pointer_arg(0), Some("str3".to_string()));
    expected_result.insert(stack_pointer_arg(4), Some("str4".to_string()));

    let actual_result = setup
        .context
        .map_source_string_parameters_to_return_arguments(
            &setup.pi_state_before_symbol_call,
            &sscanf_symbol,
            source_string,
        )
        .unwrap();
    assert_eq!(expected_result, actual_result);
}
#[test]
fn test_filter_out_all_non_string_args() {
    // One pointer-typed argument and one argument created via `mock_register`.
    let args = vec![
        Arg::mock_pointer_register("r0", 4),
        Arg::mock_register("r1", 4),
    ];
    let values = vec![String::from("cat "), String::from("2")];

    // Only the pointer argument is expected to remain, paired with its value.
    let mut expected_output: HashMap<Arg, Option<String>> = HashMap::new();
    expected_output.insert(
        Arg::mock_pointer_register("r0", 4),
        Some(String::from("cat ")),
    );

    let filtered_output =
        Context::<CharacterInclusionDomain>::filter_out_all_non_string_args(args, values);
    assert_eq!(expected_output, filtered_output);
}
}
use regex::Match;
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::{
abstract_domain::{
AbstractDomain, DataDomain, DomainInsertion, HasTop, IntervalDomain, TryToBitvec,
},
analysis::string_abstraction::{context::Context, state::State},
intermediate_representation::{Arg, Datatype, ExternSymbol},
utils::arguments::{get_input_format_string, get_variable_parameters},
};
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Context<'a, T> {
/// Models the effect of a `sprintf`/`snprintf` call on the string abstraction state.
///
/// The first parameter of the symbol is evaluated as the destination pointer.
/// If it has relative targets, the format string is parsed and the resulting
/// string domain is stored for these targets. A constant string parameter can
/// be inserted into the format string this way, e.g. the format string "cat %s"
/// with the constant input "bash.sh" yields a domain built from "cat bash.sh".
/// Whenever the destination pointer could be evaluated, it is also recorded as
/// an unassigned return pointer.
///
/// Returns an unchanged copy of `state` if the symbol has no parameters, no
/// pointer inference state is available or the parameter cannot be evaluated.
///
/// # Panics
///
/// Panics if the destination has relative targets and `format_string_index_map`
/// contains no entry for the name of `extern_symbol`.
pub fn handle_sprintf_and_snprintf_calls(
    &self,
    state: &State<T>,
    extern_symbol: &ExternSymbol,
) -> State<T> {
    let mut new_state = state.clone();
    let return_arg = match extern_symbol.parameters.first() {
        Some(arg) => arg,
        None => return new_state,
    };
    let pi_state = match state.get_pointer_inference_state() {
        Some(pi_state) => pi_state,
        None => return new_state,
    };
    let return_pointer = match pi_state.eval_parameter_arg(
        return_arg,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    ) {
        Ok(pointer) => pointer,
        Err(_) => return new_state,
    };

    let has_relative_targets = !return_pointer.get_relative_values().is_empty();
    if has_relative_targets {
        let format_string_index = *self
            .format_string_index_map
            .get(&extern_symbol.name)
            .unwrap();
        self.parse_format_string_and_add_new_string_domain(
            &mut new_state,
            pi_state,
            extern_symbol,
            format_string_index,
            &return_pointer,
        );
    }
    new_state.add_unassigned_return_pointer(return_pointer);
    new_state
}
/// Determines the string domain produced by the call and stores it for all
/// relative targets of `return_pointer`.
///
/// If the input format string can be retrieved, the domain is built from it
/// and the call parameters. Otherwise the *Top* value domain is stored.
pub fn parse_format_string_and_add_new_string_domain(
    &self,
    state: &mut State<T>,
    pi_state: &PointerInferenceState,
    extern_symbol: &ExternSymbol,
    format_string_index: usize,
    return_pointer: &DataDomain<IntervalDomain>,
) {
    let format_string_result = get_input_format_string(
        pi_state,
        extern_symbol,
        format_string_index,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    );
    let resulting_domain = match format_string_result {
        Ok(input_format_string) => self.create_string_domain_for_sprintf_snprintf(
            pi_state,
            state,
            extern_symbol,
            input_format_string,
        ),
        Err(_) => T::create_top_value_domain(),
    };
    Context::<T>::add_new_string_abstract_domain(
        state,
        pi_state,
        return_pointer.get_relative_values(),
        resulting_domain,
    );
}
/// Builds the string domain resulting from a s(n)printf call.
///
/// * A format string without specifiers is converted into a domain directly.
/// * If the variable parameters can be determined and are not empty, constants
///   and tracked sub domains are inserted for the specifiers.
/// * If the variable parameters are empty, the *Top* value domain is returned.
/// * If the variable parameters cannot be determined, each specifier is
///   approximated according to its data type.
pub fn create_string_domain_for_sprintf_snprintf(
    &self,
    pi_state: &PointerInferenceState,
    state: &State<T>,
    extern_symbol: &ExternSymbol,
    input_format_string: String,
) -> T {
    let has_no_specifiers = Context::<T>::no_specifiers(input_format_string.clone());
    if has_no_specifiers {
        return T::from(input_format_string);
    }
    let variable_parameters = get_variable_parameters(
        self.project,
        pi_state,
        extern_symbol,
        &self.format_string_index_map,
        self.runtime_memory_image,
    );
    match variable_parameters {
        Ok(var_args) if var_args.is_empty() => T::create_top_value_domain(),
        Ok(var_args) => self.create_string_domain_using_constants_and_sub_domains(
            input_format_string,
            &var_args,
            pi_state,
            state,
        ),
        Err(_) => self.create_string_domain_using_data_type_approximations(input_format_string),
    }
}
/// Builds a domain from a format string by approximating every specifier
/// according to its data type.
///
/// Constant text before, between and after the specifiers is converted into
/// domains of its own, and all parts are concatenated in order of appearance.
/// This way the result is not necessarily *Top* just because the values of
/// the parameters are unknown.
pub fn create_string_domain_using_data_type_approximations(&self, format_string: String) -> T {
    let specifier_regex = Context::<T>::re_format_specifier();
    let mut parts: Vec<T> = Vec::new();
    let mut previous_end = 0;
    for (position, specifier) in specifier_regex.find_iter(&format_string).enumerate() {
        Context::<T>::push_constant_subsequences_before_and_between_specifiers(
            &mut parts,
            &format_string,
            specifier,
            previous_end,
            position,
        );
        // Approximate the specifier by the characters its data type can produce.
        let data_type = Context::<T>::trim_format_specifier(specifier.as_str().to_string());
        parts.push(Context::<T>::approximate_string_domain_from_datatype(
            data_type,
        ));
        previous_end = specifier.end();
    }
    Context::<T>::push_constant_suffix_if_available(&mut parts, &format_string, previous_end);
    Context::<T>::concat_domains(&mut parts)
}
/// Creates a string domain from found constants and sub domains.
///
/// The format string is split at its format specifiers. Constant text before,
/// between and after the specifiers is converted into domains directly. For
/// the specifier at position `i` the domain is fetched for `var_args[i]`.
///
/// If there are more specifiers than entries in `var_args`, the surplus
/// specifiers are approximated according to their data type. They must not be
/// skipped: otherwise their raw text (e.g. `%s`) would end up in the constant
/// suffix and be treated as literal characters of the resulting string.
/// Surplus entries in `var_args` are ignored.
pub fn create_string_domain_using_constants_and_sub_domains(
    &self,
    format_string: String,
    var_args: &[Arg],
    pi_state: &PointerInferenceState,
    state: &State<T>,
) -> T {
    let re = Context::<T>::re_format_specifier();
    let mut domains: Vec<T> = Vec::new();
    let mut last_specifier_end = 0;
    for (index, specifier) in re.find_iter(&format_string).enumerate() {
        Context::push_constant_subsequences_before_and_between_specifiers(
            &mut domains,
            &format_string,
            specifier,
            last_specifier_end,
            index,
        );
        match var_args.get(index) {
            Some(arg) => domains.push(self.fetch_constant_and_domain_for_format_specifier(
                arg,
                specifier.as_str().to_string(),
                pi_state,
                state,
            )),
            // No argument is known for this specifier: fall back to the
            // approximation by data type instead of dropping the specifier.
            None => Context::push_format_specifier_approximation(&mut domains, specifier),
        }
        last_specifier_end = specifier.end();
    }
    Context::push_constant_suffix_if_available(
        &mut domains,
        &format_string,
        last_specifier_end,
    );
    Context::concat_domains(&mut domains)
}
/// Approximates the string domain of a format specifier from its data type
/// and appends the approximation to `domains`.
pub fn push_format_specifier_approximation(domains: &mut Vec<T>, specifier: Match) {
    let data_type = Context::<T>::trim_format_specifier(specifier.as_str().to_string());
    let approximation = Context::<T>::approximate_string_domain_from_datatype(data_type);
    domains.push(approximation);
}
/// Appends the constant text that directly precedes `specifier` to `domains`.
///
/// For the first specifier (`index == 0`) this is the text from the beginning
/// of the format string. For all other specifiers it is the text that starts
/// at `last_specifier_end`. Nothing is appended if that text is empty.
pub fn push_constant_subsequences_before_and_between_specifiers(
    domains: &mut Vec<T>,
    format_string: &str,
    specifier: Match,
    last_specifier_end: usize,
    index: usize,
) {
    let preceding_constant = if index == 0 {
        &format_string[..specifier.start()]
    } else {
        &format_string[last_specifier_end..specifier.start()]
    };
    if !preceding_constant.is_empty() {
        domains.push(T::from(preceding_constant.to_string()));
    }
}
/// Appends the constant text following the last format specifier to `domains`.
/// Nothing is appended if no text remains after `last_specifier_end`.
pub fn push_constant_suffix_if_available(
    domains: &mut Vec<T>,
    format_string: &str,
    last_specifier_end: usize,
) {
    let suffix = &format_string[last_specifier_end..];
    if !suffix.is_empty() {
        domains.push(T::from(suffix.to_string()));
    }
}
/// Takes a vector of string domains and concatenates them.
pub fn concat_domains(domains: &mut Vec<T>) -> T {
let mut init_domain = domains.first().unwrap().clone();
domains.remove(0);
for remaining_domain in domains.iter() {
init_domain = init_domain.append_string_domain(remaining_domain);
}
init_domain
}
/// Returns `true` if the format specifier regex finds no match in `format_string`.
pub fn no_specifiers(format_string: String) -> bool {
    let specifier_regex = Context::<T>::re_format_specifier();
    let contains_specifier = specifier_regex.is_match(format_string.as_str());
    !contains_specifier
}
/// Determines the string domain that is inserted for one format specifier.
///
/// If the argument can be evaluated, the result is chosen in this order:
/// 1. the sub domains tracked for the targets of a pointer argument
///    (merged with a constant domain, if there is one),
/// 2. the constant domain of the argument.
///
/// If neither is available, the domain is approximated from the data type
/// given by the format specifier.
pub fn fetch_constant_and_domain_for_format_specifier(
    &self,
    arg: &Arg,
    specifier: String,
    pi_state: &PointerInferenceState,
    state: &State<T>,
) -> T {
    let evaluated_arg = pi_state.eval_parameter_arg(
        arg,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    );
    if let Ok(data) = evaluated_arg {
        let constant_domain: Option<T> = self.fetch_constant_domain_if_available(&data, arg);
        let sub_domain = Context::<T>::fetch_subdomains_if_available(
            &data,
            state,
            pi_state,
            arg,
            constant_domain.clone(),
        );
        // Tracked sub domains take precedence over a plain constant.
        if let Some(domain) = sub_domain.or(constant_domain) {
            return domain;
        }
    }
    let data_type = Context::<T>::trim_format_specifier(specifier);
    Context::<T>::approximate_string_domain_from_datatype(data_type)
}
/// Strips all leading '%' characters and ASCII digits from a format specifier,
/// e.g. "%02d" becomes "d".
pub fn trim_format_specifier(specifier: String) -> String {
    let is_prefix_char = |character: char| character == '%' || character.is_ascii_digit();
    let trimmed: &str = specifier.trim_start_matches(is_prefix_char);
    String::from(trimmed)
}
/// Merges the string domains of all relative targets of a pointer argument.
///
/// Returns `None` if `data` has no relative targets or if the data type of
/// `arg` is not a pointer. Otherwise the merged domain of all targets is
/// returned, additionally merged with `constant_domain` if one is given.
pub fn fetch_subdomains_if_available(
    data: &DataDomain<IntervalDomain>,
    state: &State<T>,
    pi_state: &PointerInferenceState,
    arg: &Arg,
    constant_domain: Option<T>,
) -> Option<T> {
    if data.get_relative_values().is_empty() {
        return None;
    }
    if !matches!(arg.get_data_type(), Some(Datatype::Pointer)) {
        return None;
    }
    let target_domain = Context::<T>::merge_domains_from_multiple_pointer_targets(
        state,
        pi_state,
        data.get_relative_values(),
    );
    match constant_domain {
        Some(constant) => Some(target_domain.merge(&constant)),
        None => Some(target_domain),
    }
}
/// Tries to turn the absolute value of `data` into a constant string domain.
///
/// Returns `None` if there is no absolute value, if it cannot be converted to
/// a bitvector or if `arg` has no data type. Otherwise the bitvector is
/// interpreted according to the data type of `arg` (char, integer or pointer);
/// any other data type yields `None`.
pub fn fetch_constant_domain_if_available(
    &self,
    data: &DataDomain<IntervalDomain>,
    arg: &Arg,
) -> Option<T> {
    let absolute_value = data.get_absolute_value()?;
    let value_vector = absolute_value.try_to_bitvec().ok()?;
    match arg.get_data_type()? {
        Datatype::Char => self.get_constant_char_domain(value_vector),
        Datatype::Integer => Context::<T>::get_constant_integer_domain(value_vector),
        Datatype::Pointer => self.get_constant_string_domain(value_vector),
        _ => None,
    }
}
}
#[cfg(test)]
mod tests;
use std::collections::{BTreeSet, HashSet};
use super::*;
use crate::abstract_domain::{AbstractIdentifier, AbstractLocation};
use crate::analysis::pointer_inference::PointerInference as PointerInferenceComputation;
use crate::intermediate_representation::{Bitvector, Tid, Variable};
use crate::{
abstract_domain::{CharacterInclusionDomain, CharacterSet},
analysis::string_abstraction::{
context::symbol_calls::tests::Setup, tests::mock_project_with_intraprocedural_control_flow,
},
utils::binary::RuntimeMemoryImage,
};
#[test]
fn test_handle_sprintf_and_snprintf_calls() {
    // Mock a function containing an ARM `sprintf` call and run the pointer inference on it.
    let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    let state_after_call = setup
        .context
        .handle_sprintf_and_snprintf_calls(&setup.state_before_call, &sprintf_symbol);

    // The destination pointer is expected to target the stack at offset -84.
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );
    let expected_pointer: DataDomain<IntervalDomain> =
        DataDomain::from_target(stack_id, IntervalDomain::from(Bitvector::from_i32(-84)));
    let first_unassigned_pointer: &DataDomain<IntervalDomain> = state_after_call
        .get_unassigned_return_pointer()
        .into_iter()
        .next()
        .unwrap();
    assert_eq!(expected_pointer, *first_unassigned_pointer);

    // Certain characters of the result; the possible characters are *Top*.
    let expected_domain = CharacterInclusionDomain::Value((
        CharacterSet::Value("toWacl dreH".chars().collect()),
        CharacterSet::Top,
    ));
    let stored_domain = state_after_call
        .get_stack_offset_to_string_map()
        .get(&-84)
        .unwrap();
    assert_eq!(expected_domain, *stored_domain);
}
#[test]
fn test_parse_format_string_and_add_new_string_domain() {
    let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // Destination pointer targeting the stack at offset -84.
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );
    let destination_pointer: DataDomain<IntervalDomain> =
        DataDomain::from_target(stack_id, IntervalDomain::from(Bitvector::from_i32(-84)));
    // The format string is the parameter with index 1.
    let format_string_index: usize = 1;

    setup.context.parse_format_string_and_add_new_string_domain(
        &mut setup.state_before_call,
        &setup.pi_state_before_symbol_call,
        &sprintf_symbol,
        format_string_index,
        &destination_pointer,
    );

    let expected_domain = CharacterInclusionDomain::Value((
        CharacterSet::Value("toWacl dreH".chars().collect()),
        CharacterSet::Top,
    ));
    let stored_domain = setup
        .state_before_call
        .get_stack_offset_to_string_map()
        .get(&-84)
        .unwrap();
    assert_eq!(expected_domain, *stored_domain);
}
#[test]
fn test_create_string_domain_for_sprintf_snprintf() {
    let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    let format_string = String::from("cat %s %s %s %s");
    let created_domain = setup.context.create_string_domain_for_sprintf_snprintf(
        &setup.pi_state_before_symbol_call,
        &setup.state_before_call,
        &sprintf_symbol,
        format_string,
    );

    // Certain characters of the result; the possible characters are *Top*.
    let expected_domain = CharacterInclusionDomain::Value((
        CharacterSet::Value("toWacl dreH".chars().collect()),
        CharacterSet::Top,
    ));
    assert_eq!(expected_domain, created_domain);
}
#[test]
fn test_create_string_domain_using_data_type_approximations() {
    let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    let approximated_domain = setup
        .context
        .create_string_domain_using_data_type_approximations(String::from("Hello %d %s %c %f."));

    // Only the characters of the constant parts are certain.
    let expected_domain = CharacterInclusionDomain::Value((
        CharacterSet::Value("Helo .".chars().collect()),
        CharacterSet::Top,
    ));
    assert_eq!(expected_domain, approximated_domain);
}
#[test]
fn test_create_string_domain_using_constants_and_sub_domains() {
    let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // One variable argument per specifier of the format string "cat %s > %d %c".
    let var_args: Vec<Arg> = vec![
        Arg::Register {
            var: Variable::mock("r6", 4),
            data_type: Some(Datatype::Pointer),
        },
        Arg::Register {
            var: Variable::mock("r7", 4),
            data_type: Some(Datatype::Integer),
        },
        Arg::Register {
            var: Variable::mock("r8", 4),
            data_type: Some(Datatype::Char),
        },
    ];

    // Assign a constant value to each argument register.
    let register_constants: Vec<(&str, u64)> = vec![("r6", 0x3002), ("r7", 2), ("r8", 0x42)];
    for (register_name, constant) in register_constants {
        setup.pi_state_before_symbol_call.set_register(
            &Variable::mock(register_name, 4),
            DataDomain::from(IntervalDomain::new(
                Bitvector::from_u64(constant),
                Bitvector::from_u64(constant),
            )),
        );
    }

    let result_domain = setup
        .context
        .create_string_domain_using_constants_and_sub_domains(
            String::from("cat %s > %d %c"),
            &var_args,
            &setup.pi_state_before_symbol_call,
            &setup.state_before_call,
        );
    let expected_domain = CharacterInclusionDomain::from("cat >HeloWrd2B".to_string());
    assert_eq!(expected_domain, result_domain);
}
#[test]
fn test_push_format_specifier_approximation() {
    let format_string = "Hello %d welcome to %s and %c, %f.";
    let specifier_regex = Context::<CharacterInclusionDomain>::re_format_specifier();
    let mut approximations: Vec<CharacterInclusionDomain> = Vec::new();
    for specifier in specifier_regex.find_iter(format_string) {
        Context::<CharacterInclusionDomain>::push_format_specifier_approximation(
            &mut approximations,
            specifier,
        );
    }

    // One approximation per specifier, in the order %d, %s, %c, %f.
    let expected_approximations = vec![
        CharacterInclusionDomain::create_integer_domain(),
        CharacterInclusionDomain::Top,
        CharacterInclusionDomain::Top,
        CharacterInclusionDomain::create_float_value_domain(),
    ];
    assert_eq!(expected_approximations, approximations);
}
#[test]
fn test_push_constant_subsequences_before_and_between_specifiers() {
    let format_string = "Hello %s welcome to %s";
    let specifier_regex = Context::<CharacterInclusionDomain>::re_format_specifier();
    let specifiers: Vec<Match> = specifier_regex.find_iter(format_string).collect();

    // End offset of the previous specifier for each specifier:
    // 0 for the first one, the end of the first specifier for the second one.
    let previous_ends: Vec<usize> = vec![0, specifiers.first().unwrap().end()];

    let mut constants: Vec<CharacterInclusionDomain> = Vec::new();
    for (index, (specifier, previous_end)) in
        specifiers.into_iter().zip(previous_ends).enumerate()
    {
        Context::<CharacterInclusionDomain>::push_constant_subsequences_before_and_between_specifiers(
            &mut constants,
            format_string,
            specifier,
            previous_end,
            index,
        );
    }

    let expected_constants = vec![
        CharacterInclusionDomain::ci("Hello "),
        CharacterInclusionDomain::ci(" welcome to "),
    ];
    assert_eq!(expected_constants, constants);
}
#[test]
fn test_push_constant_suffix_if_available() {
    let format_string = "Hello world";
    let mut domains: Vec<CharacterInclusionDomain> = Vec::new();

    // A suffix remains after offset 6.
    Context::<CharacterInclusionDomain>::push_constant_suffix_if_available(
        &mut domains,
        format_string,
        6,
    );
    assert_eq!(
        Some(&CharacterInclusionDomain::ci("world")),
        domains.first()
    );
    domains.clear();

    // The offset equals the length of the string, so nothing is pushed.
    Context::<CharacterInclusionDomain>::push_constant_suffix_if_available(
        &mut domains,
        format_string,
        11,
    );
    assert!(domains.is_empty());

    // With offset 0 the whole string is the suffix.
    Context::<CharacterInclusionDomain>::push_constant_suffix_if_available(
        &mut domains,
        format_string,
        0,
    );
    assert_eq!(
        Some(&CharacterInclusionDomain::ci("Hello world")),
        domains.first()
    );
    domains.clear();
}
#[test]
fn test_concat_domains() {
    // Concatenating the domains of "a" and "b" must equal the domain of "ab".
    let mut domains = vec![
        CharacterInclusionDomain::ci("a"),
        CharacterInclusionDomain::ci("b"),
    ];
    let concatenated = Context::<CharacterInclusionDomain>::concat_domains(&mut domains);
    assert_eq!(CharacterInclusionDomain::ci("ab"), concatenated);
}
#[test]
fn test_no_specifiers() {
    // Test Case 1: The format string contains a specifier.
    let with_specifier = Context::<CharacterInclusionDomain>::no_specifiers("%s".to_string());
    assert!(!with_specifier);
    // Test Case 2: The format string contains no specifier.
    let without_specifier =
        Context::<CharacterInclusionDomain>::no_specifiers("hello".to_string());
    assert!(without_specifier);
}
#[test]
fn test_fetch_constant_and_domain_for_format_specifier() {
    let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // Arguments of the three data types handled by the function under test.
    let string_arg = Arg::Register {
        var: Variable::mock("r6", 4),
        data_type: Some(Datatype::Pointer),
    };
    let integer_arg = Arg::Register {
        var: Variable::mock("r7", 4),
        data_type: Some(Datatype::Integer),
    };
    let char_arg = Arg::Register {
        var: Variable::mock("r8", 4),
        data_type: Some(Datatype::Char),
    };

    // Test Case 1: Integer and no tracked value.
    let integer_approximation = CharacterInclusionDomain::Value((
        CharacterSet::Value(BTreeSet::new()),
        CharacterSet::Value("-0123456789".chars().collect()),
    ));
    let untracked_integer = setup
        .context
        .fetch_constant_and_domain_for_format_specifier(
            &integer_arg,
            "%d".to_string(),
            &setup.pi_state_before_symbol_call,
            &setup.state_before_call,
        );
    assert_eq!(integer_approximation, untracked_integer);

    // Test Case 2: String and no tracked value.
    let untracked_string = setup
        .context
        .fetch_constant_and_domain_for_format_specifier(
            &string_arg,
            "%S".to_string(),
            &setup.pi_state_before_symbol_call,
            &setup.state_before_call,
        );
    assert_eq!(CharacterInclusionDomain::Top, untracked_string);

    // Test Case 3: Char and no tracked value.
    let untracked_char = setup
        .context
        .fetch_constant_and_domain_for_format_specifier(
            &char_arg,
            "%c".to_string(),
            &setup.pi_state_before_symbol_call,
            &setup.state_before_call,
        );
    assert_eq!(CharacterInclusionDomain::Top, untracked_char);

    // Test Case 4: Integer and tracked constant.
    setup.pi_state_before_symbol_call.set_register(
        &Variable::mock("r7", 4),
        DataDomain::from(IntervalDomain::new(
            Bitvector::from_u64(2),
            Bitvector::from_u64(2),
        )),
    );
    let constant_integer = setup
        .context
        .fetch_constant_and_domain_for_format_specifier(
            &integer_arg,
            "%d".to_string(),
            &setup.pi_state_before_symbol_call,
            &setup.state_before_call,
        );
    assert_eq!(
        CharacterInclusionDomain::from("2".to_string()),
        constant_integer
    );

    // Test Case 5: Char and tracked constant.
    setup.pi_state_before_symbol_call.set_register(
        &Variable::mock("r8", 4),
        DataDomain::from(IntervalDomain::new(
            Bitvector::from_u32(0x42),
            Bitvector::from_u32(0x42),
        )),
    );
    let constant_char = setup
        .context
        .fetch_constant_and_domain_for_format_specifier(
            &char_arg,
            "%c".to_string(),
            &setup.pi_state_before_symbol_call,
            &setup.state_before_call,
        );
    assert_eq!(
        CharacterInclusionDomain::from("B".to_string()),
        constant_char
    );

    // Test Case 6: String and tracked constant.
    setup.pi_state_before_symbol_call.set_register(
        &Variable::mock("r6", 4),
        DataDomain::from(IntervalDomain::new(
            Bitvector::from_u32(0x3002),
            Bitvector::from_u32(0x3002),
        )),
    );
    let constant_string = setup
        .context
        .fetch_constant_and_domain_for_format_specifier(
            &string_arg,
            "%s".to_string(),
            &setup.pi_state_before_symbol_call,
            &setup.state_before_call,
        );
    assert_eq!(
        CharacterInclusionDomain::from("Hello World".to_string()),
        constant_string
    );

    // Test Case 7: String and tracked domains.
    // The pointer has two relative targets: the stack at offset 16 and a
    // second object (identified via register r9) at offset 0.
    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );
    let mut pointer: DataDomain<IntervalDomain> = DataDomain::from_target(
        stack_id,
        IntervalDomain::new(Bitvector::from_i32(16), Bitvector::from_i32(16)),
    );
    let heap_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("r9", 4)).unwrap(),
    );
    pointer.insert_relative_value(
        heap_id.clone(),
        IntervalDomain::new(Bitvector::from_i32(0), Bitvector::from_i32(0)),
    );
    // Track a different string for each of the two targets.
    setup
        .state_before_call
        .add_new_stack_offset_to_string_entry(16, CharacterInclusionDomain::from("a".to_string()));
    setup
        .state_before_call
        .add_new_heap_to_string_entry(heap_id, CharacterInclusionDomain::from("b".to_string()));
    setup
        .pi_state_before_symbol_call
        .set_register(&Variable::mock("r6", 4), pointer);

    // After merging, no character is certain and 'a' and 'b' are possible.
    let merged_domain = CharacterInclusionDomain::Value((
        CharacterSet::Value(BTreeSet::new()),
        CharacterSet::Value("ab".chars().collect()),
    ));
    let tracked_string = setup
        .context
        .fetch_constant_and_domain_for_format_specifier(
            &string_arg,
            "%s".to_string(),
            &setup.pi_state_before_symbol_call,
            &setup.state_before_call,
        );
    assert_eq!(merged_domain, tracked_string);
}
#[test]
fn test_trim_format_specifier() {
    // A plain specifier only loses the '%' character.
    let plain = Context::<CharacterInclusionDomain>::trim_format_specifier(String::from("%s"));
    assert_eq!(String::from("s"), plain);
    // Leading digits after the '%' character are removed as well.
    let with_digits =
        Context::<CharacterInclusionDomain>::trim_format_specifier(String::from("%02d"));
    assert_eq!(String::from("d"), with_digits);
}
#[test]
fn test_fetch_subdomains_if_available() {
    let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // Test Case 1: No relative targets.
    let empty_data = DataDomain::<IntervalDomain>::new_empty(4.into());
    let without_targets = Context::<CharacterInclusionDomain>::fetch_subdomains_if_available(
        &empty_data,
        &setup.state_before_call,
        &setup.pi_state_before_symbol_call,
        &Arg::mock_register("r0", 4),
        None,
    );
    assert_eq!(None, without_targets);

    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );

    // Test Case 2: Target value is not of type string pointer.
    let stack_pointer = DataDomain::from_target(stack_id.clone(), IntervalDomain::mock(16, 16));
    let non_pointer_arg = Context::<CharacterInclusionDomain>::fetch_subdomains_if_available(
        &stack_pointer,
        &setup.state_before_call,
        &setup.pi_state_before_symbol_call,
        &Arg::mock_register("r0", 4),
        None,
    );
    assert_eq!(None, non_pointer_arg);

    // Test Case 3: Target is of type string pointer.
    setup
        .state_before_call
        .add_new_stack_offset_to_string_entry(16, CharacterInclusionDomain::ci("Hello World"));
    let tracked_pointer = DataDomain::from_target(stack_id, IntervalDomain::mock(16, 16));
    let pointer_arg = Context::<CharacterInclusionDomain>::fetch_subdomains_if_available(
        &tracked_pointer,
        &setup.state_before_call,
        &setup.pi_state_before_symbol_call,
        &Arg::mock_pointer_register("r0", 4),
        None,
    );
    assert_eq!(
        Some(CharacterInclusionDomain::ci("Hello World")),
        pointer_arg
    );
}
#[test]
fn test_fetch_constant_domain_if_available() {
    let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    // Pointer argument holding the constant address 0x7000.
    let string_arg: Arg = Arg::mock_pointer_register("r0", 4);
    let string_data: DataDomain<IntervalDomain> = DataDomain::from(Bitvector::from_i32(0x7000));
    let string_domain = setup
        .context
        .fetch_constant_domain_if_available(&string_data, &string_arg);
    assert_eq!(
        Some(CharacterInclusionDomain::ci("str1 str2 str3 str4")),
        string_domain
    );

    // Integer argument holding the constant 2.
    let integer_arg: Arg = Arg::mock_register_with_data_type("r0", 4, Some(Datatype::Integer));
    let integer_data: DataDomain<IntervalDomain> = DataDomain::from(Bitvector::from_i32(2));
    let integer_domain = setup
        .context
        .fetch_constant_domain_if_available(&integer_data, &integer_arg);
    assert_eq!(Some(CharacterInclusionDomain::ci("2")), integer_domain);

    // Char argument holding the constant 0x61.
    let char_arg: Arg = Arg::mock_register_with_data_type("r0", 4, Some(Datatype::Char));
    let char_data: DataDomain<IntervalDomain> = DataDomain::from(Bitvector::from_i32(0x61));
    let char_domain = setup
        .context
        .fetch_constant_domain_if_available(&char_data, &char_arg);
    assert_eq!(Some(CharacterInclusionDomain::ci("a")), char_domain);
}
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::{
abstract_domain::{AbstractDomain, DomainInsertion, HasTop, TryToBitvec},
analysis::string_abstraction::{context::Context, state::State},
intermediate_representation::ExternSymbol,
};
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Context<'a, T> {
/// Models the effect of a `strcat`/`strncat` call on the string abstraction state.
///
/// The first parameter is evaluated as the destination pointer. If it has
/// relative targets, the domain tracked for these targets is concatenated
/// with the domain of the second parameter and stored for the same targets.
/// The call returns the pointer to the destination string in its return
/// register, so the pointer is additionally recorded for the unique return
/// register of the symbol, or as an unassigned return pointer if there is no
/// unique return register.
///
/// Returns an unchanged copy of `state` if no pointer inference state is
/// available, the symbol has no parameters, the destination cannot be
/// evaluated or the destination has no relative targets.
pub fn handle_strcat_and_strncat_calls(
    &self,
    state: &State<T>,
    extern_symbol: &ExternSymbol,
) -> State<T> {
    let mut new_state = state.clone();
    let pi_state = match state.get_pointer_inference_state() {
        Some(pi_state) => pi_state,
        None => return new_state,
    };
    let return_arg = match extern_symbol.parameters.first() {
        Some(arg) => arg,
        None => return new_state,
    };
    let return_pointer = match pi_state.eval_parameter_arg(
        return_arg,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    ) {
        Ok(pointer) => pointer,
        Err(_) => return new_state,
    };
    if return_pointer.get_relative_values().is_empty() {
        return new_state;
    }

    let destination_domain = Context::<T>::merge_domains_from_multiple_pointer_targets(
        state,
        pi_state,
        return_pointer.get_relative_values(),
    );
    let appended_domain = self.process_second_input_domain(state, extern_symbol, pi_state);
    let concatenated_domain = destination_domain.append_string_domain(&appended_domain);
    Context::add_new_string_abstract_domain(
        &mut new_state,
        pi_state,
        return_pointer.get_relative_values(),
        concatenated_domain,
    );

    match extern_symbol.get_unique_return_register() {
        Ok(return_register) => {
            new_state.add_new_variable_to_pointer_entry(return_register.clone(), return_pointer);
        }
        Err(_) => {
            new_state.add_unassigned_return_pointer(return_pointer);
        }
    }
    new_state
}
/// Processes the contents of the second input parameter.
///
/// The parameter is evaluated and its string domain is determined from two
/// sources:
/// * the domains tracked for the relative (stack/heap) targets of the pointer,
/// * a string constant read from the runtime memory image, if the pointer
///   has an absolute value that points to a readable, null terminated string.
///
/// If both sources are present, their domains are merged. This also holds if
/// the domain of the relative targets is *Top*: the pointer may still refer
/// to an untracked target, so the constant must not replace the *Top* value.
/// If only one source is present, its domain is returned.
///
/// Returns the *Top* value domain if the symbol has no second parameter, if
/// the parameter cannot be evaluated or if neither source yields a domain.
pub fn process_second_input_domain(
    &self,
    state: &State<T>,
    extern_symbol: &ExternSymbol,
    pi_state: &PointerInferenceState,
) -> T {
    let input_arg = match extern_symbol.parameters.get(1) {
        Some(arg) => arg,
        None => return T::create_top_value_domain(),
    };
    let input_value = match pi_state.eval_parameter_arg(
        input_arg,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    ) {
        Ok(value) => value,
        Err(_) => return T::create_top_value_domain(),
    };

    // Domain of the string if it is located on the stack or heap.
    let tracked_domain: Option<T> = if input_value.get_relative_values().is_empty() {
        None
    } else {
        Some(Context::<T>::merge_domains_from_multiple_pointer_targets(
            state,
            pi_state,
            input_value.get_relative_values(),
        ))
    };

    // Domain of the string if it is a constant in the memory image.
    let mut constant_domain: Option<T> = None;
    if let Some(value) = input_value.get_absolute_value() {
        if let Ok(global_address) = value.try_to_bitvec() {
            if let Ok(input_string) = self
                .runtime_memory_image
                .read_string_until_null_terminator(&global_address)
            {
                constant_domain = Some(T::from(input_string.to_string()));
            }
        }
    }

    match (tracked_domain, constant_domain) {
        (Some(tracked), Some(constant)) => tracked.merge(&constant),
        (Some(domain), None) | (None, Some(domain)) => domain,
        (None, None) => T::create_top_value_domain(),
    }
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use crate::{
abstract_domain::{CharacterInclusionDomain, CharacterSet, IntervalDomain},
analysis::pointer_inference::PointerInference as PointerInferenceComputation,
analysis::string_abstraction::{
context::symbol_calls::tests::Setup,
tests::mock_project_with_intraprocedural_control_flow,
},
intermediate_representation::{ByteSize, Variable},
utils::binary::RuntimeMemoryImage,
};
use super::*;
#[test]
fn test_handle_strcat_and_strncat_calls_with_known_second_input() {
    // Mock a function containing an ARM `strcat` call and run the pointer inference on it.
    let strcat_symbol = ExternSymbol::mock_strcat_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(strcat_symbol.clone(), vec![true])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);

    let state_after_call = setup
        .context
        .handle_strcat_and_strncat_calls(&setup.state_before_call, &strcat_symbol);

    // Certain characters of the result; the possible characters are *Top*.
    let expected_domain = CharacterInclusionDomain::Value((
        CharacterSet::Value("str 1234".chars().collect()),
        CharacterSet::Top,
    ));
    // The destination string is expected at stack offset -0x3c.
    let destination_offset: i64 = -0x3c;
    let stored_domain = state_after_call
        .get_stack_offset_to_string_map()
        .get(&destination_offset)
        .unwrap();
    assert_eq!(expected_domain, *stored_domain);
}
#[test]
fn test_handle_strcat_and_strncat_calls_with_unknown_second_input() {
    let strcat_symbol = ExternSymbol::mock_strcat_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(strcat_symbol.clone(), vec![false])],
        "func",
    );
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
    pi_results.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
    // The destination string is expected at stack offset -0x3c.
    let destination_offset: i64 = -0x3c;

    // Test Case 1: No string domain is tracked for the second input.
    let state_without_input = setup
        .context
        .handle_strcat_and_strncat_calls(&setup.state_before_call, &strcat_symbol);
    let untracked_result = state_without_input
        .get_stack_offset_to_string_map()
        .get(&destination_offset)
        .unwrap();
    assert_eq!(CharacterInclusionDomain::Top, *untracked_result);

    // Test Case 2: A string domain is tracked for the second input.
    setup
        .state_before_call
        .add_new_stack_offset_to_string_entry(
            0x28,
            CharacterInclusionDomain::from("a".to_string()),
        );
    let state_with_input = setup
        .context
        .handle_strcat_and_strncat_calls(&setup.state_before_call, &strcat_symbol);
    let expected_domain = CharacterInclusionDomain::Value((
        CharacterSet::Value("a".chars().collect()),
        CharacterSet::Top,
    ));
    let tracked_result = state_with_input
        .get_stack_offset_to_string_map()
        .get(&destination_offset)
        .unwrap();
    assert_eq!(expected_domain, *tracked_result);
}
#[test]
fn test_process_second_input_domain_global() {
let strcat_symbol = ExternSymbol::mock_strcat_symbol_arm();
let project = mock_project_with_intraprocedural_control_flow(
vec![(strcat_symbol.clone(), vec![true])],
"func",
);
let mem_image = RuntimeMemoryImage::mock();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
pi_results.compute();
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
assert_eq!(
CharacterInclusionDomain::ci("str1 str2 str3 str4"),
setup.context.process_second_input_domain(
&setup.state_before_call,
&strcat_symbol,
&setup.pi_state_before_symbol_call
)
);
}
#[test]
fn test_process_second_input_domain_local() {
let strcat_symbol = ExternSymbol::mock_strcat_symbol_arm();
let project = mock_project_with_intraprocedural_control_flow(
vec![(strcat_symbol.clone(), vec![false])],
"func",
);
let mem_image = RuntimeMemoryImage::mock();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
pi_results.compute();
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
setup
.state_before_call
.add_new_stack_offset_to_string_entry(40, CharacterInclusionDomain::ci("abc"));
assert_eq!(
CharacterInclusionDomain::ci("abc"),
setup.context.process_second_input_domain(
&setup.state_before_call,
&strcat_symbol,
&setup.pi_state_before_symbol_call
)
);
}
#[test]
fn test_process_second_input_domain_local_and_global() {
let r1_reg = Variable::mock("r1", ByteSize::new(4));
let strcat_symbol = ExternSymbol::mock_strcat_symbol_arm();
let project = mock_project_with_intraprocedural_control_flow(
vec![(strcat_symbol.clone(), vec![false])],
"func",
);
let mem_image = RuntimeMemoryImage::mock();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
pi_results.compute();
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
let mut target_domain = setup.pi_state_before_symbol_call.get_register(&r1_reg);
target_domain.set_absolute_value(Some(IntervalDomain::mock(0x7000, 0x7000)));
setup
.pi_state_before_symbol_call
.set_register(&r1_reg, target_domain);
setup
.state_before_call
.add_new_stack_offset_to_string_entry(40, CharacterInclusionDomain::ci("str"));
let expected_domain = CharacterInclusionDomain::Value((
CharacterSet::Value(vec!['s', 't', 'r'].into_iter().collect()),
CharacterSet::Value(
vec!['s', 't', 'r', '1', '2', '3', '4', ' ']
.into_iter()
.collect(),
),
));
assert_eq!(
expected_domain,
setup.context.process_second_input_domain(
&setup.state_before_call,
&strcat_symbol,
&setup.pi_state_before_symbol_call
)
);
}
}
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use petgraph::graph::NodeIndex;
use super::Context;
use crate::abstract_domain::{
AbstractDomain, CharacterInclusionDomain, CharacterSet, DataDomain, DomainInsertion, HasTop,
IntervalDomain,
};
use crate::analysis::forward_interprocedural_fixpoint::Context as _;
use crate::analysis::pointer_inference::PointerInference as PointerInferenceComputation;
use crate::analysis::pointer_inference::State as PiState;
use crate::analysis::string_abstraction::state::State;
use crate::analysis::string_abstraction::tests::*;
use crate::intermediate_representation::{Bitvector, ExternSymbol, Project, Sub};
use crate::{
abstract_domain::{AbstractIdentifier, AbstractLocation},
intermediate_representation::{Tid, Variable},
utils::{binary::RuntimeMemoryImage, symbol_utils::get_symbol_map},
};
/// Test fixture bundling a string abstraction context with the states right before a symbol call.
pub struct Setup<'a, T>
where
    T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String> + Debug,
{
    /// The mocked string abstraction context.
    pub context: Context<'a, T>,
    /// The pointer inference state after all defs of the first block of the first sub were applied.
    pub pi_state_before_symbol_call: PiState,
    /// The string abstraction state built from that pointer inference state.
    pub state_before_call: State<T>,
}
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String> + Debug> Setup<'a, T> {
    /// Builds the fixture from computed pointer inference results.
    ///
    /// The pointer inference state of node `0` is advanced over every def of the first block
    /// of the first sub; the resulting state is used for both the returned pointer inference
    /// state and the string abstraction state.
    ///
    /// Panics if node `0` has no value, the project has no sub, the first sub has no block,
    /// or updating a def fails.
    pub fn new(pi_results: &'a PointerInferenceComputation<'a>) -> Self {
        let pi_context = pi_results.get_context();
        let entry_state = pi_results
            .get_node_value(NodeIndex::new(0))
            .unwrap()
            .unwrap_value()
            .clone();

        // Get the pi state right before the call.
        let first_sub = pi_context.project.program.term.subs.get(0).unwrap();
        let first_block = first_sub.term.blocks.get(0).unwrap();
        let pi_state = first_block
            .term
            .defs
            .iter()
            .fold(entry_state, |current, def| {
                pi_context.update_def(&current, def).unwrap()
            });

        let context: Context<T> = Context::mock(
            &pi_context.project,
            mock_string_symbol_map(&pi_context.project),
            mock_format_index_map(),
            pi_results,
            &pi_context.runtime_memory_image,
        );
        let state_before_call: State<T> = State::mock_with_given_pi_state(
            pi_context.project.program.term.subs.get(0).unwrap().clone(),
            pi_state.clone(),
        );

        Setup {
            context,
            pi_state_before_symbol_call: pi_state,
            state_before_call,
        }
    }
}
/// Returns the format string parameter indices used by the tests,
/// keyed by the name of the extern symbol.
fn mock_format_index_map() -> HashMap<String, usize> {
    const FORMAT_STRING_INDICES: [(&str, usize); 3] = [("sprintf", 1), ("scanf", 0), ("sscanf", 1)];

    FORMAT_STRING_INDICES
        .iter()
        .map(|&(symbol_name, index)| (symbol_name.to_string(), index))
        .collect()
}
/// Looks up the extern symbols of `project` that the tests treat as string symbols
/// (`sprintf`, `scanf`, `sscanf`, `strcat` and `memcpy`).
fn mock_string_symbol_map(project: &Project) -> HashMap<Tid, &ExternSymbol> {
    let symbol_names: Vec<String> = ["sprintf", "scanf", "sscanf", "strcat", "memcpy"]
        .iter()
        .map(|name| name.to_string())
        .collect();
    get_symbol_map(project, &symbol_names[..])
}
/// After a generic symbol call the pointer tracked for `r1` must be gone.
#[test]
fn test_handle_generic_symbol_calls() {
    let memcpy = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy.clone(), vec![true])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let tracked_register = Variable::mock("r1", 4);
    let tracked_value = DataDomain::from(IntervalDomain::from(Bitvector::from_i32(32)));
    setup
        .state_before_call
        .add_new_variable_to_pointer_entry(tracked_register, tracked_value);

    let state_after_call = setup
        .context
        .handle_generic_symbol_calls(&memcpy, &mut setup.state_before_call);

    assert!(state_after_call.get_variable_to_pointer_map().is_empty());
}
/// Handling an unknown symbol call in place must remove the pointer tracked for `r1`.
#[test]
fn test_handle_unknown_symbol_calls() {
    let memcpy = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy.clone(), vec![true])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let tracked_register = Variable::mock("r1", 4);
    let tracked_value = DataDomain::from(IntervalDomain::from(Bitvector::from_i32(32)));
    setup
        .state_before_call
        .add_new_variable_to_pointer_entry(tracked_register, tracked_value);

    setup
        .context
        .handle_unknown_symbol_calls(&mut setup.state_before_call);

    let remaining_pointers = setup.state_before_call.get_variable_to_pointer_map();
    assert!(remaining_pointers.is_empty());
}
/// A new string domain must be stored in the stack map for a stack target
/// and in the heap map for a non-stack target.
#[test]
fn test_add_new_string_abstract_domain() {
    let sprintf = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf.clone(), vec![true])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let mut state = State::mock_with_default_pi_state(Sub::mock("func"));
    // Identifier of an object of `func` that is located through the given register.
    let identifier_for = |register: &str| {
        AbstractIdentifier::new(
            Tid::new("func"),
            AbstractLocation::from_var(&Variable::mock(register, 4)).unwrap(),
        )
    };

    // Target identified through the stack pointer register.
    let stack_id = identifier_for("sp");
    let stack_pointer = DataDomain::from_target(
        stack_id.clone(),
        Bitvector::zero(apint::BitWidth::from(4)).into(),
    );
    Context::<CharacterInclusionDomain>::add_new_string_abstract_domain(
        &mut state,
        &setup.pi_state_before_symbol_call,
        &stack_pointer.get_relative_values(),
        CharacterInclusionDomain::from(String::from("Hello World")),
    );
    assert!(state.get_stack_offset_to_string_map().contains_key(&0));

    state.set_all_maps_empty();

    // Target identified through `r5`.
    let heap_id = identifier_for("r5");
    let heap_pointer = DataDomain::from_target(
        heap_id.clone(),
        Bitvector::zero(apint::BitWidth::from(4)).into(),
    );
    Context::<CharacterInclusionDomain>::add_new_string_abstract_domain(
        &mut state,
        &setup.pi_state_before_symbol_call,
        &heap_pointer.get_relative_values(),
        CharacterInclusionDomain::Top,
    );
    assert!(state.get_heap_to_string_map().contains_key(&heap_id));
}
/// Exercises the merge of string domains over all targets of a pointer.
#[test]
fn test_merge_domains_from_multiple_pointer_targets() {
    /// Runs the merge for `pointer` against the current states of `setup`.
    fn merged_domain(
        setup: &Setup<CharacterInclusionDomain>,
        pointer: &DataDomain<IntervalDomain>,
    ) -> CharacterInclusionDomain {
        Context::<CharacterInclusionDomain>::merge_domains_from_multiple_pointer_targets(
            &setup.state_before_call,
            &setup.pi_state_before_symbol_call,
            &pointer.get_relative_values(),
        )
    }

    let sprintf = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf.clone(), vec![true])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let stack_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
    );
    let heap_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&Variable::mock("r5", 4)).unwrap(),
    );
    let mut domain_pointer: DataDomain<IntervalDomain> =
        DataDomain::from_target(stack_id.clone(), Bitvector::from_i32(0).into());

    // Test Case 1: Single stack pointer with single target and no domain.
    assert_eq!(
        CharacterInclusionDomain::Top,
        merged_domain(&setup, &domain_pointer)
    );

    // Test Case 2: Single stack pointer with a domain.
    setup
        .state_before_call
        .add_new_stack_offset_to_string_entry(0, CharacterInclusionDomain::from(String::from("a")));
    assert_eq!(
        CharacterInclusionDomain::from(String::from("a")),
        merged_domain(&setup, &domain_pointer)
    );

    // Test Case 3: Stack and Heap pointer with two targets and only one points to a domain.
    domain_pointer.insert_relative_value(heap_id.clone(), Bitvector::from_i32(0).into());
    assert_eq!(
        CharacterInclusionDomain::Top,
        merged_domain(&setup, &domain_pointer)
    );

    // Test Case 4: Stack and Heap pointer with two targets and both point to different domains.
    setup
        .state_before_call
        .add_new_heap_to_string_entry(heap_id, CharacterInclusionDomain::from(String::from("b")));
    let expected = CharacterInclusionDomain::Value((
        CharacterSet::Value(std::iter::empty::<char>().collect()),
        CharacterSet::Value("ab".chars().collect()),
    ));
    assert_eq!(expected, merged_domain(&setup, &domain_pointer));
}
/// With a known format string the string domain at stack offset `-0x54`
/// must certainly contain the expected characters.
#[test]
fn test_handle_sprintf_and_snprintf_calls_known_format_string() {
    let sprintf = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf.clone(), vec![true])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let state_after_call = setup
        .context
        .handle_sprintf_and_snprintf_calls(&setup.state_before_call, &sprintf);

    let expected = CharacterInclusionDomain::Value((
        CharacterSet::Value("toWacl dreH".chars().collect()),
        CharacterSet::Top,
    ));
    assert_eq!(
        *state_after_call
            .get_stack_offset_to_string_map()
            .get(&-0x54_i64)
            .unwrap(),
        expected,
    );
}
/// With an unknown format string the string domain at stack offset `-0x54` must be `Top`.
#[test]
fn test_handle_sprintf_and_snprintf_calls_unknown_format_string() {
    let sprintf = ExternSymbol::mock_sprintf_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(sprintf.clone(), vec![false])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let state_after_call = setup
        .context
        .handle_sprintf_and_snprintf_calls(&setup.state_before_call, &sprintf);

    assert_eq!(
        CharacterInclusionDomain::Top,
        *state_after_call
            .get_stack_offset_to_string_map()
            .get(&-0x54_i64)
            .unwrap()
    );
}
/// The constant integer `2` must be abstracted to the domain of the string `"2"`.
#[test]
fn test_insert_constant_integer_into_format_string() {
    let constant = Bitvector::from_u32(2);
    let integer_domain =
        Context::<CharacterInclusionDomain>::get_constant_integer_domain(constant);

    assert_eq!(
        CharacterInclusionDomain::from(String::from("2")),
        integer_domain.unwrap()
    );
}
/// Constant chars are resolved either from the value itself or from the memory image.
#[test]
fn test_insert_constant_char_into_format_string() {
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(ExternSymbol::mock_sprintf_symbol_arm(), vec![false])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    // Test Case 1: Char is given as a hex constant in a register or stack position
    let char_from_constant = setup
        .context
        .get_constant_char_domain(Bitvector::from_u32(0x42));
    assert_eq!(
        CharacterInclusionDomain::from(String::from("B")),
        char_from_constant.unwrap()
    );

    // Test Case 2: Char is contained in the binary's read-only memory.
    let char_from_memory = setup
        .context
        .get_constant_char_domain(Bitvector::from_u32(0x3002));
    assert_eq!(
        CharacterInclusionDomain::from(String::from("H")),
        char_from_memory.unwrap()
    );
}
/// A constant string address must be resolved to the string stored in the memory image.
#[test]
fn test_insert_constant_string_into_format_string() {
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(ExternSymbol::mock_sprintf_symbol_arm(), vec![false])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    // Test Case 1: String contained in read-only memory.
    let string_address = Bitvector::from_u32(0x3002);
    let string_domain = setup.context.get_constant_string_domain(string_address);
    assert_eq!(
        CharacterInclusionDomain::from(String::from("Hello World")),
        string_domain.unwrap()
    );
}
/// After handling `free`, the heap object allocated at `func_malloc_0`
/// must no longer have an entry in the heap-to-string map.
#[test]
fn test_handle_free() {
    let free = ExternSymbol::mock_free_symbol_arm();
    let malloc = ExternSymbol::mock_malloc_symbol_arm();
    let return_register = Variable::mock("r0", 4);
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(malloc.clone(), vec![]), (free.clone(), vec![])],
        "func",
    );
    let extern_subs: HashSet<Tid> = [&malloc, &free]
        .iter()
        .map(|symbol| symbol.tid.clone())
        .collect();
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, extern_subs);
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let object_id = AbstractIdentifier::new(
        Tid::new("func_malloc_0"),
        AbstractLocation::from_var(&return_register).unwrap(),
    );
    setup
        .state_before_call
        .add_new_heap_to_string_entry(object_id.clone(), CharacterInclusionDomain::Top);

    // Use the pointer inference state of node 2 as the state right before the `free` call.
    let pi_state_at_node = pointer_inference
        .get_node_value(NodeIndex::new(2))
        .unwrap()
        .unwrap_value()
        .clone();
    setup
        .state_before_call
        .set_pointer_inference_state(Some(pi_state_at_node));

    let state_after_free = setup.context.handle_free(&setup.state_before_call, &free);

    assert!(!state_after_free
        .get_heap_to_string_map()
        .contains_key(&object_id));
}
use super::*;
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Context<'a, T> {
    /// Creates a context for tests from explicitly given symbol and format string maps.
    ///
    /// * `string_symbols` - becomes the `string_symbol_map` of the context.
    /// * `format_string_index` - becomes the `format_string_index_map` of the context.
    ///
    /// The extern symbol map is derived from the extern symbols of `project`.
    /// The block and jump lookup maps are derived from the graph of `pointer_inference_results`:
    /// every `BlkStart` node is registered under the TID of the first def of its block
    /// (blocks without defs are skipped) and every `BlkEnd` node under the TIDs of all jumps
    /// of its block, each paired with the TID of the containing sub.
    pub fn mock(
        project: &'a Project,
        string_symbols: HashMap<Tid, &'a ExternSymbol>,
        format_string_index: HashMap<String, usize>,
        pointer_inference_results: &'a PointerInferenceComputation<'a>,
        runtime_memory_image: &'a RuntimeMemoryImage,
    ) -> Self {
        let extern_symbol_map: HashMap<_, _> = project
            .program
            .term
            .extern_symbols
            .iter()
            .map(|(tid, symbol)| (tid.clone(), symbol))
            .collect();

        let mut block_start_node_map = HashMap::new();
        let mut block_first_def_set = HashSet::new();
        let mut jmp_to_blk_end_node_map = HashMap::new();
        for (node_id, node) in pointer_inference_results.get_graph().node_references() {
            match node {
                Node::BlkStart(block, sub) => {
                    if let Some(first_def) = block.term.defs.get(0) {
                        // Both collections are keyed by the same (def TID, sub TID) pair.
                        let key = (first_def.tid.clone(), sub.tid.clone());
                        block_first_def_set.insert(key.clone());
                        block_start_node_map.insert(key, node_id);
                    }
                }
                Node::BlkEnd(block, sub) => {
                    jmp_to_blk_end_node_map.extend(
                        block
                            .term
                            .jmps
                            .iter()
                            .map(|jmp| ((jmp.tid.clone(), sub.tid.clone()), node_id)),
                    );
                }
                _ => (),
            }
        }

        Context {
            project,
            runtime_memory_image,
            pointer_inference_results,
            string_symbol_map: string_symbols,
            extern_symbol_map,
            format_string_index_map: format_string_index,
            block_start_node_map,
            block_first_def_set,
            jmp_to_blk_end_node_map,
            _phantom_string_domain: PhantomData,
        }
    }
}
use crate::{
abstract_domain::{AbstractDomain, DomainInsertion, HasTop},
analysis::string_abstraction::state::State,
intermediate_representation::{Blk, Def, Expression, Jmp, Term},
};
use super::Context;
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>>
    crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Context<'a, T>
{
    type Value = State<T>;

    /// Returns the graph of the pointer inference results, on which this analysis operates.
    fn get_graph(&self) -> &crate::analysis::graph::Graph<'a> {
        self.pointer_inference_results.get_graph()
    }

    /// Merges two states by delegating to the merge operation of the state.
    fn merge(&self, state1: &Self::Value, state2: &Self::Value) -> State<T> {
        state1.merge(state2)
    }

    /// Applies a single definition to a copy of the state.
    ///
    /// If the state carries no pointer inference state, one is only looked up when `def`
    /// is registered as the first def of a block; otherwise, or if the lookup yields nothing,
    /// `None` is returned. Afterwards the pointer inference state is updated for `def` and
    /// the def is dispatched to the assign/load or store handler of the state.
    fn update_def(&self, state: &State<T>, def: &Term<Def>) -> Option<State<T>> {
        let mut new_state = state.clone();

        if state.get_pointer_inference_state().is_none() {
            let first_def_key = (
                def.tid.clone(),
                state.get_current_sub().unwrap().tid.clone(),
            );
            if !self.block_first_def_set.contains(&first_def_key) {
                return None;
            }
            let pi_state = self.get_current_pointer_inference_state(state, &def.tid)?;
            new_state.set_pointer_inference_state(Some(pi_state));
        }

        self.update_pointer_inference_state(&mut new_state, def);

        match &def.term {
            Def::Assign { var, value } => {
                new_state.handle_assign_and_load(
                    var,
                    value,
                    self.runtime_memory_image,
                    &self.block_first_def_set,
                    true,
                );
            }
            Def::Load { var, address } => {
                new_state.handle_assign_and_load(
                    var,
                    address,
                    self.runtime_memory_image,
                    &self.block_first_def_set,
                    false,
                );
            }
            Def::Store { address, value } => new_state.handle_store(
                address,
                value,
                self.runtime_memory_image,
                &self.block_first_def_set,
            ),
        }

        Some(new_state)
    }

    /// Passes the state along a jump; only the pointer inference state is reset.
    fn update_jump(
        &self,
        state: &State<T>,
        _jump: &Term<Jmp>,
        _untaken_conditional: Option<&Term<Jmp>>,
        _target: &Term<Blk>,
    ) -> Option<State<T>> {
        let mut state_after_jump = state.clone();
        state_after_jump.set_pointer_inference_state(None);
        Some(state_after_jump)
    }

    /// Never propagates a state into the callee: always returns `None`.
    fn update_call(
        &self,
        _state: &State<T>,
        _call: &Term<Jmp>,
        _target: &crate::analysis::graph::Node,
    ) -> Option<State<T>> {
        None
    }

    /// Computes the state after a return from the state before the call.
    ///
    /// The state at the return site of the callee is ignored. The state before the call
    /// is treated like a call to an unknown symbol and its pointer inference state is reset.
    /// Returns `None` if there is no state before the call.
    fn update_return(
        &self,
        _state: Option<&State<T>>,
        state_before_call: Option<&State<T>>,
        _call_term: &Term<Jmp>,
        _return_term: &Term<Jmp>,
    ) -> Option<State<T>> {
        state_before_call.map(|caller_state| {
            let mut state_after_return = caller_state.clone();
            self.handle_unknown_symbol_calls(&mut state_after_return);
            state_after_return.set_pointer_inference_state(None);
            state_after_return
        })
    }

    /// Handles calls to extern symbols and indirect calls.
    ///
    /// Direct calls are dispatched to the string symbol handler if the target is a string
    /// symbol and to the generic symbol handler otherwise. Indirect calls are treated as
    /// calls to unknown symbols. The pointer inference state of the result is reset.
    ///
    /// Panics if a direct call targets a TID that is not a known extern symbol
    /// or if `call` is not a call at all.
    fn update_call_stub(&self, state: &State<T>, call: &Term<Jmp>) -> Option<State<T>> {
        let mut state_after_call = state.clone();

        match &call.term {
            Jmp::Call { target, .. } => {
                let symbol = match self.extern_symbol_map.get(target) {
                    Some(symbol) => symbol,
                    None => panic!("Extern symbol not found."),
                };
                state_after_call = match self.string_symbol_map.get(target) {
                    Some(string_symbol) => {
                        self.handle_string_symbol_calls(string_symbol, &state_after_call)
                    }
                    None => self.handle_generic_symbol_calls(symbol, &state_after_call),
                };
            }
            Jmp::CallInd { .. } => self.handle_unknown_symbol_calls(&mut state_after_call),
            _ => panic!("Malformed control flow graph encountered."),
        }

        state_after_call.set_pointer_inference_state(None);
        Some(state_after_call)
    }

    /// Conditions are not evaluated: the state is passed on unchanged.
    fn specialize_conditional(
        &self,
        state: &State<T>,
        _condition: &Expression,
        _block_before_condition: &Term<Blk>,
        _is_true: bool,
    ) -> Option<State<T>> {
        Some(state.clone())
    }
}
#[cfg(test)]
mod tests;
use std::collections::HashSet;
use crate::{
abstract_domain::{
AbstractIdentifier, AbstractLocation, CharacterInclusionDomain, DataDomain, IntervalDomain,
},
analysis::pointer_inference::PointerInference as PointerInferenceComputation,
analysis::{
forward_interprocedural_fixpoint::Context,
string_abstraction::{
context::symbol_calls::tests::Setup,
tests::mock_project_with_intraprocedural_control_flow, tests::Setup as ProjectSetup,
},
},
intermediate_representation::{Bitvector, Blk, ByteSize, ExternSymbol, Jmp, Tid, Variable},
utils::binary::RuntimeMemoryImage,
};
/// Checks `update_def` for an assign, a load and a store definition, in that order.
/// The setup is mutated between the three cases, so the statement order matters.
#[test]
fn test_update_def() {
let memcpy_symbol = ExternSymbol::mock_memcpy_symbol_arm();
let project = mock_project_with_intraprocedural_control_flow(
vec![(memcpy_symbol.clone(), vec![true])],
"func",
);
let mem_image = RuntimeMemoryImage::mock();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project, &mem_image, &graph);
pi_results.compute();
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
// Replace the set of block-first defs of the context with an empty set.
setup.context.block_first_def_set = HashSet::new();
let project_setup = ProjectSetup::new();
// The three defs under test; judging by the helper names these are an assign of the
// constant 0x7000 to r1, a load into r5 and a store of r5 (helpers are defined elsewhere).
let assign_def = project_setup.string_input_constant("assign_def", "r1", 0x7000);
let load_def = project_setup.load_var_content_from_temp_var("load_def", "r5", "r2");
let store_def = project_setup.store_var_content_at_temp_var("store_def", "r0", "r5");
// Case 1 (assign): r1 is expected to be tracked with the absolute value 0x7000.
let new_state = setup
.context
.update_def(&setup.state_before_call, &assign_def)
.unwrap();
let absolute_target = DataDomain::from(Bitvector::from_i32(0x7000));
assert_eq!(
absolute_target,
*new_state
.get_variable_to_pointer_map()
.get(&Variable::mock("r1", 4))
.unwrap()
);
let stack_id = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
);
// Case 2 (load): `loaded_pointer` targets stack offset 4, `pointer_to_pointer` targets
// stack offset 8. Presumably `store_value` writes the former to the location of the latter.
let loaded_pointer = DataDomain::from_target(stack_id.clone(), IntervalDomain::mock_i32(4, 4));
let pointer_to_pointer =
DataDomain::from_target(stack_id.clone(), IntervalDomain::mock_i32(8, 8));
// The result of the store is deliberately ignored.
let _ = setup.pi_state_before_symbol_call.store_value(
&pointer_to_pointer,
&loaded_pointer,
&mem_image,
);
// Temporary variable r2 (is_temp = true) holds the pointer to the stored pointer.
let r2_reg = Variable {
name: String::from("r2"),
size: ByteSize::new(4),
is_temp: true,
};
setup
.pi_state_before_symbol_call
.set_register(&r2_reg, pointer_to_pointer);
// Hand the modified pointer inference state to the string abstraction state.
setup
.state_before_call
.set_pointer_inference_state(Some(setup.pi_state_before_symbol_call.clone()));
setup
.state_before_call
.add_new_variable_to_pointer_entry(Variable::mock("r3", 4), loaded_pointer.clone());
let new_state = setup
.context
.update_def(&setup.state_before_call, &load_def)
.unwrap();
// After the load, r5 is expected to be tracked with the loaded pointer.
assert_eq!(
loaded_pointer,
*new_state
.get_variable_to_pointer_map()
.get(&Variable::mock("r5", 4))
.unwrap()
);
// Case 3 (store): temporary r0 targets stack offset 12 and r5 holds the absolute value.
let store_target = DataDomain::from_target(stack_id, IntervalDomain::mock_i32(12, 12));
let r0_reg = Variable {
name: String::from("r0"),
size: ByteSize::new(4),
is_temp: true,
};
setup
.pi_state_before_symbol_call
.set_register(&r0_reg, store_target);
setup
.pi_state_before_symbol_call
.set_register(&Variable::mock("r5", 4), absolute_target.clone());
// The pointer inference state is moved into the state here; it is not used afterwards.
setup
.state_before_call
.set_pointer_inference_state(Some(setup.pi_state_before_symbol_call));
let new_state = setup
.context
.update_def(&setup.state_before_call, &store_def)
.unwrap();
// After the store, stack offset 12 is expected to be tracked with the absolute value.
assert_eq!(
absolute_target,
*new_state
.get_stack_offset_to_pointer_map()
.get(&12)
.unwrap()
);
}
/// A jump must reset the pointer inference state of the resulting state.
#[test]
fn test_update_jump() {
    let memcpy = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy.clone(), vec![true])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let taken_jump = Jmp::branch("start1", "end1");
    let untaken_jump = Jmp::branch("start2", "end2");
    let target_block = Blk::mock();
    let state_after_jump = setup
        .context
        .update_jump(
            &setup.state_before_call,
            &taken_jump,
            Some(&untaken_jump),
            &target_block,
        )
        .unwrap();

    assert_eq!(None, state_after_jump.get_pointer_inference_state());
}
/// Without a state before the call no state is returned; otherwise only the pointer
/// tracked for `r11` is expected to remain and the pointer inference state is reset.
#[test]
fn test_update_return() {
    let memcpy = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy.clone(), vec![true])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let pointer = DataDomain::from(Bitvector::from_i32(0x6000));
    let callee_saved_reg = Variable::mock("r11", 4);
    let non_callee_saved_reg = Variable::mock("r0", 4);
    for register in [&callee_saved_reg, &non_callee_saved_reg].iter() {
        setup
            .state_before_call
            .add_new_variable_to_pointer_entry((*register).clone(), pointer.clone());
    }

    let call_term = Jmp::branch("start1", "end1");
    let return_term = Jmp::branch("start2", "end2");

    // No state before the call: nothing can be returned.
    let missing_state = setup
        .context
        .update_return(None, None, &call_term, &return_term);
    assert_eq!(None, missing_state);

    // With a state before the call.
    let state_after_return = setup
        .context
        .update_return(
            Some(&setup.state_before_call),
            Some(&setup.state_before_call),
            &call_term,
            &return_term,
        )
        .unwrap();
    assert_eq!(None, state_after_return.get_pointer_inference_state());
    assert_eq!(1, state_after_return.get_variable_to_pointer_map().len());
    assert_eq!(
        pointer,
        *state_after_return
            .get_variable_to_pointer_map()
            .get(&callee_saved_reg)
            .unwrap()
    );
}
/// A call stub for `memcpy` must leave the expected string domain at stack offset `-60`.
#[test]
fn test_update_call_stub() {
    let memcpy = ExternSymbol::mock_memcpy_symbol_arm();
    let project = mock_project_with_intraprocedural_control_flow(
        vec![(memcpy.clone(), vec![true])],
        "func",
    );
    let memory_image = RuntimeMemoryImage::mock();
    let cfg = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let mut pointer_inference = PointerInferenceComputation::mock(&project, &memory_image, &cfg);
    pointer_inference.compute();
    let setup: Setup<CharacterInclusionDomain> = Setup::new(&pointer_inference);

    let call_to_memcpy = Jmp::call("jmp1", "memcpy", Some("blk1"));
    let state_after_call = setup
        .context
        .update_call_stub(&setup.state_before_call, &call_to_memcpy)
        .unwrap();

    assert_eq!(
        CharacterInclusionDomain::ci("str1 str2 str3 str4"),
        *state_after_call
            .get_stack_offset_to_string_map()
            .get(&-60)
            .unwrap()
    );
}
//! A fixpoint analysis that abstracts strings in the program using various string abstract domains.
//! These include the Character Inclusion Domain and Bricks Domain among others.
use std::{
collections::{BTreeMap, HashMap},
fmt::Debug,
};
use crate::{
abstract_domain::{AbstractDomain, DomainInsertion, HasTop},
intermediate_representation::Project,
prelude::*,
utils::binary::RuntimeMemoryImage,
};
use self::state::State;
use super::{
fixpoint::Computation, forward_interprocedural_fixpoint::GeneralizedContext, graph::Graph,
interprocedural_fixpoint_generic::NodeValue,
pointer_inference::PointerInference as PointerInferenceComputation,
};
pub mod context;
pub mod state;
use context::*;
use petgraph::graph::NodeIndex;
/// Configurable parameters for the analysis.
///
/// The struct derives `Serialize` and `Deserialize`, so the field names are part of
/// the serialized representation.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Config {
/// Names of extern functions that manipulate strings
/// or could introduce new strings (e.g. scanf).
pub string_symbols: Vec<String>,
/// The index of the format string parameter in the function signature
/// of an external symbol.
/// NOTE(review): presumably keyed by symbol name with zero-based indices
/// (the test mocks use `sprintf` -> 1 and `scanf` -> 0) — confirm.
pub format_string_index: BTreeMap<String, usize>,
}
/// Wraps the fixpoint computation of the string abstraction analysis.
pub struct StringAbstraction<'a, T>
where
    T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>,
{
    /// The underlying fixpoint computation over the generalized string abstraction context.
    computation: Computation<GeneralizedContext<'a, Context<'a, T>>>,
}
impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>>
    StringAbstraction<'a, T>
{
    /// Set up (but do not run) the string abstraction computation for a project.
    ///
    /// For every subroutine whose first block has a `BlkStart` node in
    /// `control_flow_graph`, that node is seeded with a fresh [`State`].
    /// Subroutines without blocks or without a matching graph node are skipped.
    pub fn new(
        project: &'a Project,
        runtime_memory_image: &'a RuntimeMemoryImage,
        control_flow_graph: &'a Graph<'a>,
        pointer_inference_results: &'a PointerInferenceComputation<'a>,
        config: Config,
    ) -> StringAbstraction<'a, T> {
        let context = Context::new(
            project,
            runtime_memory_image,
            pointer_inference_results,
            config,
        );

        // TID of each subroutine mapped to the TID of its first block.
        let entry_block_of_sub: HashMap<Tid, Tid> = project
            .program
            .term
            .subs
            .iter()
            .filter_map(|sub| {
                sub.term
                    .blocks
                    .get(0)
                    .map(|entry_block| (sub.tid.clone(), entry_block.tid.clone()))
            })
            .collect();

        // Graph index of every block start node, keyed by (block TID, sub TID).
        let block_start_nodes: HashMap<(Tid, Tid), NodeIndex> = control_flow_graph
            .node_indices()
            .filter_map(|node| match control_flow_graph[node] {
                super::graph::Node::BlkStart(block, sub) => {
                    Some(((block.tid.clone(), sub.tid.clone()), node))
                }
                _ => None,
            })
            .collect();

        let mut fixpoint_computation =
            super::forward_interprocedural_fixpoint::create_computation(context, None);
        // Seed the entry node of every subroutine that is present in the graph.
        for (sub_tid, block_tid) in entry_block_of_sub {
            if let Some(&entry_node) = block_start_nodes.get(&(block_tid, sub_tid)) {
                fixpoint_computation.set_node_value(
                    entry_node,
                    super::interprocedural_fixpoint_generic::NodeValue::Value(State::new(
                        entry_node,
                        pointer_inference_results,
                    )),
                );
            }
        }

        StringAbstraction {
            computation: fixpoint_computation,
        }
    }

    /// Run the fixpoint algorithm of the string abstraction analysis.
    ///
    /// The algorithm is bounded to 100 steps so that it always terminates.
    pub fn compute(&mut self) {
        self.computation.compute_with_max_steps(100); // TODO: make max_steps configurable!
    }

    /// Borrow the wrapped fixpoint computation.
    pub fn get_computation(&self) -> &Computation<GeneralizedContext<'a, Context<'a, T>>> {
        &self.computation
    }

    /// Borrow the graph that the computation operates on.
    pub fn get_graph(&self) -> &Graph {
        self.computation.get_graph()
    }

    /// Borrow the string abstraction context of the computation.
    pub fn get_context(&self) -> &Context<'a, T> {
        self.computation.get_context().get_context()
    }

    /// Look up the value that the computation currently associates with a node.
    ///
    /// If the fixpoint has not been reached yet, this is an intermediate value.
    /// Returns `None` if no value is associated to the node.
    pub fn get_node_value(&self, node_id: NodeIndex) -> Option<&NodeValue<State<T>>> {
        self.computation.get_node_value(node_id)
    }
}
/// Build the string abstraction analysis for the given project,
/// run its fixpoint computation and return the analysis object holding the results.
pub fn run<'a, T: AbstractDomain + HasTop + Eq + From<String> + DomainInsertion>(
    project: &'a Project,
    runtime_memory_image: &'a RuntimeMemoryImage,
    control_flow_graph: &'a Graph<'a>,
    pointer_inference: &'a PointerInferenceComputation<'a>,
    config: Config,
) -> StringAbstraction<'a, T> {
    let mut analysis = StringAbstraction::new(
        project,
        runtime_memory_image,
        control_flow_graph,
        pointer_inference,
        config,
    );
    analysis.compute();

    analysis
}
#[cfg(test)]
pub mod tests;
//! The state module holds all information that the String Abstraction analysis
//! generates at the nodes of the control flow graph.
//! Its content changes until a fixpoint is reached.
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use itertools::Itertools;
use petgraph::graph::NodeIndex;
use crate::abstract_domain::{DataDomain, DomainInsertion, HasTop, TryToBitvec};
use crate::intermediate_representation::{ExternSymbol, Project};
use crate::{abstract_domain::IntervalDomain, prelude::*};
use crate::{
abstract_domain::{AbstractDomain, AbstractIdentifier},
analysis::pointer_inference::PointerInference as PointerInferenceComputation,
analysis::pointer_inference::State as PointerInferenceState,
intermediate_representation::{Expression, Sub, Variable},
utils::binary::RuntimeMemoryImage,
};
/// Contains all information known about the state of a program at a specific point of time.
///
/// The type parameter `T` is the abstract domain used to represent string values.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct State<T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> {
/// Keeps track of pointers that are returned by external calls
/// where the location is temporarily unknown.
unassigned_return_pointer: HashSet<DataDomain<IntervalDomain>>,
/// Maps registers to pointers that point to abstract string domains.
variable_to_pointer_map: HashMap<Variable, DataDomain<IntervalDomain>>,
/// Maps stack offsets to pointers that have been stored on the stack.
/// These pointers point to abstract string domains.
stack_offset_to_pointer_map: HashMap<i64, DataDomain<IntervalDomain>>,
/// Tracks strings that lie directly on the stack.
/// Maps the stack offset to the abstract string domain.
stack_offset_to_string_map: HashMap<i64, T>,
/// Maps the heap abstract identifier of a memory object to the corresponding string abstract domain
/// representing its content.
/// For simplicity reasons it is assumed that a heap object only represents one string at offset 0.
heap_to_string_map: HashMap<AbstractIdentifier, T>,
/// Holds the currently analyzed subroutine term.
current_sub: Arc<Option<Term<Sub>>>,
/// The state of the pointer inference analysis.
/// Used only for preventing unnecessary recomputation during handling of `Def`s in a basic block.
/// It is set when handling `Def`s (except for the first `Def` in a block)
/// provided that a corresponding pointer inference analysis state exists.
/// Otherwise the field is ignored (including in the [merge](State::merge)-function) and usually set to `None`.
#[serde(skip_serializing)]
pointer_inference_state: Option<PointerInferenceState>,
}
impl<T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> AbstractDomain for State<T> {
    /// Merges two states.
    ///
    /// - The sets of unassigned return pointers are united.
    /// - Pointers tracked by both states for the same register or stack offset are merged,
    ///   pointers tracked by only one state are taken over unchanged.
    /// - Strings tracked by both states for the same stack offset or heap object are merged.
    ///   Strings tracked only by `self` are kept unchanged,
    ///   strings tracked only by `other` are replaced by the *Top* value.
    /// - The current subroutine and the pointer inference state are taken from `self`.
    ///
    /// Afterwards string entries without tracked pointers are removed
    /// (see `delete_string_map_entries_if_no_pointer_targets_are_tracked`).
    fn merge(&self, other: &Self) -> Self {
        let unassigned_return_pointer = self
            .unassigned_return_pointer
            .union(&other.unassigned_return_pointer)
            .cloned()
            .collect();

        let mut variable_to_pointer_map = self.variable_to_pointer_map.clone();
        for (var, other_pointer) in &other.variable_to_pointer_map {
            let merged_pointer = match self.variable_to_pointer_map.get(var) {
                Some(pointer) => pointer.merge(other_pointer),
                None => other_pointer.clone(),
            };
            variable_to_pointer_map.insert(var.clone(), merged_pointer);
        }

        let mut stack_offset_to_pointer_map = self.stack_offset_to_pointer_map.clone();
        for (offset, other_pointer) in &other.stack_offset_to_pointer_map {
            let merged_pointer = match self.stack_offset_to_pointer_map.get(offset) {
                Some(pointer) => pointer.merge(other_pointer),
                None => other_pointer.clone(),
            };
            stack_offset_to_pointer_map.insert(*offset, merged_pointer);
        }

        let mut stack_offset_to_string_map = self.stack_offset_to_string_map.clone();
        for (offset, other_string_domain) in &other.stack_offset_to_string_map {
            let merged_string = match self.stack_offset_to_string_map.get(offset) {
                Some(string_domain) => string_domain.merge(other_string_domain),
                // The string is unknown on the `self` side.
                None => T::create_top_value_domain(),
            };
            stack_offset_to_string_map.insert(*offset, merged_string);
        }

        let mut heap_to_string_map = self.heap_to_string_map.clone();
        for (id, other_string_domain) in &other.heap_to_string_map {
            let merged_string = match self.heap_to_string_map.get(id) {
                Some(string_domain) => string_domain.merge(other_string_domain),
                // The string is unknown on the `self` side.
                None => T::create_top_value_domain(),
            };
            heap_to_string_map.insert(id.clone(), merged_string);
        }

        let merged_state = State {
            unassigned_return_pointer,
            variable_to_pointer_map,
            stack_offset_to_pointer_map,
            stack_offset_to_string_map,
            heap_to_string_map,
            current_sub: self.current_sub.clone(),
            pointer_inference_state: self.pointer_inference_state.clone(),
        };

        merged_state.delete_string_map_entries_if_no_pointer_targets_are_tracked()
    }

    /// The state has no explicit Top element, so this always returns `false`.
    fn is_top(&self) -> bool {
        false
    }
}
impl<T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> State<T> {
/// Creates a new state with empty pointer and string maps.
///
/// The pointer inference state and the current subroutine are looked up
/// for the given node in the pointer inference results.
/// Each of them is left empty if the lookup yields nothing.
pub fn new(
    node_index: NodeIndex,
    pointer_inference_results: &PointerInferenceComputation,
) -> State<T> {
    let pointer_inference_state = pointer_inference_results
        .get_node_value(node_index)
        .map(|pi_node| pi_node.unwrap_value().clone());
    let current_sub = pointer_inference_results
        .get_graph()
        .node_weight(node_index)
        .map(|node| node.get_sub().clone());

    State {
        unassigned_return_pointer: HashSet::new(),
        variable_to_pointer_map: HashMap::new(),
        stack_offset_to_pointer_map: HashMap::new(),
        stack_offset_to_string_map: HashMap::new(),
        heap_to_string_map: HashMap::new(),
        current_sub: Arc::new(current_sub),
        pointer_inference_state,
    }
}
/// Removes all entries from the string maps.
pub fn set_all_maps_empty(&mut self) {
self.unassigned_return_pointer = HashSet::new();
self.heap_to_string_map = HashMap::new();
self.stack_offset_to_pointer_map = HashMap::new();
self.stack_offset_to_string_map = HashMap::new();
self.variable_to_pointer_map = HashMap::new();
}
/// Adds a return pointer to the unassigned return pointer set.
///
/// Since the pointers are stored in a set, adding an already contained pointer has no effect.
pub fn add_unassigned_return_pointer(&mut self, pointer: DataDomain<IntervalDomain>) {
self.unassigned_return_pointer.insert(pointer);
}
/// Returns the set of function return pointers that have not yet been assigned to
/// a memory location or register.
pub fn get_unassigned_return_pointer(&self) -> &HashSet<DataDomain<IntervalDomain>> {
&self.unassigned_return_pointer
}
/// Adds a new variable-to-pointer entry to the map.
///
/// An already existing entry for the variable is overwritten.
pub fn add_new_variable_to_pointer_entry(
&mut self,
variable: Variable,
pointer: DataDomain<IntervalDomain>,
) {
self.variable_to_pointer_map.insert(variable, pointer);
}
/// Adds a new stack-offset-to-string entry to the map.
///
/// An already existing entry for the offset is overwritten.
pub fn add_new_stack_offset_to_string_entry(&mut self, offset: i64, string_domain: T) {
self.stack_offset_to_string_map
.insert(offset, string_domain);
}
/// Adds a new heap-id-to-string entry to the map.
///
/// An already existing entry for the heap id is overwritten.
pub fn add_new_heap_to_string_entry(&mut self, heap_id: AbstractIdentifier, string_domain: T) {
self.heap_to_string_map.insert(heap_id, string_domain);
}
/// Removes a string from the heap to string map for the given abstract id.
///
/// Does nothing if no string is tracked for the id.
pub fn remove_heap_to_string_entry(&mut self, heap_id: &AbstractIdentifier) {
self.heap_to_string_map.remove(heap_id);
}
/// Returns a reference to the variable to pointer map,
/// i.e. the map from registers to the string pointers they hold.
pub fn get_variable_to_pointer_map(&self) -> &HashMap<Variable, DataDomain<IntervalDomain>> {
&self.variable_to_pointer_map
}
/// Sets the variable to pointer map to a new value.
///
/// The previous content of the map is discarded.
pub fn set_variable_to_pointer_map(
&mut self,
map: HashMap<Variable, DataDomain<IntervalDomain>>,
) {
self.variable_to_pointer_map = map;
}
/// Returns a reference to the stack offset to pointer map,
/// i.e. the map from stack offsets to the string pointers stored at these offsets.
pub fn get_stack_offset_to_pointer_map(&self) -> &HashMap<i64, DataDomain<IntervalDomain>> {
&self.stack_offset_to_pointer_map
}
/// Returns a reference to the stack offset to string map,
/// i.e. the map from stack offsets to the abstract strings lying directly on the stack.
pub fn get_stack_offset_to_string_map(&self) -> &HashMap<i64, T> {
&self.stack_offset_to_string_map
}
/// Returns a reference to the heap to string map,
/// i.e. the map from heap object identifiers to the abstract strings they contain.
pub fn get_heap_to_string_map(&self) -> &HashMap<AbstractIdentifier, T> {
&self.heap_to_string_map
}
/// Gets the subroutine that is currently analyzed
/// (the analysis is interprocedural, so it differs between states).
/// Returns `None` if the state does not know its subroutine.
pub fn get_current_sub(&self) -> Option<&Term<Sub>> {
    // Dereference the `Arc` first, then borrow the content of the inner `Option`.
    (*self.current_sub).as_ref()
}
/// Get the current pointer inference state if it is contained as an intermediate value in the state.
/// Returns `None` otherwise.
pub fn get_pointer_inference_state(&self) -> Option<&PointerInferenceState> {
self.pointer_inference_state.as_ref()
}
/// Set the current pointer inference state for `self`.
/// Passing `None` removes the pointer inference state from `self`.
pub fn set_pointer_inference_state(&mut self, pi_state: Option<PointerInferenceState>) {
self.pointer_inference_state = pi_state;
}
/// Deletes all entries in the string maps that do not have corresponding pointers
/// in the pointer maps.
pub fn delete_string_map_entries_if_no_pointer_targets_are_tracked(&self) -> Self {
let mut new_state = self.clone();
if let Some(pi_state) = self.get_pointer_inference_state() {
let (stack_strings, heap_strings) = self.filter_string_map_entries(pi_state);
new_state.stack_offset_to_string_map = stack_strings;
new_state.heap_to_string_map = heap_strings;
}
new_state
}
/// Returns a vector of all currently tracked pointers.
///
/// The vector contains (in this order) copies of the pointers stored on the stack,
/// the pointers held in registers and the unassigned return pointers.
pub fn collect_all_tracked_pointers(&self) -> Vec<DataDomain<IntervalDomain>> {
    self.stack_offset_to_pointer_map
        .values()
        .chain(self.variable_to_pointer_map.values())
        .chain(self.unassigned_return_pointer.iter())
        .cloned()
        .collect_vec()
}
/// Computes filtered copies of the string maps that only contain those strings
/// that are still targeted by at least one tracked pointer.
///
/// Returns the filtered stack string map and the filtered heap string map.
/// The maps of `self` are not modified.
pub fn filter_string_map_entries(
    &self,
    pi_state: &PointerInferenceState,
) -> (HashMap<i64, T>, HashMap<AbstractIdentifier, T>) {
    let mut kept_stack_strings: HashMap<i64, T> = HashMap::new();
    let mut kept_heap_strings: HashMap<AbstractIdentifier, T> = HashMap::new();

    for pointer in &self.collect_all_tracked_pointers() {
        for (target, offset) in pointer.get_relative_values().iter() {
            if State::<T>::is_stack_pointer(pi_state, target) {
                // Stack strings are identified by their (exactly known) stack offset.
                if let Ok(offset_value) = offset.try_to_offset() {
                    if let Some(string) = self.stack_offset_to_string_map.get(&offset_value) {
                        kept_stack_strings.insert(offset_value, string.clone());
                    }
                }
            } else if let Some(string) = self.heap_to_string_map.get(target) {
                // Heap strings are identified by the identifier of the heap object.
                kept_heap_strings.insert(target.clone(), string.clone());
            }
        }
    }

    (kept_stack_strings, kept_heap_strings)
}
/// Evaluates the constant used as input of a Def Term.
///
/// Returns a pointer to the constant address if all of the following hold:
/// - the constant is not equal to the address of one of the `Def` TIDs in `block_first_def_set`
///   (NOTE(review): presumably this prevents code addresses like return addresses
///   from being interpreted as string pointers; confirm with the callers),
/// - the constant is an address in global memory,
/// - a null-terminated string can be read from the runtime memory image at that address.
///
/// Returns `None` otherwise.
/// The state itself is not modified, adding the pointer to a map is left to the caller.
///
/// TIDs in `block_first_def_set` whose address is not a hexadecimal number
/// can never be equal to the constant and are ignored instead of causing a panic.
pub fn evaluate_constant(
    &self,
    runtime_memory_image: &RuntimeMemoryImage,
    block_first_def_set: &HashSet<(Tid, Tid)>,
    constant: Bitvector,
) -> Option<DataDomain<IntervalDomain>> {
    let address = constant.try_to_u64().ok()?;
    let is_block_start_address = block_first_def_set.iter().any(|(def_tid, _)| {
        // A parse failure yields `None`, which never equals `Some(address)`.
        u64::from_str_radix(def_tid.address.as_str(), 16).ok() == Some(address)
    });

    if !is_block_start_address
        && runtime_memory_image.is_global_memory_address(&constant)
        && runtime_memory_image
            .read_string_until_null_terminator(&constant)
            .is_ok()
    {
        Some(DataDomain::from(IntervalDomain::new(
            constant.clone(),
            constant,
        )))
    } else {
        None
    }
}
/// Handles assign and load Def Terms.
///
/// - If a pointer inference state is available, the value of `output` in that state
///   decides whether `output` holds a string pointer.
/// - Otherwise, for assignments only (`is_assign == true`), `output` is tracked
///   if `input` is a constant address of a string in global memory.
///
/// If `output` does not hold a string pointer afterwards,
/// it is removed from the variable to pointer map.
pub fn handle_assign_and_load(
    &mut self,
    output: &Variable,
    input: &Expression,
    runtime_memory_image: &RuntimeMemoryImage,
    block_first_def_set: &HashSet<(Tid, Tid)>,
    is_assign: bool,
) {
    // Only the pointer inference state is cloned (not the whole state)
    // to be able to mutate `self` while reading from it.
    let pi_state = self.get_pointer_inference_state().cloned();
    let is_string_pointer = match &pi_state {
        Some(pi_state) => self.check_if_output_is_string_pointer_and_add_targets(
            pi_state,
            output,
            runtime_memory_image,
            block_first_def_set,
        ),
        None if is_assign => self.add_global_pointer_if_input_is_string_constant(
            runtime_memory_image,
            block_first_def_set,
            output,
            input,
        ),
        None => false,
    };
    // If the output variable is tracked and the new data is not a string pointer,
    // remove the variable from the pointer map.
    if !is_string_pointer {
        self.variable_to_pointer_map.remove(output);
    }
}
/// Checks whether the value of `output` in the pointer inference state is a string pointer.
///
/// - If the value has an absolute part that is the address of a string constant in global memory,
///   the global pointer is added to the variable to pointer map
///   and missing relative targets are added to the string maps as *Top* values.
/// - If the value has no absolute part but relative targets, the pointer is added to
///   the variable to pointer map if it is tracked (see `pointer_added_to_variable_maps`).
///
/// Returns `true` if a pointer was added for `output`.
pub fn check_if_output_is_string_pointer_and_add_targets(
    &mut self,
    pi_state: &PointerInferenceState,
    output: &Variable,
    runtime_memory_image: &RuntimeMemoryImage,
    block_first_def_set: &HashSet<(Tid, Tid)>,
) -> bool {
    let output_domain = pi_state.eval(&Expression::Var(output.clone()));
    match output_domain.get_absolute_value() {
        Some(value) => {
            let global_pointer = value.try_to_bitvec().ok().and_then(|constant| {
                self.evaluate_constant(runtime_memory_image, block_first_def_set, constant)
            });
            match global_pointer {
                Some(global_pointer) => {
                    self.variable_to_pointer_map
                        .insert(output.clone(), global_pointer);
                    self.add_relative_targets_to_string_maps(pi_state, &output_domain);
                    true
                }
                None => false,
            }
        }
        None if !output_domain.get_relative_values().is_empty() => {
            self.pointer_added_to_variable_maps(pi_state, output, output_domain)
        }
        None => false,
    }
}
/// If the input is a constant address of a string in global memory,
/// track the corresponding global pointer for `output` in the variable to pointer map.
///
/// Returns `true` if the pointer was added.
pub fn add_global_pointer_if_input_is_string_constant(
    &mut self,
    runtime_memory_image: &RuntimeMemoryImage,
    block_first_def_set: &HashSet<(Tid, Tid)>,
    output: &Variable,
    input: &Expression,
) -> bool {
    let constant = match input {
        Expression::Const(constant) => constant,
        _ => return false,
    };
    match self.evaluate_constant(runtime_memory_image, block_first_def_set, constant.clone()) {
        Some(global_pointer) => {
            self.variable_to_pointer_map
                .insert(output.clone(), global_pointer);
            true
        }
        None => false,
    }
}
/// Adds all relative targets of the given pointer to the string maps as *Top* values
/// if they are not already tracked.
///
/// Stack targets are only added if their offset is exactly known.
/// Strings that are already tracked for a target are left unchanged.
pub fn add_relative_targets_to_string_maps(
    &mut self,
    pi_state: &PointerInferenceState,
    pointer: &DataDomain<IntervalDomain>,
) {
    for (target, offset) in pointer.get_relative_values().iter() {
        if !State::<T>::is_stack_pointer(pi_state, target) {
            self.heap_to_string_map
                .entry(target.clone())
                .or_insert_with(T::create_top_value_domain);
        } else if let Ok(offset_value) = offset.try_to_offset() {
            self.stack_offset_to_string_map
                .entry(offset_value)
                .or_insert_with(T::create_top_value_domain);
        }
    }
}
/// Adds a pointer to the variable to pointer map for `output` if the pointer is tracked.
///
/// A pointer counts as tracked if (checked in this order)
/// - it is an unassigned return pointer (it is then removed from the unassigned set),
/// - it is already contained in one of the pointer maps,
/// - or at least one of its targets is a tracked string
///   (the other targets are then added to the string maps as *Top* values).
///
/// Returns true if it was added.
pub fn pointer_added_to_variable_maps(
    &mut self,
    pi_state: &PointerInferenceState,
    output: &Variable,
    loaded_pointer: DataDomain<IntervalDomain>,
) -> bool {
    // `remove` reports whether the pointer was contained in the set.
    // The short-circuit evaluation matters, since the last check may add string entries.
    let is_tracked = self.unassigned_return_pointer.remove(&loaded_pointer)
        || self.pointer_is_in_pointer_maps(&loaded_pointer)
        || self.pointer_targets_partially_tracked(pi_state, &loaded_pointer);
    if is_tracked {
        self.variable_to_pointer_map
            .insert(output.clone(), loaded_pointer);
    }

    is_tracked
}
/// Adds a pointer to the stack offset to pointer map if the pointer is tracked.
/// The stack offsets are derived from the evaluation of `target_address`.
///
/// A pointer counts as tracked if (checked in this order)
/// - it is an unassigned return pointer (it is then removed from the unassigned set),
/// - it is already contained in one of the pointer maps,
/// - or at least one of its targets is a tracked string
///   (the other targets are then added to the string maps as *Top* values).
///
/// Returns true if the pointer is tracked.
pub fn pointer_added_to_stack_maps(
    &mut self,
    pi_state: &PointerInferenceState,
    target_address: &Expression,
    potential_string_pointer: DataDomain<IntervalDomain>,
) -> bool {
    // `remove` reports whether the pointer was contained in the set.
    // The short-circuit evaluation matters, since the last check may add string entries.
    let is_tracked = self
        .unassigned_return_pointer
        .remove(&potential_string_pointer)
        || self.pointer_is_in_pointer_maps(&potential_string_pointer)
        || self.pointer_targets_partially_tracked(pi_state, &potential_string_pointer);
    if is_tracked {
        self.add_pointer_to_stack_map(target_address, potential_string_pointer);
    }

    is_tracked
}
/// Checks whether at least one target of the pointer is a tracked string.
///
/// If so, *Top* values are added to the string maps for all other targets,
/// since it is assumed that all targets point to the same data type.
/// Stack targets whose offset is not exactly known are ignored.
///
/// Returns `true` if at least one target is a tracked string.
pub fn pointer_targets_partially_tracked(
    &mut self,
    pi_state: &PointerInferenceState,
    pointer: &DataDomain<IntervalDomain>,
) -> bool {
    let mut untracked_stack_offsets: Vec<i64> = Vec::new();
    let mut untracked_heap_ids: Vec<AbstractIdentifier> = Vec::new();
    let mut contains_string_target = false;

    for (target, offset) in pointer.get_relative_values().iter() {
        if State::<T>::is_stack_pointer(pi_state, target) {
            match offset.try_to_offset() {
                Ok(offset_value)
                    if self.stack_offset_to_string_map.contains_key(&offset_value) =>
                {
                    contains_string_target = true
                }
                Ok(offset_value) => untracked_stack_offsets.push(offset_value),
                Err(_) => (),
            }
        } else if self.heap_to_string_map.contains_key(target) {
            contains_string_target = true;
        } else {
            untracked_heap_ids.push(target.clone());
        }
    }

    if contains_string_target {
        self.add_top_domain_values_for_additional_pointer_targets(
            untracked_stack_offsets,
            untracked_heap_ids,
        );
    }

    contains_string_target
}
/// Adds *Top* values to the stack and heap string maps for the given pointer targets.
///
/// Strings that are already tracked for one of the given targets are overwritten.
pub fn add_top_domain_values_for_additional_pointer_targets(
    &mut self,
    new_stack_entries: Vec<i64>,
    new_heap_entries: Vec<AbstractIdentifier>,
) {
    self.stack_offset_to_string_map.extend(
        new_stack_entries
            .into_iter()
            .map(|offset| (offset, T::create_top_value_domain())),
    );
    self.heap_to_string_map.extend(
        new_heap_entries
            .into_iter()
            .map(|heap_id| (heap_id, T::create_top_value_domain())),
    );
}
/// Checks whether a pointer equal to the given one is contained
/// in the stack offset to pointer map or in the variable to pointer map.
pub fn pointer_is_in_pointer_maps(&self, pointer: &DataDomain<IntervalDomain>) -> bool {
    self.stack_offset_to_pointer_map
        .values()
        .chain(self.variable_to_pointer_map.values())
        .any(|tracked_value| tracked_value == pointer)
}
/// Handles store Def Terms.
///
/// - If the stored value is a constant address of a string in global memory,
///   the global pointer is added to the stack offset to pointer map.
/// - Otherwise the value is evaluated in the pointer inference state (if one is available).
///   If the resulting pointer is tracked, it is added to the stack offset to pointer map.
///   If it is not tracked but has an absolute value,
///   the absolute value is handled like a stored constant.
pub fn handle_store(
    &mut self,
    target_address: &Expression,
    value: &Expression,
    runtime_memory_image: &RuntimeMemoryImage,
    block_first_def_set: &HashSet<(Tid, Tid)>,
) {
    if let Expression::Const(constant) = value {
        let global_pointer =
            self.evaluate_constant(runtime_memory_image, block_first_def_set, constant.clone());
        if let Some(data) = global_pointer {
            self.add_pointer_to_stack_map(target_address, data);
        }
        return;
    }

    let pi_state = match self.get_pointer_inference_state().cloned() {
        Some(pi_state) => pi_state,
        None => return,
    };
    let potential_string_pointer = pi_state.eval(value);
    if self.pointer_added_to_stack_maps(
        &pi_state,
        target_address,
        potential_string_pointer.clone(),
    ) {
        return;
    }

    // The pointer is not tracked. Retry with its absolute value as a constant.
    let absolute_constant = potential_string_pointer
        .get_absolute_value()
        .and_then(|constant| constant.try_to_bitvec().ok());
    if let Some(constant_value) = absolute_constant {
        self.handle_store(
            target_address,
            &Expression::Const(constant_value),
            runtime_memory_image,
            block_first_def_set,
        );
    }
}
/// Adds a string pointer to the stack offset to pointer map
/// for every stack position that the target expression may point to.
///
/// The target expression is evaluated in the pointer inference state.
/// Nothing is added if no pointer inference state is available.
/// Targets that are not on the stack or whose offset is not exactly known are ignored.
pub fn add_pointer_to_stack_map(
    &mut self,
    target: &Expression,
    string_pointer: DataDomain<IntervalDomain>,
) {
    let pi_state = match self.get_pointer_inference_state() {
        Some(pi_state) => pi_state.clone(),
        None => return,
    };
    let target_domain = pi_state.eval(target);
    for (target_id, offset) in target_domain.get_relative_values().iter() {
        if !State::<T>::is_stack_pointer(&pi_state, target_id) {
            continue;
        }
        if let Ok(offset_value) = offset.try_to_offset() {
            self.stack_offset_to_pointer_map
                .insert(offset_value, string_pointer.clone());
        }
    }
}
/// Removes all non callee saved register entries from the variable to pointer map.
///
/// Which registers are callee saved is determined by the calling convention
/// of the given external symbol.
/// The pointers of the removed entries are not dropped:
/// they are moved to the set of unassigned return pointers.
pub fn remove_non_callee_saved_pointer_entries_for_external_symbol(
    &mut self,
    project: &Project,
    extern_symbol: &ExternSymbol,
) {
    let cconv = extern_symbol.get_calling_convention(project);
    // Borrow the set separately so that it can be filled while the map is filtered in place.
    let unassigned_return_pointer = &mut self.unassigned_return_pointer;
    self.variable_to_pointer_map.retain(|register, pointer| {
        let is_callee_saved = cconv.callee_saved_register.contains(&register.name);
        if !is_callee_saved {
            unassigned_return_pointer.insert(pointer.clone());
        }
        is_callee_saved
    });
}
/// Checks whether a target refers to the Stack.
///
/// This is the case if the target is the stack id of the pointer inference state
/// or one of its caller stack ids.
pub fn is_stack_pointer(pi_state: &PointerInferenceState, target: &AbstractIdentifier) -> bool {
pi_state.caller_stack_ids.contains(target) || pi_state.stack_id == *target
}
}
#[cfg(test)]
mod tests;
use super::*;
use crate::{
abstract_domain::{AbstractLocation, CharacterInclusionDomain},
analysis::{
pointer_inference::State as PiState,
string_abstraction::tests::mock_project_with_intraprocedural_control_flow,
},
};
impl<T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> State<T> {
    /// Mock a state with empty maps for the given subroutine.
    /// The pointer inference state is newly created for the 4-byte stack register `sp`
    /// and the TID of the subroutine.
    pub fn mock_with_default_pi_state(current_sub: Term<Sub>) -> Self {
        let default_pi_state =
            PointerInferenceState::new(&Variable::mock("sp", 4_u64), current_sub.tid.clone());

        Self::mock_with_given_pi_state(current_sub, default_pi_state)
    }

    /// Mock a state with empty maps for the given subroutine
    /// that holds the given pointer inference state.
    pub fn mock_with_given_pi_state(current_sub: Term<Sub>, pi_state: PiState) -> Self {
        State {
            unassigned_return_pointer: HashSet::new(),
            variable_to_pointer_map: HashMap::new(),
            stack_offset_to_pointer_map: HashMap::new(),
            stack_offset_to_string_map: HashMap::new(),
            heap_to_string_map: HashMap::new(),
            current_sub: Arc::new(Some(current_sub)),
            pointer_inference_state: Some(pi_state),
        }
    }

    /// Borrow the set of unassigned return pointers (test-only accessor).
    pub fn _get_unassigned_return_pointer(&self) -> &HashSet<DataDomain<IntervalDomain>> {
        &self.unassigned_return_pointer
    }
}
// Checks that only those string entries survive the deletion
// that are targeted by a pointer in the variable map, the stack pointer map
// or the set of unassigned return pointers.
#[test]
fn test_delete_string_map_entries_if_no_pointer_targets_are_tracked() {
let mut state: State<CharacterInclusionDomain> =
State::mock_with_default_pi_state(Sub::mock("func"));
let stack_id = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
);
// Pointer to stack offset 0.
let stack_pointer: DataDomain<IntervalDomain> = DataDomain::from_target(
stack_id.clone(),
Bitvector::zero(apint::BitWidth::from(4)).into(),
);
let heap_id_1 = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&Variable::mock("r5", 4)).unwrap(),
);
let heap_pointer_1: DataDomain<IntervalDomain> = DataDomain::from_target(
heap_id_1.clone(),
Bitvector::zero(apint::BitWidth::from(4)).into(),
);
let heap_id_2 = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&Variable::mock("r6", 4)).unwrap(),
);
let heap_pointer_2: DataDomain<IntervalDomain> = DataDomain::from_target(
heap_id_2.clone(),
Bitvector::zero(apint::BitWidth::from(4)).into(),
);
// No pointer to this heap object is tracked.
let heap_id_3 = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&Variable::mock("r7", 4)).unwrap(),
);
// Track one pointer in each of the three pointer collections.
state
.variable_to_pointer_map
.insert(Variable::mock("r0", 4), stack_pointer);
state.stack_offset_to_pointer_map.insert(-8, heap_pointer_1);
state.unassigned_return_pointer.insert(heap_pointer_2);
state
.stack_offset_to_string_map
.insert(0, CharacterInclusionDomain::Top); // pointer tracked
state
.stack_offset_to_string_map
.insert(4, CharacterInclusionDomain::Top); // will be deleted
state
.heap_to_string_map
.insert(heap_id_1.clone(), CharacterInclusionDomain::Top); // pointer tracked
state
.heap_to_string_map
.insert(heap_id_2.clone(), CharacterInclusionDomain::Top); // pointer tracked
state
.heap_to_string_map
.insert(heap_id_3.clone(), CharacterInclusionDomain::Top); // will be deleted
let new_state = state.delete_string_map_entries_if_no_pointer_targets_are_tracked();
assert_eq!(
new_state.stack_offset_to_string_map.get(&0),
Some(&CharacterInclusionDomain::Top)
);
assert_eq!(new_state.stack_offset_to_string_map.get(&4), None);
assert_eq!(
new_state.heap_to_string_map.get(&heap_id_1),
Some(&CharacterInclusionDomain::Top)
);
assert_eq!(
new_state.heap_to_string_map.get(&heap_id_2),
Some(&CharacterInclusionDomain::Top)
);
assert_eq!(new_state.heap_to_string_map.get(&heap_id_3), None);
}
// Checks that `evaluate_constant` returns a pointer for the constant 0x7000
// and `None` for the constant 0x1234 when using the mocked runtime memory image.
#[test]
fn test_evaluate_constant() {
let runtime_memory_image = RuntimeMemoryImage::mock();
let constant = Bitvector::from_i32(0x7000);
let state: State<CharacterInclusionDomain> =
State::mock_with_default_pi_state(Sub::mock("func"));
// The set is empty, so no constant is filtered out as a block start address.
let block_first_def_set: HashSet<(Tid, Tid)> = HashSet::new();
// Expected: the constant is returned as an absolute pointer value.
assert_eq!(
Some(DataDomain::from(Bitvector::from_i32(
constant.clone().try_to_i32().unwrap()
))),
state.evaluate_constant(&runtime_memory_image, &block_first_def_set, constant)
);
// Expected: no pointer is returned for this constant.
assert_eq!(
None,
state.evaluate_constant(
&runtime_memory_image,
&block_first_def_set,
Bitvector::from_i32(0x1234)
)
);
}
// Checks the handling of assign and load `Def`s for constant and non-constant inputs,
// with and without an available pointer inference state.
// The test cases share one mutable state, so their order matters.
#[test]
fn test_handle_assign_and_load() {
let sub = Sub::mock("func");
let mut state: State<CharacterInclusionDomain> = State::mock_with_default_pi_state(sub.clone());
let runtime_memory_image = RuntimeMemoryImage::mock();
let output = Variable::mock("r1", 4);
let constant_input = Expression::Const(Bitvector::from_str_radix(16, "7000").unwrap());
let return_address_input = Expression::Const(Bitvector::from_str_radix(16, "14718").unwrap());
let other_input = Expression::var("r6", 4);
// Register the address 0x14718 as the address of a first `Def` of a block.
let mut block_first_def_set: HashSet<(Tid, Tid)> = HashSet::new();
let mut return_tid = Tid::new("14718");
return_tid.address = "14718".to_string();
block_first_def_set.insert((return_tid, sub.tid));
let constant_data_domain = DataDomain::from(Bitvector::from_i64(0x7000));
// Let the output register hold the constant 0x7000 in the pointer inference state.
let mut pi_state = state.get_pointer_inference_state().unwrap().clone();
pi_state.set_register(&output, constant_data_domain.clone());
state.set_pointer_inference_state(Some(pi_state.clone()));
// Test Case 1: Assign Def with constant input
state.handle_assign_and_load(
&output,
&constant_input,
&runtime_memory_image,
&block_first_def_set,
true,
);
assert_eq!(
*state.variable_to_pointer_map.get(&output).unwrap(),
constant_data_domain
);
state.set_all_maps_empty();
// Test Case 1.1: Assign Def with constant input but no pi_state.
state.set_pointer_inference_state(None);
state.handle_assign_and_load(
&output,
&constant_input,
&runtime_memory_image,
&block_first_def_set,
true,
);
assert_eq!(
*state.variable_to_pointer_map.get(&output).unwrap(),
constant_data_domain
);
state.set_all_maps_empty();
// Test Case 1.2: Assign Def with constant input that is a return address
// (the pointer inference state is still `None` here).
state.handle_assign_and_load(
&output,
&return_address_input,
&runtime_memory_image,
&block_first_def_set,
true,
);
assert!(state.variable_to_pointer_map.is_empty());
// Test Case 2: Assign Def with other input
let heap_id = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&Variable::mock("r5", 4)).unwrap(),
);
let heap_pointer: DataDomain<IntervalDomain> = DataDomain::from_target(
heap_id.clone(),
Bitvector::zero(apint::BitWidth::from(4)).into(),
);
// The output register now holds the heap pointer in the pointer inference state
// and the heap pointer is tracked as an unassigned return pointer.
pi_state.set_register(&output, heap_pointer.clone());
state.set_pointer_inference_state(Some(pi_state.clone()));
state.unassigned_return_pointer.insert(heap_pointer.clone());
state.handle_assign_and_load(
&output,
&other_input,
&runtime_memory_image,
&block_first_def_set,
true,
);
assert_eq!(
*state.variable_to_pointer_map.get(&output).unwrap(),
heap_pointer
);
state.set_all_maps_empty();
// Test Case 3: Load Def with constant input
// NOTE(review): no assertion follows this call,
// so this test case only checks that the call does not panic.
state.handle_assign_and_load(
&output,
&constant_input,
&runtime_memory_image,
&block_first_def_set,
false,
);
// Test Case 4: Load Def with other input
state.unassigned_return_pointer.insert(heap_pointer.clone());
state.handle_assign_and_load(
&output,
&other_input,
&runtime_memory_image,
&block_first_def_set,
false,
);
assert_eq!(
*state.variable_to_pointer_map.get(&output).unwrap(),
heap_pointer
);
}
// Checks `pointer_added_to_variable_maps` for pointers that are tracked as unassigned return pointer,
// tracked in the variable map, partially tracked through their targets and not tracked at all.
// The test cases share one mutable state, so their order matters.
#[test]
fn test_add_pointer_to_variable_maps_if_tracked() {
let output_var = Variable::mock("r2", 4);
let origin_var = Variable::mock("r5", 4);
let mut mock_state =
State::<CharacterInclusionDomain>::mock_with_default_pi_state(Sub::mock("func"));
let pi_state = mock_state.get_pointer_inference_state().unwrap().clone();
let heap_id = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&Variable::mock("r5", 4)).unwrap(),
);
let stack_id = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&Variable::mock("sp", 4)).unwrap(),
);
// Pointer to stack offset 4.
let mut source_pointer: DataDomain<IntervalDomain> =
DataDomain::from_target(stack_id.clone(), Bitvector::from_i32(4).into());
// Test Case 1: Pointer is tracked in unassigned pointer map.
mock_state.add_unassigned_return_pointer(source_pointer.clone());
assert!(mock_state.pointer_added_to_variable_maps(
&pi_state,
&output_var,
source_pointer.clone()
));
mock_state.set_all_maps_empty();
// Test Case 2: Pointer is tracked in register to pointer map.
mock_state.add_new_variable_to_pointer_entry(origin_var, source_pointer.clone());
assert!(mock_state.pointer_added_to_variable_maps(
&pi_state,
&output_var,
source_pointer.clone()
));
assert_eq!(
source_pointer,
*mock_state
.get_variable_to_pointer_map()
.get(&output_var)
.unwrap()
);
mock_state.set_all_maps_empty();
// Test Case 3: Pointer is partially tracked.
// Only the stack target is a tracked string, the heap target is expected to be added as Top.
source_pointer.insert_relative_value(heap_id.clone(), Bitvector::zero(32.into()).into());
mock_state.add_new_stack_offset_to_string_entry(4, CharacterInclusionDomain::Top);
assert!(mock_state.pointer_added_to_variable_maps(
&pi_state,
&output_var,
source_pointer.clone()
));
assert_eq!(
source_pointer,
*mock_state
.get_variable_to_pointer_map()
.get(&output_var)
.unwrap()
);
assert_eq!(
CharacterInclusionDomain::Top,
*mock_state.get_heap_to_string_map().get(&heap_id).unwrap()
);
mock_state.set_all_maps_empty();
// Test Case 4: Pointer is not tracked.
assert!(!mock_state.pointer_added_to_variable_maps(&pi_state, &output_var, source_pointer));
}
// Checks that a pointer only counts as partially tracked if one of its targets is a tracked string
// and that the untracked target is then added to the stack string map.
#[test]
fn test_pointer_targets_partially_tracked() {
let sp_reg = Variable::mock("sp", 4);
let mut mock_state =
State::<CharacterInclusionDomain>::mock_with_default_pi_state(Sub::mock("func"));
let stack_id = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&sp_reg).unwrap(),
);
let caller_stack_id = AbstractIdentifier::new(
Tid::new("caller_func"),
AbstractLocation::from_var(&sp_reg).unwrap(),
);
// The pointer targets offset 0 of the own stack frame
// and offset -8 of the stack frame of the caller.
let mut string_pointer = DataDomain::from_target(
stack_id,
IntervalDomain::new(Bitvector::from_i32(0), Bitvector::from_i32(0)),
);
string_pointer.insert_relative_value(
caller_stack_id.clone(),
IntervalDomain::new(Bitvector::from_i32(-8), Bitvector::from_i32(-8)),
);
// Register the caller stack id, so that it is recognized as a stack target.
let mut pi_state = mock_state.get_pointer_inference_state().unwrap().clone();
pi_state.caller_stack_ids.insert(caller_stack_id);
mock_state.set_pointer_inference_state(Some(pi_state.clone()));
// No string is tracked yet.
assert!(!mock_state.pointer_targets_partially_tracked(&pi_state, &string_pointer));
mock_state
.stack_offset_to_string_map
.insert(0, CharacterInclusionDomain::Top);
// The string at offset 0 is tracked now, so an entry for offset -8 has to be added.
assert!(mock_state.pointer_targets_partially_tracked(&pi_state, &string_pointer));
assert!(mock_state.stack_offset_to_string_map.contains_key(&(-8)));
}
/// Checks that `pointer_is_in_pointer_maps` finds a pointer in either the
/// stack-offset-to-pointer map or the variable-to-pointer map, and nowhere else.
#[test]
fn test_pointer_is_in_pointer_maps() {
    let stack_register = Variable::mock("sp", 4);
    let holding_register = Variable::mock("r2", 4);
    let frame_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&stack_register).unwrap(),
    );
    let zero_offset = IntervalDomain::new(Bitvector::from_i32(0), Bitvector::from_i32(0));
    let string_pointer = DataDomain::from_target(frame_id, zero_offset);

    let mut state =
        State::<CharacterInclusionDomain>::mock_with_default_pi_state(Sub::mock("func"));

    // Both maps are empty: the pointer is unknown.
    assert!(!state.pointer_is_in_pointer_maps(&string_pointer));

    // The pointer is stored on the stack.
    state
        .stack_offset_to_pointer_map
        .insert(-4, string_pointer.clone());
    assert!(state.pointer_is_in_pointer_maps(&string_pointer));

    // The pointer is held only in a register.
    state.stack_offset_to_pointer_map.remove(&(-4i64));
    state
        .variable_to_pointer_map
        .insert(holding_register, string_pointer.clone());
    assert!(state.pointer_is_in_pointer_maps(&string_pointer));
}
/// Exercises `State::handle_store` for a store of register `r3` to the address held in `r2`.
///
/// The pointer inference state is prepared so that `r2` points to stack offset 0 and
/// `r3` holds a pointer to stack offset 8. Depending on which string maps know that
/// pointer, the store is expected to add (or not add) an entry at offset 0 of the
/// stack-offset-to-pointer map.
#[test]
fn test_handle_store() {
let block_first_def_set: HashSet<(Tid, Tid)> = HashSet::new();
let target_var = Variable::mock("r2", 4);
let value_var = Variable::mock("r3", 4);
let value_location = Expression::Var(value_var.clone());
let sp_reg = Variable::mock("sp", 4);
let target_location = Expression::Var(target_var.clone());
let runtime_memory_image = RuntimeMemoryImage::mock();
let mut mock_state =
State::<CharacterInclusionDomain>::mock_with_default_pi_state(Sub::mock("func"));
let stack_id = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&sp_reg).unwrap(),
);
// The value that gets stored: a pointer to stack offset 8.
let string_pointer = DataDomain::from_target(
stack_id.clone(),
IntervalDomain::new(Bitvector::from_i32(8), Bitvector::from_i32(8)),
);
// The address that is written to: stack offset 0.
let target_address: DataDomain<IntervalDomain> = DataDomain::from_target(
stack_id,
IntervalDomain::new(Bitvector::from_i32(0), Bitvector::from_i32(0)),
);
// Mirror the store in the pointer inference state before handing it to the string state.
let mut pi_state = mock_state.get_pointer_inference_state().unwrap().clone();
pi_state.set_register(&target_var, target_address.clone());
pi_state.set_register(&value_var, string_pointer.clone());
pi_state
.store_value(&target_address, &string_pointer, &runtime_memory_image)
.unwrap();
mock_state.set_pointer_inference_state(Some(pi_state));
// Test Case 1: The stored pointer is not a tracked string pointer.
mock_state.handle_store(
&target_location,
&value_location,
&runtime_memory_image,
&block_first_def_set,
);
assert!(mock_state.stack_offset_to_pointer_map.is_empty());
// Test Case 2: Pointer is an unassigned string pointer returned from a symbol call.
mock_state
.unassigned_return_pointer
.insert(string_pointer.clone());
mock_state.handle_store(
&target_location,
&value_location,
&runtime_memory_image,
&block_first_def_set,
);
assert_eq!(
string_pointer,
*mock_state.stack_offset_to_pointer_map.get(&(0i64)).unwrap()
);
// The pointer must have been moved out of the unassigned set.
assert!(mock_state.unassigned_return_pointer.is_empty());
// Test Case 3: Pointer is already tracked.
mock_state.set_all_maps_empty();
mock_state
.variable_to_pointer_map
.insert(Variable::mock("r0", 4), string_pointer.clone());
mock_state.handle_store(
&target_location,
&value_location,
&runtime_memory_image,
&block_first_def_set,
);
assert_eq!(
string_pointer,
*mock_state.stack_offset_to_pointer_map.get(&(0i64)).unwrap()
);
// Test Case 4: Pointer is partially tracked.
// NOTE(review): only the setup exists for this case; `handle_store` is never called
// and nothing is asserted, so the case currently verifies nothing.
mock_state.set_all_maps_empty();
mock_state
.variable_to_pointer_map
.insert(Variable::mock("r0", 4), string_pointer.clone());
// Test Case 5: Global address pointer as constant.
// Test Case 6: Global address pointer in variable.
// NOTE(review): test cases 5 and 6 are placeholders without any code.
}
/// Checks that `add_pointer_to_stack_map` records the pointer under stack offset 0
/// when the target expression is a register that points to stack offset 0.
#[test]
fn test_add_pointer_to_stack_map() {
    let target_register = Variable::mock("r2", 4);
    let stack_register = Variable::mock("sp", 4);
    let frame_id = AbstractIdentifier::new(
        Tid::new("func"),
        AbstractLocation::from_var(&stack_register).unwrap(),
    );
    let zero_offset = IntervalDomain::new(Bitvector::from_i32(0), Bitvector::from_i32(0));
    let string_pointer: DataDomain<IntervalDomain> =
        DataDomain::from_target(frame_id, zero_offset);

    let mut state =
        State::<CharacterInclusionDomain>::mock_with_default_pi_state(Sub::mock("func"));
    // Let the pointer inference state know that r2 holds the stack pointer value.
    let mut pi_state = state.get_pointer_inference_state().unwrap().clone();
    pi_state.set_register(&target_register, string_pointer.clone());
    state.set_pointer_inference_state(Some(pi_state));

    let target_expression = Expression::Var(target_register);
    state.add_pointer_to_stack_map(&target_expression, string_pointer);

    assert!(state.stack_offset_to_pointer_map.contains_key(&0));
}
/// Checks that a call to an external symbol drops the pointer entry of `r0`
/// while the entry of `r11` (callee-saved in the mocked calling convention) survives.
#[test]
fn test_remove_non_callee_saved_pointer_entries_for_external_symbol() {
    let sprintf = ExternSymbol::mock_sprintf_symbol_arm();
    let project =
        mock_project_with_intraprocedural_control_flow(vec![(sprintf.clone(), vec![true])], "func");
    let current_sub = project.program.term.subs.first().unwrap().clone();
    let mut state = State::<CharacterInclusionDomain>::mock_with_default_pi_state(current_sub);

    // Register one (empty) pointer value for each of the two registers.
    let empty_pointer = DataDomain::new_empty(ByteSize::new(4));
    for register_name in ["r0", "r11"].iter() {
        state
            .variable_to_pointer_map
            .insert(Variable::mock(*register_name, 4), empty_pointer.clone());
    }

    state.remove_non_callee_saved_pointer_entries_for_external_symbol(&project, &sprintf);

    let still_tracked =
        |register_name: &str| state.variable_to_pointer_map.contains_key(&Variable::mock(register_name, 4));
    assert!(!still_tracked("r0"));
    assert!(still_tracked("r11"));
}
use crate::intermediate_representation::*;
pub struct Setup;
impl Setup {
pub fn new() -> Self {
Setup
}
pub fn format_string_constant(&self, tid: &str, register: &str) -> Term<Def> {
Def::assign(
tid,
Variable::mock(register, 4),
Expression::const_from_i32(0x6000),
)
}
pub fn string_input_constant(&self, tid: &str, register: &str, address: i32) -> Term<Def> {
Def::assign(
tid,
Variable::mock(register, 4),
Expression::const_from_i32(address),
)
}
// FIXME: Move this function to the intermediate_representation module
pub fn pointer_plus_offset(
&self,
tid: &str,
output: &str,
pointer: &str,
offset: i64,
) -> Term<Def> {
Def::assign(
tid,
Variable::mock(output, 4),
Expression::var(pointer, 4).plus_const(offset),
)
}
// FIXME: Move this function to the intermediate_representation module
pub fn pointer_minus_offset(
&self,
tid: &str,
output: &str,
pointer: &str,
offset: i64,
) -> Term<Def> {
Def::assign(
tid,
Variable::mock(output, 4),
Expression::var(pointer, 4).minus_const(offset),
)
}
// FIXME: Move this function to the intermediate_representation module
pub fn pointer_plus_offset_to_temp_var(
&self,
tid: &str,
tmp_name: &str,
pointer: &str,
offset: i64,
) -> Term<Def> {
Def::assign(
tid,
Variable {
name: String::from(tmp_name),
size: ByteSize::new(4),
is_temp: true,
},
Expression::var(pointer, 4).plus_const(offset),
)
}
// FIXME: Move this function to the intermediate_representation module
pub fn store_var_content_at_temp_var(&self, tid: &str, tmp_name: &str, var: &str) -> Term<Def> {
Def::store(
tid,
Expression::Var(Variable {
name: String::from(tmp_name),
size: ByteSize::new(4),
is_temp: true,
}),
Expression::var(var, 4),
)
}
// FIXME: Move this function to the intermediate_representation module
pub fn load_var_content_from_temp_var(
&self,
tid: &str,
var: &str,
tmp_name: &str,
) -> Term<Def> {
Def::load(
tid,
Variable::mock(var, 4 as u64),
Expression::Var(Variable {
name: String::from(tmp_name),
size: ByteSize::new(4),
is_temp: true,
}),
)
}
}
/// Builds the defs that set up the parameters of a mocked `sprintf` call in block `blk_num`.
///
/// Generated sequence (the definition of `r1` depends on `format_known`):
///
/// ```txt
/// r11 = INT_ADD sp, 4:4
/// r12 = COPY 0x3002:4
/// r0 = INT_SUB r11, 0x58:4      // destination string pointer
/// r1 = COPY 0x6000:4            // format_known: constant format string
/// r1 = INT_SUB r11, 0x62:4      // otherwise: variable format string
/// r2 = INT_ADD sp, 24:4         // variable input in register
/// r3 = INT_ADD sp, 16:4         // variable input in register
/// $U1050:4 = INT_ADD sp, 0:4
/// STORE ram($U1050:4), r12      // first stack input (constant address 0x3002)
/// r12 = INT_ADD r11, 0x66:4
/// $U1050:4 = INT_ADD sp, 4:4
/// STORE ram($U1050:4), r12      // second stack input (variable)
/// ```
fn mock_defs_for_sprintf(format_known: bool, blk_num: usize) -> Vec<Term<Def>> {
    let setup = Setup::new();
    let format_string_def = if format_known {
        setup.format_string_constant(&format!("def_3_blk_{}", blk_num), "r1")
    } else {
        setup.pointer_minus_offset(&format!("def_3_blk_{}", blk_num), "r1", "r11", 0x62)
    };
    vec![
        setup.pointer_plus_offset(&format!("def_0_blk_{}", blk_num), "r11", "sp", 4),
        setup.string_input_constant(&format!("def_1_blk_{}", blk_num), "r12", 0x3002),
        setup.pointer_minus_offset(&format!("def_2_blk_{}", blk_num), "r0", "r11", 0x58),
        format_string_def,
        setup.pointer_plus_offset(&format!("def_4_blk_{}", blk_num), "r2", "sp", 24),
        setup.pointer_plus_offset(&format!("def_5_blk_{}", blk_num), "r3", "sp", 16),
        setup.pointer_plus_offset_to_temp_var(
            &format!("def_6_blk_{}", blk_num),
            "$U1050",
            "sp",
            0,
        ),
        setup.store_var_content_at_temp_var(&format!("def_7_blk_{}", blk_num), "$U1050", "r12"),
        setup.pointer_plus_offset(&format!("def_8_blk_{}", blk_num), "r12", "r11", 0x66),
        setup.pointer_plus_offset_to_temp_var(
            &format!("def_9_blk_{}", blk_num),
            "$U1050",
            "sp",
            4,
        ),
        setup.store_var_content_at_temp_var(&format!("def_10_blk_{}", blk_num), "$U1050", "r12"),
    ]
}
/// Builds the defs that set up the parameters of a mocked `scanf` call in block `blk_num`.
///
/// Generated sequence (the final definition of `r0` depends on `format_known`):
///
/// ```txt
/// r11 = INT_ADD sp, 4:4
/// r0 = INT_SUB r11, 0x3c:4
/// $U1050:4 = INT_ADD sp, 0:4
/// STORE ram($U1050:4), r0       // variable output 4 (on stack)
/// r3 = INT_SUB r11, 0x50:4      // variable output 3
/// r2 = INT_SUB r11, 0x62:4      // variable output 2
/// r1 = INT_SUB r11, 0x78:4      // variable output 1
/// r0 = COPY 0x6000:4            // format_known: constant format string
/// r0 = INT_SUB r11, 0x82:4      // otherwise: variable format string
/// ```
fn mock_defs_for_scanf(format_known: bool, blk_num: usize) -> Vec<Term<Def>> {
    let setup = Setup::new();
    let format_string_def = if format_known {
        setup.format_string_constant(&format!("def_7_blk_{}", blk_num), "r0")
    } else {
        setup.pointer_minus_offset(&format!("def_7_blk_{}", blk_num), "r0", "r11", 0x82)
    };
    vec![
        setup.pointer_plus_offset(&format!("def_0_blk_{}", blk_num), "r11", "sp", 4),
        setup.pointer_minus_offset(&format!("def_1_blk_{}", blk_num), "r0", "r11", 0x3c),
        setup.pointer_plus_offset_to_temp_var(
            &format!("def_2_blk_{}", blk_num),
            "$U1050",
            "sp",
            0,
        ),
        setup.store_var_content_at_temp_var(&format!("def_3_blk_{}", blk_num), "$U1050", "r0"),
        setup.pointer_minus_offset(&format!("def_4_blk_{}", blk_num), "r3", "r11", 0x50),
        setup.pointer_minus_offset(&format!("def_5_blk_{}", blk_num), "r2", "r11", 0x62),
        setup.pointer_minus_offset(&format!("def_6_blk_{}", blk_num), "r1", "r11", 0x78),
        format_string_def,
    ]
}
/// Builds the defs that set up the parameters of a mocked `sscanf` call in block `blk_num`.
///
/// Generated sequence (`r1` depends on `format_known`, `r0` on `source_known`):
///
/// ```txt
/// r11 = INT_ADD sp, 4:4
/// r3 = INT_SUB r11, 0x96:4
/// $U1050:4 = INT_ADD sp, 0:4
/// STORE ram($U1050:4), r3       // variable string input 3 (on stack)
/// r3 = INT_SUB r11, 0x88:4
/// $U1050:4 = INT_ADD sp, 4:4
/// STORE ram($U1050:4), r3       // variable string input 4 (on stack)
/// r3 = INT_SUB r11, 0x6c:4      // variable string input 2
/// r2 = INT_SUB r11, 0x80:4      // variable string input 1
/// r1 = COPY 0x6000:4            // format_known: constant format string
/// r1 = INT_SUB r11, 0x40:4      // otherwise: variable format string
/// r0 = COPY 0x7000:4            // source_known: constant source string
/// r0 = INT_SUB r11, 0x48:4      // otherwise: variable source string
/// ```
fn mock_defs_for_sscanf(source_known: bool, format_known: bool, blk_num: usize) -> Vec<Term<Def>> {
    let setup = Setup::new();
    let format_string_def = if format_known {
        setup.format_string_constant(&format!("def_9_blk_{}", blk_num), "r1")
    } else {
        setup.pointer_minus_offset(&format!("def_9_blk_{}", blk_num), "r1", "r11", 0x40)
    };
    let source_string_def = if source_known {
        setup.string_input_constant(&format!("def_10_blk_{}", blk_num), "r0", 0x7000)
    } else {
        setup.pointer_minus_offset(&format!("def_10_blk_{}", blk_num), "r0", "r11", 0x48)
    };
    vec![
        setup.pointer_plus_offset(&format!("def_0_blk_{}", blk_num), "r11", "sp", 4),
        setup.pointer_minus_offset(&format!("def_1_blk_{}", blk_num), "r3", "r11", 0x96),
        setup.pointer_plus_offset_to_temp_var(
            &format!("def_2_blk_{}", blk_num),
            "$U1050",
            "sp",
            0,
        ),
        setup.store_var_content_at_temp_var(&format!("def_3_blk_{}", blk_num), "$U1050", "r3"),
        setup.pointer_minus_offset(&format!("def_4_blk_{}", blk_num), "r3", "r11", 0x88),
        setup.pointer_plus_offset_to_temp_var(
            &format!("def_5_blk_{}", blk_num),
            "$U1050",
            "sp",
            4,
        ),
        setup.store_var_content_at_temp_var(&format!("def_6_blk_{}", blk_num), "$U1050", "r3"),
        setup.pointer_minus_offset(&format!("def_7_blk_{}", blk_num), "r3", "r11", 0x6c),
        setup.pointer_minus_offset(&format!("def_8_blk_{}", blk_num), "r2", "r11", 0x80),
        format_string_def,
        source_string_def,
    ]
}
/// Builds the defs that set up the parameters of a mocked `strcat` call in block `blk_num`.
///
/// Generated sequence (`r1` depends on `second_input_known`):
///
/// ```txt
/// r11 = INT_ADD sp, 4:4
/// r0 = INT_SUB r11, 0x40:4      // destination string
/// r1 = COPY 0x7000:4            // second_input_known: constant source string
/// r1 = INT_ADD r11, 0x24:4      // otherwise: variable source string
/// ```
fn mock_defs_for_strcat(second_input_known: bool, blk_num: usize) -> Vec<Term<Def>> {
    let setup = Setup::new();
    // Note: the two alternatives carry different def indices (2 and 3) in their TIDs.
    let second_input_def = if second_input_known {
        setup.string_input_constant(&format!("def_2_blk_{}", blk_num), "r1", 0x7000)
    } else {
        setup.pointer_plus_offset(&format!("def_3_blk_{}", blk_num), "r1", "r11", 0x24)
    };
    vec![
        setup.pointer_plus_offset(&format!("def_0_blk_{}", blk_num), "r11", "sp", 4),
        setup.pointer_minus_offset(&format!("def_1_blk_{}", blk_num), "r0", "r11", 0x40),
        second_input_def,
    ]
}
/// A mocked `free` call needs no parameter setup, so its block contains no defs.
fn mock_defs_for_free(_blk_num: usize) -> Vec<Term<Def>> {
    Vec::new()
}
/// Builds the single def that sets up a mocked `malloc` call in block `blk_num`:
///
/// ```txt
/// r0 = COPY 0xf
/// ```
fn mock_defs_for_malloc(blk_num: usize) -> Vec<Term<Def>> {
    let size_def =
        Setup::new().string_input_constant(&format!("def_0_blk_{}", blk_num), "r0", 0xf);
    vec![size_def]
}
/// Builds the defs that set up the parameters of a mocked `memcpy` call in block `blk_num`.
///
/// Generated sequence (`r1` depends on `copy_from_global`):
///
/// ```txt
/// r11 = INT_ADD sp, 4:4
/// r0 = INT_SUB r11, 0x40:4      // destination
/// r1 = COPY 0x7000:4            // copy_from_global: constant source address
/// r1 = INT_ADD r11, 0x24:4      // otherwise: source relative to r11
/// ```
fn mock_defs_for_memcpy(copy_from_global: bool, blk_num: usize) -> Vec<Term<Def>> {
    let setup = Setup::new();
    // Note: the two alternatives carry different def indices (2 and 3) in their TIDs.
    let source_def = if copy_from_global {
        setup.string_input_constant(&format!("def_2_blk_{}", blk_num), "r1", 0x7000)
    } else {
        setup.pointer_plus_offset(&format!("def_3_blk_{}", blk_num), "r1", "r11", 0x24)
    };
    vec![
        setup.pointer_plus_offset(&format!("def_0_blk_{}", blk_num), "r11", "sp", 4),
        setup.pointer_minus_offset(&format!("def_1_blk_{}", blk_num), "r0", "r11", 0x40),
        source_def,
    ]
}
impl ExternSymbol {
pub fn mock_memcpy_symbol_arm() -> ExternSymbol {
ExternSymbol {
tid: Tid::new("memcpy"),
addresses: vec!["UNKNOWN".to_string()],
name: "memcpy".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![
Arg::mock_register("r0", 4),
Arg::mock_register("r1", 4),
Arg::mock_register("r2", 4),
],
return_values: vec![Arg::mock_register("r0", 4)],
no_return: false,
has_var_args: true,
}
}
pub fn mock_sprintf_symbol_arm() -> ExternSymbol {
ExternSymbol {
tid: Tid::new("sprintf"),
addresses: vec!["UNKNOWN".to_string()],
name: "sprintf".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("r0", 4), Arg::mock_register("r1", 4)],
return_values: vec![Arg::mock_register("r0", 4)],
no_return: false,
has_var_args: true,
}
}
pub fn mock_scanf_symbol_arm() -> ExternSymbol {
ExternSymbol {
tid: Tid::new("scanf"),
addresses: vec!["UNKNOWN".to_string()],
name: "scanf".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("r0", 4)],
return_values: vec![Arg::mock_register("r0", 4)],
no_return: false,
has_var_args: true,
}
}
pub fn mock_sscanf_symbol_arm() -> ExternSymbol {
ExternSymbol {
tid: Tid::new("sscanf"),
addresses: vec!["UNKNOWN".to_string()],
name: "sscanf".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("r0", 4), Arg::mock_register("r1", 4)],
return_values: vec![Arg::mock_register("r0", 4)],
no_return: false,
has_var_args: true,
}
}
pub fn mock_strcat_symbol_arm() -> ExternSymbol {
ExternSymbol {
tid: Tid::new("strcat"),
addresses: vec!["UNKNOWN".to_string()],
name: "strcat".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("r0", 4), Arg::mock_register("r1", 4)],
return_values: vec![Arg::mock_register("r0", 4)],
no_return: false,
has_var_args: false,
}
}
pub fn mock_free_symbol_arm() -> ExternSymbol {
ExternSymbol {
tid: Tid::new("free"),
addresses: vec!["UNKNOWN".to_string()],
name: "free".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("r0", 4)],
return_values: vec![],
no_return: true,
has_var_args: false,
}
}
pub fn mock_malloc_symbol_arm() -> ExternSymbol {
ExternSymbol {
tid: Tid::new("malloc"),
addresses: vec!["UNKNOWN".to_string()],
name: "malloc".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("r0", 4)],
return_values: vec![Arg::mock_register("r0", 4)],
no_return: false,
has_var_args: false,
}
}
}
impl CallingConvention {
    /// Mock of a 32-bit ARM calling convention: `r0`-`r3` and `s0`-`s3` carry
    /// parameters, `r0` carries the return value and `r11` is callee-saved.
    pub fn mock_standard_arm_32() -> CallingConvention {
        let integer_registers = ["r0", "r1", "r2", "r3"];
        let float_registers = ["s0", "s1", "s2", "s3"];
        CallingConvention {
            // Named "__stdcall" so that the mock is usable as the standard calling convention in tests.
            name: String::from("__stdcall"),
            integer_parameter_register: integer_registers
                .iter()
                .map(ToString::to_string)
                .collect(),
            float_parameter_register: float_registers
                .iter()
                .map(ToString::to_string)
                .collect(),
            return_register: vec![String::from("r0")],
            callee_saved_register: vec![String::from("r11")],
        }
    }
}
impl DatatypeProperties {
    /// Mock of the C datatype sizes (in bytes) used for the 32-bit ARM test projects.
    pub fn mock_standard_arm_32() -> DatatypeProperties {
        let bytes = ByteSize::new;
        DatatypeProperties {
            // 1- and 2-byte types
            char_size: bytes(1),
            short_size: bytes(2),
            // 4-byte types
            float_size: bytes(4),
            integer_size: bytes(4),
            long_size: bytes(4),
            pointer_size: bytes(4),
            // 8-byte types
            double_size: bytes(8),
            long_double_size: bytes(8),
            long_long_size: bytes(8),
        }
    }
}
/// Creates the call instruction that ends block `blk_num`: a call to `symbol_name`
/// with the TID `<sub_name>_<symbol_name>_<blk_num>` that returns to the next block.
fn mock_abstract_string_call_to_external_function(
    sub_name: &str,
    symbol_name: &str,
    blk_num: usize,
) -> Term<Jmp> {
    let return_block = format!("block{}", blk_num + 1);
    let call_tid = format!("{}_{}_{}", sub_name, symbol_name, blk_num);
    Jmp::call(&call_tid, symbol_name, Some(&return_block))
}
/// Creates the block `block<blk_num>` that prepares the parameters for a call to
/// `symbol_name` and ends with that call.
///
/// `config` holds the boolean switches of the matching `mock_defs_for_*` function:
/// one flag for `sprintf`, `scanf`, `strcat` and `memcpy`, two flags for `sscanf`,
/// none for `free` and `malloc`.
///
/// # Panics
///
/// Panics if `symbol_name` is not one of the supported symbols or if `config`
/// contains fewer flags than the symbol requires.
fn mock_block_with_function_call(
    sub_name: &str,
    symbol_name: &str,
    config: &[bool],
    blk_num: usize,
) -> Term<Blk> {
    // Fetches a configuration flag and names the missing flag in the panic message.
    let flag = |index: usize| -> bool {
        *config.get(index).unwrap_or_else(|| {
            panic!(
                "Missing config flag {} for mocked call to {}",
                index, symbol_name
            )
        })
    };
    let defs: Vec<Term<Def>> = match symbol_name {
        "sprintf" => mock_defs_for_sprintf(flag(0), blk_num),
        "scanf" => mock_defs_for_scanf(flag(0), blk_num),
        "sscanf" => mock_defs_for_sscanf(flag(0), flag(1), blk_num),
        "strcat" => mock_defs_for_strcat(flag(0), blk_num),
        "free" => mock_defs_for_free(blk_num),
        "malloc" => mock_defs_for_malloc(blk_num),
        "memcpy" => mock_defs_for_memcpy(flag(0), blk_num),
        _ => panic!("Invalid symbol name for def mock"),
    };

    let mut blk = Blk::mock();
    blk.tid = Tid::new(format!("block{}", blk_num));
    blk.term.defs = defs;
    blk.term
        .jmps
        .push(mock_abstract_string_call_to_external_function(
            sub_name,
            symbol_name,
            blk_num,
        ));
    blk
}
/// Creates a function named `name` that calls the given external symbols one after another.
///
/// Block `block<i>` contains the call to the `i`-th symbol and returns to `block<i + 1>`.
/// A final empty block named `block<symbols.len()>` serves as the return target of the
/// last call. For an empty `symbols` list the function consists of the single empty
/// block `block0`, so that block numbering always starts at zero.
fn mock_sub_with_name_and_symbol_calls(
    name: &str,
    symbols: Vec<(ExternSymbol, Vec<bool>)>,
) -> Term<Sub> {
    let mut sub = Sub::mock(name);
    for (blk_num, (symbol, config)) in symbols.iter().enumerate() {
        sub.term.blocks.push(mock_block_with_function_call(
            &sub.term.name,
            &symbol.name,
            config,
            blk_num,
        ));
    }
    // The terminating block directly follows the last call block.
    let mut empty_blk = Blk::mock();
    empty_blk.tid = Tid::new(format!("block{}", symbols.len()));
    sub.term.blocks.push(empty_blk);
    sub
}
/// Creates a 32-bit ARM mock project with one function `sub_name` that serves as
/// the entry point and calls the external symbols listed in `symbol_call_config`.
///
/// All mocked external symbols (memcpy, sprintf, scanf, sscanf, strcat, free, malloc)
/// are registered in the program, whether or not they are actually called.
pub fn mock_project_with_intraprocedural_control_flow(
    symbol_call_config: Vec<(ExternSymbol, Vec<bool>)>,
    sub_name: &str,
) -> Project {
    let mut program = Program::mock_empty();
    program.entry_points.push(Tid::new(sub_name));
    program.subs.push(mock_sub_with_name_and_symbol_calls(
        sub_name,
        symbol_call_config,
    ));

    let known_symbols = vec![
        ExternSymbol::mock_memcpy_symbol_arm(),
        ExternSymbol::mock_sprintf_symbol_arm(),
        ExternSymbol::mock_scanf_symbol_arm(),
        ExternSymbol::mock_sscanf_symbol_arm(),
        ExternSymbol::mock_strcat_symbol_arm(),
        ExternSymbol::mock_free_symbol_arm(),
        ExternSymbol::mock_malloc_symbol_arm(),
    ];
    for symbol in known_symbols {
        program.extern_symbols.insert(symbol.tid.clone(), symbol);
    }

    let register_list = ["r0", "r1", "r2", "r3", "r11", "sp"]
        .iter()
        .map(|name| Variable::mock(name, ByteSize::new(4)))
        .collect();
    let program_term = Term {
        tid: Tid::new("program"),
        term: program,
    };

    Project {
        program: program_term,
        cpu_architecture: "arm_32".to_string(),
        stack_pointer_register: Variable::mock("sp", 4u64),
        calling_conventions: vec![CallingConvention::mock_standard_arm_32()],
        register_list,
        datatype_properties: DatatypeProperties::mock_standard_arm_32(),
    }
}
......@@ -8,28 +8,18 @@
//!
//! ## How the check works
//!
//! Using backward dataflow analysis we search for an execution path from a system call parameter (string) to a user input
//! to identify possible command injections.
//! The check depends entirely on the string abstraction analysis that is run beforehand.
//! The string abstraction uses a forward fixpoint analysis to determine potential strings at all
//! nodes in the CFG. More detailed information about the string abstraction can be found in the
//! corresponding files.
//!
//! To find relevant string related functions, such as sprintf, it is assumed that the first input parameter points
//! to the memory position that will be used as the return location. (e.g. char *strcat(char *dest, const char *src)
//! where 'char *dest' will contain the return value)
//!
//! For instance:
//!
//! ```txt
//! MOV RAX, qword ptr [RBP + local_10]
//! MOV RDI, RAX // RDI is the first input parameter for the strcat call and it points to [RBP + local_10]
//! CALL strcat
//! MOV RAX, qword ptr [RBP + local_10] // In the backwards analysis [RBP + local_10] will be tainted and it contains the return value
//! ```
//! The BricksDomain, a string abstract domain defining a string as a sequence of substring sets (bricks),
//! is used for this check. As it considers the order of characters, it can also be used for a manual
//! post-analysis of the commands given to system calls.
//!
//! ### Symbols configurable in config.json
//!
//! The symbols are the functions which
//! 1. make system calls (e.g. system)
//! 2. manipulate strings (e.g. sprintf, strcat, memcpy, etc.)
//! 3. take user input (e.g. scanf)
//! The system calls considered in this check can be configured in the config.json.
//!
//! ## False Positives
//!
......@@ -38,32 +28,30 @@
//!
//! ## False Negatives
//!
//! - Missing Taints due to lost track of pointer targets
//! - Non tracked function parameters cause incomplete taints that could miss possible dangerous inputs
use std::collections::{BTreeMap, HashMap, HashSet};
use crate::{
analysis::{
backward_interprocedural_fixpoint::{create_computation, Context as _},
graph::{self, Edge, Node},
interprocedural_fixpoint_generic::NodeValue,
},
intermediate_representation::{ExternSymbol, Jmp, Project, Sub},
prelude::*,
utils::log::{CweWarning, LogMessage},
AnalysisResults, CweModule,
};
use petgraph::{
graph::NodeIndex,
visit::{EdgeRef, IntoNodeReferences},
};
mod state;
use state::*;
mod context;
use context::*;
//! - Missing substrings due to lost track of pointer targets
//! - Non tracked function parameters cause incomplete strings that could miss possible dangerous inputs
use petgraph::visit::EdgeRef;
use crate::CweModule;
use crate::abstract_domain::BricksDomain;
use crate::abstract_domain::TryToBitvec;
use crate::analysis::graph::Edge;
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::analysis::string_abstraction::context::Context;
use crate::analysis::string_abstraction::state::State;
use crate::intermediate_representation::Arg;
use crate::intermediate_representation::ExternSymbol;
use crate::intermediate_representation::Jmp;
use crate::intermediate_representation::Sub;
use crate::prelude::*;
use crate::utils::binary::RuntimeMemoryImage;
use crate::utils::log::CweWarning;
use crate::utils::log::LogMessage;
use std::collections::BTreeMap;
use std::fmt::Debug;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
......@@ -77,82 +65,59 @@ pub static CWE_MODULE: CweModule = CweModule {
pub struct Config {
/// The names of the system call symbols
system_symbols: Vec<String>,
/// The names of the string manipulating symbols
string_symbols: Vec<String>,
/// The names of the user input symbols
user_input_symbols: Vec<String>,
/// Contains the index of the format string parameter
/// for external symbols.
format_string_index: HashMap<String, usize>,
}
/// This check searches for system calls and sets their parameters as taint source if available.
/// Then the fixpoint computation is executed and its result may generate cwe warnings if
/// the parameters can be tracked back to user inputs
/// This check inspects the string parameter of each system call, as computed by the string abstraction analysis,
/// to find potential OS Command Injection vulnerabilities.
pub fn check_cwe(
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let pointer_inference_results = analysis_results.pointer_inference.unwrap();
let mut cwe_78_graph = analysis_results.control_flow_graph.clone();
cwe_78_graph.reverse();
let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let system_symbols =
crate::utils::symbol_utils::get_symbol_map(project, &config.system_symbols[..]);
let symbol_maps: SymbolMaps = SymbolMaps::new(project, &config);
let block_maps = BlockMaps::new(analysis_results);
let general_context = Context::new(
project,
analysis_results.runtime_memory_image,
std::sync::Arc::new(cwe_78_graph),
pointer_inference_results,
std::sync::Arc::new(symbol_maps),
std::sync::Arc::new(block_maps),
cwe_sender,
);
let entry_sub_to_entry_node_map = get_entry_sub_to_entry_node_map(project, &general_context);
let (cwe_sender, cwe_receiver): (
crossbeam_channel::Sender<CweWarning>,
crossbeam_channel::Receiver<CweWarning>,
) = crossbeam_channel::unbounded();
let (log_sender, log_receiver): (
crossbeam_channel::Sender<LogMessage>,
crossbeam_channel::Receiver<LogMessage>,
) = crossbeam_channel::unbounded();
let string_abstraction = analysis_results.string_abstraction.unwrap();
let system_symbol: Option<(Tid, ExternSymbol)> = string_abstraction
.get_context()
.project
.program
.term
.extern_symbols
.clone()
.into_iter()
.find(|(_, symbol)| config.system_symbols.contains(&symbol.name));
let string_graph = string_abstraction.get_graph();
for edge in general_context.get_pi_graph().edge_references() {
if let Some((_, system)) = system_symbol {
for edge in string_graph.edge_references() {
if let Edge::ExternCallStub(jmp) = edge.weight() {
if let Jmp::Call { target, .. } = &jmp.term {
if let Some(symbol) = system_symbols.get(target) {
let node = edge.source();
let current_sub = match general_context.get_pi_graph()[node] {
Node::BlkEnd(_blk, sub) => sub,
_ => panic!(),
};
let mut context = general_context.clone();
context.set_taint_source(jmp, &symbol.name, current_sub);
let pi_state_at_taint_source =
match pointer_inference_results.get_node_value(node) {
Some(NodeValue::Value(val)) => Some(val.clone()),
_ => None,
};
let mut computation = create_computation(context.clone(), None);
computation.set_node_value(
node,
NodeValue::Value(State::new(
symbol,
&project.stack_pointer_register,
pi_state_at_taint_source.as_ref(),
current_sub,
)),
);
computation.compute_with_max_steps(100);
for (sub_name, node_index) in entry_sub_to_entry_node_map.iter() {
if let Some(node_weight) = computation.get_node_value(*node_index) {
let state = node_weight.unwrap_value();
if !state.is_empty() {
context.generate_cwe_warning(sub_name);
if system.tid == *target {
if let Some(source_node) = string_abstraction.get_node_value(edge.source())
{
if let Some(pi_node) = analysis_results
.pointer_inference
.unwrap()
.get_node_value(edge.source())
{
let pi_state = pi_node.unwrap_value();
let source_state = source_node.unwrap_value();
check_system_call_parameter(
source_state,
pi_state,
&system,
&jmp.tid,
&cwe_sender,
&log_sender,
string_abstraction.get_context().runtime_memory_image,
)
}
}
}
......@@ -168,138 +133,116 @@ pub fn check_cwe(
_ => panic!(),
};
}
let cwe_warnings = cwe_warnings.into_iter().map(|(_, cwe)| cwe).collect();
let log_messages = log_receiver.try_iter().collect();
(Vec::new(), cwe_warnings)
(log_messages, cwe_warnings)
}
/// Returns a map from subroutine names to their corresponding start node index
fn get_entry_sub_to_entry_node_map(
project: &Project,
context: &Context,
) -> HashMap<String, NodeIndex> {
let mut entry_sub_to_entry_blocks_map = HashMap::new();
let subs: HashMap<Tid, &Term<Sub>> = project
.program
.term
.subs
.iter()
.map(|sub| (sub.tid.clone(), sub))
.collect();
for sub_tid in project.program.term.entry_points.iter() {
if let Some(sub) = subs.get(sub_tid) {
if let Some(entry_block) = sub.term.blocks.get(0) {
entry_sub_to_entry_blocks_map.insert(
(sub_tid.clone(), sub.term.name.clone()),
entry_block.tid.clone(),
/// Checks the system call parameter given by the Bricks Domain.
pub fn check_system_call_parameter(
source_state: &State<BricksDomain>,
pi_state: &PointerInferenceState,
system_symbol: &ExternSymbol,
jmp_tid: &Tid,
cwe_collector: &crossbeam_channel::Sender<CweWarning>,
log_collector: &crossbeam_channel::Sender<LogMessage>,
runtime_memory_image: &RuntimeMemoryImage,
) {
let sub = source_state.get_current_sub().unwrap();
if let Some(Arg::Register { var, .. }) = system_symbol.parameters.get(0) {
if let Some(value) = source_state.get_variable_to_pointer_map().get(var) {
let contains_string_constant = value.get_absolute_value().is_some();
let contains_relative_string_pointer = !value.get_relative_values().is_empty();
if contains_relative_string_pointer {
let mut parameter_domain =
Context::<BricksDomain>::merge_domains_from_multiple_pointer_targets(
source_state,
pi_state,
value.get_relative_values(),
);
if contains_string_constant {
if let Ok(global_string) = runtime_memory_image
.read_string_until_null_terminator(
&value.get_absolute_value().unwrap().try_to_bitvec().unwrap(),
)
{
parameter_domain.widen(&BricksDomain::from(global_string.to_string()));
} else {
parameter_domain = BricksDomain::Top;
}
}
check_if_string_domain_indicates_vulnerability(
parameter_domain,
jmp_tid,
sub,
system_symbol,
cwe_collector,
);
} else if !contains_string_constant && !contains_relative_string_pointer {
let _ = cwe_collector.send(generate_cwe_warning(
&sub.term.name,
jmp_tid,
&system_symbol.name,
));
}
} else {
let _ = log_collector.send(LogMessage::new_debug(format!(
"No Parameter tracked for system call at {}",
jmp_tid.address
)));
}
}
let mut tid_to_graph_indices_map = HashMap::new();
for node in context.get_graph().node_indices() {
if let graph::Node::BlkStart(block, sub) = context.get_graph()[node] {
tid_to_graph_indices_map.insert((block.tid.clone(), sub.tid.clone()), node);
}
}
entry_sub_to_entry_blocks_map
.into_iter()
.filter_map(|((sub_tid, name), block_tid)| {
tid_to_graph_indices_map
.get(&(block_tid, sub_tid))
.map(|start_node_index| (name, *start_node_index))
})
.collect()
}
/// - string_symbols:
/// - Maps the TID of an extern string related symbol to the corresponding extern symbol struct.
/// - user_input_symbols:
/// - Maps the TID of an extern symbol that takes input from the user to the corresponding extern symbol struct.
/// - extern_symbol_map:
/// - Maps the TID of an extern symbol to the extern symbol struct.
/// - format_string_index:
/// - Maps a symbol name to the index of its format string parameter.
pub struct SymbolMaps<'a> {
string_symbol_map: HashMap<Tid, &'a ExternSymbol>,
user_input_symbol_map: HashMap<Tid, &'a ExternSymbol>,
extern_symbol_map: HashMap<Tid, &'a ExternSymbol>,
format_string_index: HashMap<String, usize>,
}
impl<'a> SymbolMaps<'a> {
/// Creates a new instance of the symbol maps struct.
pub fn new(project: &'a Project, config: &Config) -> Self {
let mut extern_symbol_map = HashMap::new();
for (tid, symbol) in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(tid.clone(), symbol);
/// Checks if the Bricks Domain indicates a vulnerability at the system call.
pub fn check_if_string_domain_indicates_vulnerability(
input_domain: BricksDomain,
jmp_tid: &Tid,
sub: &Term<Sub>,
system_symbol: &ExternSymbol,
cwe_collector: &crossbeam_channel::Sender<CweWarning>,
) {
match &input_domain {
BricksDomain::Top => {
let _ = cwe_collector.send(generate_cwe_warning(
&sub.term.name,
jmp_tid,
&system_symbol.name,
));
}
BricksDomain::Value(bricks) => {
let partially_known = bricks
.iter()
.any(|brick| matches!(brick, crate::abstract_domain::BrickDomain::Top));
if partially_known {
let _ = cwe_collector.send(generate_cwe_warning(
&sub.term.name,
jmp_tid,
&system_symbol.name,
));
}
SymbolMaps {
string_symbol_map: crate::utils::symbol_utils::get_symbol_map(
project,
&config.string_symbols[..],
),
user_input_symbol_map: crate::utils::symbol_utils::get_symbol_map(
project,
&config.user_input_symbols[..],
),
extern_symbol_map,
format_string_index: config.format_string_index.clone(),
}
}
}
/// A collection of lookup tables that relate `Def` and `Jmp` terms to the basic blocks containing them.
///
/// - block_first_def_set:
///     - The set of all [`Def`](crate::intermediate_representation::Def) terms that are the first `Def` of their block.
///       The entries are of the form `(Def-TID, Current-Sub-TID)`
///       to distinguish the nodes for blocks contained in more than one function.
/// - block_start_last_def_map:
///     - A map to get the node index of the `BlkStart` node
///       containing a given [`Def`](crate::intermediate_representation::Def) as the last `Def` of the block.
///       The keys are of the form `(Def-TID, Current-Sub-TID)`
///       to distinguish the nodes for blocks contained in more than one function.
/// - jmp_to_blk_end_node_map:
///     - A map to get the node index of the `BlkEnd` node containing a given [`Jmp`].
///       The keys are of the form `(Jmp-TID, Current-Sub-TID)`
///       to distinguish the nodes for blocks contained in more than one function.
pub struct BlockMaps {
block_first_def_set: HashSet<(Tid, Tid)>,
block_start_last_def_map: HashMap<(Tid, Tid), NodeIndex>,
jmp_to_blk_end_node_map: HashMap<(Tid, Tid), NodeIndex>,
}
impl BlockMaps {
/// Builds the block maps by walking over all nodes of the control flow graph
/// contained in the given analysis results.
///
/// `BlkStart` nodes contribute the first and the last `Def` of their block (if the block has any),
/// `BlkEnd` nodes contribute all jumps of their block. All other node types are ignored.
pub fn new(analysis_results: &AnalysisResults) -> Self {
    let mut block_first_def_set = HashSet::new();
    let mut block_start_last_def_map = HashMap::new();
    let mut jmp_to_blk_end_node_map = HashMap::new();
    for (node_id, node) in analysis_results.control_flow_graph.node_references() {
        match node {
            Node::BlkStart(block, sub) => {
                let defs = &block.term.defs;
                // Blocks without any `Def` have neither a first nor a last `Def` to register.
                if let (Some(first_def), Some(last_def)) = (defs.first(), defs.last()) {
                    block_first_def_set.insert((first_def.tid.clone(), sub.tid.clone()));
                    block_start_last_def_map
                        .insert((last_def.tid.clone(), sub.tid.clone()), node_id);
                }
            }
            Node::BlkEnd(block, sub) => {
                jmp_to_blk_end_node_map.extend(
                    block
                        .term
                        .jmps
                        .iter()
                        .map(|jmp| ((jmp.tid.clone(), sub.tid.clone()), node_id)),
                );
            }
            _ => (),
        }
    }
    BlockMaps {
        block_first_def_set,
        block_start_last_def_map,
        jmp_to_blk_end_node_map,
    }
}
/// Builds the CWE 78 warning for a call to the symbol `symbol_name`
/// that is located at `jmp_tid` inside the function `sub_name`.
pub fn generate_cwe_warning(sub_name: &str, jmp_tid: &Tid, symbol_name: &str) -> CweWarning {
    let call_address = &jmp_tid.address;
    let description = format!(
        "(OS Command Injection) Input for call to {} may not be properly sanitized in function {} ({})",
        symbol_name, sub_name, call_address,
    );
    let other_info = vec![
        String::from("OS Command Injection"),
        symbol_name.to_string(),
    ];
    CweWarning::new(
        CWE_MODULE.name.to_string(),
        CWE_MODULE.version.to_string(),
        description,
    )
    .addresses(vec![call_address.clone()])
    .tids(vec![jmp_tid.to_string()])
    .symbols(vec![sub_name.to_string()])
    .other(vec![other_info])
}
use std::{
collections::{HashMap, HashSet},
sync::Arc,
};
use petgraph::graph::NodeIndex;
use super::{state::State, BlockMaps, SymbolMaps, CWE_MODULE};
use crate::{
abstract_domain::{AbstractDomain, DataDomain, IntervalDomain},
analysis::{
forward_interprocedural_fixpoint::Context as PiContext, graph::Graph,
pointer_inference::PointerInference as PointerInferenceComputation,
pointer_inference::State as PointerInferenceState,
},
checkers::cwe_476::Taint,
intermediate_representation::*,
utils::{binary::RuntimeMemoryImage, log::CweWarning},
};
pub mod parameter_detection;
/// The context object for the CWE 78 taint analysis.
/// It bundles the project, the analysis results the taint analysis builds upon,
/// the currently selected taint source and the channel for reporting CWE warnings.
#[derive(Clone)]
pub struct Context<'a> {
/// A pointer to the corresponding project struct.
project: &'a Project,
/// A pointer to the representation of the runtime memory image.
runtime_memory_image: &'a RuntimeMemoryImage,
/// The reversed control flow graph for the analysis
graph: Arc<Graph<'a>>,
/// A pointer to the results of the pointer inference analysis.
/// They are used to determine the targets of pointers to memory,
/// which in turn is used to keep track of taint on the stack or on the heap.
pub pointer_inference_results: &'a PointerInferenceComputation<'a>,
/// Lookup tables relating `Def` and `Jmp` terms to their blocks:
/// - block_first_def_set:
///     - The set of all [`Def`] terms that are the first `Def` of their block.
///       The entries are of the form `(Def-TID, Current-Sub-TID)`
///       to distinguish the nodes for blocks contained in more than one function.
/// - block_start_last_def_map:
///     - A map to get the node index of the `BlkStart` node containing a given [`Def`] as the last `Def` of the block.
///       The keys are of the form `(Def-TID, Current-Sub-TID)`
///       to distinguish the nodes for blocks contained in more than one function.
/// - jmp_to_blk_end_node_map:
///     - A map to get the node index of the `BlkEnd` node containing a given [`Jmp`].
///       The keys are of the form `(Jmp-TID, Current-Sub-TID)`
///       to distinguish the nodes for blocks contained in more than one function.
block_maps: Arc<BlockMaps>,
/// Lookup tables for extern symbols:
/// - string_symbol_map:
///     - Maps the TID of a string related extern symbol to the corresponding extern symbol struct.
/// - user_input_symbol_map:
///     - Maps the TID of an extern symbol that takes input from the user to the corresponding extern symbol struct.
/// - extern_symbol_map:
///     - Maps the TID of an extern symbol to the extern symbol struct.
/// - format_string_index:
///     - Maps a symbol name to the index of its format string parameter.
symbol_maps: Arc<SymbolMaps<'a>>,
/// The call whose parameter values are the sources for taint for the analysis.
pub taint_source: Option<&'a Term<Jmp>>,
/// The subroutine from which the taint source originates
pub taint_source_sub: Option<&'a Term<Sub>>,
/// The name of the function, whose parameter values are the taint sources.
pub taint_source_name: Option<String>,
/// A channel where found CWE hits can be sent to.
cwe_collector: crossbeam_channel::Sender<CweWarning>,
}
impl<'a> Context<'a> {
/// Creates a new context for the CWE 78 taint analysis.
pub fn new(
project: &'a Project,
runtime_memory_image: &'a RuntimeMemoryImage,
graph: Arc<Graph<'a>>,
pointer_inference_results: &'a PointerInferenceComputation<'a>,
symbol_maps: Arc<SymbolMaps<'a>>,
block_maps: Arc<BlockMaps>,
cwe_collector: crossbeam_channel::Sender<CweWarning>,
) -> Self {
Context {
project,
runtime_memory_image,
graph,
pointer_inference_results,
symbol_maps,
block_maps,
taint_source: None,
taint_source_sub: None,
taint_source_name: None,
cwe_collector,
}
}
/// Builds the CWE 78 warning for the current taint source and sends it to the CWE collector.
/// `sub_name` is the name of the function that is reported in the warning.
///
/// Panics if no taint source or no taint source name is set.
/// A failure to send the warning is ignored.
pub fn generate_cwe_warning(&self, sub_name: &str) {
    let source_tid = &self.taint_source.unwrap().tid;
    let symbol_name = self.taint_source_name.clone().unwrap();
    let description = format!(
        "(Potential OS Command Injection) Input for call to {} is not properly sanitized in function {} ({})",
        symbol_name, sub_name, source_tid.address,
    );
    let other_info = vec![String::from("OS Command Injection"), symbol_name];
    let warning = CweWarning::new(
        CWE_MODULE.name.to_string(),
        CWE_MODULE.version.to_string(),
        description,
    )
    .addresses(vec![source_tid.address.clone()])
    .tids(vec![source_tid.to_string()])
    .symbols(vec![sub_name.to_string()])
    .other(vec![other_info]);
    let _ = self.cwe_collector.send(warning);
}
/// Registers the call that acts as taint source, the name of the called symbol
/// and the function containing the call.
pub fn set_taint_source(
    &mut self,
    taint_source: &'a Term<Jmp>,
    taint_source_symbol_name: &str,
    taint_source_sub: &'a Term<Sub>,
) {
    self.taint_source_name = Some(String::from(taint_source_symbol_name));
    self.taint_source_sub = Some(taint_source_sub);
    self.taint_source = Some(taint_source);
}
/// Returns the graph that the pointer inference results are based on.
pub fn get_pi_graph(&self) -> &Graph<'a> {
    let pi_results = self.pointer_inference_results;
    pi_results.get_graph()
}
/// Checks whether the first parameter of a string related function points to a taint.
/// If so, the taint at the target memory is removed and `true` is returned.
///
/// For the duration of the check the taints of callee saved registers are
/// temporarily added to the memory taints; they are removed again before returning.
/// Returns `false` if the parameter could not be evaluated.
pub fn first_param_points_to_memory_taint(
    &self,
    pi_state: &PointerInferenceState,
    state: &mut State,
    parameter: &Arg,
) -> bool {
    let address = match pi_state.eval_parameter_arg(
        parameter,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    ) {
        Ok(address) => address,
        Err(_) => return false,
    };
    let temporary_taints: Vec<DataDomain<IntervalDomain>> =
        self.add_temporary_callee_saved_register_taints_to_mem_taints(pi_state, state);
    let target_is_tainted = state.address_points_to_taint(address.clone(), pi_state);
    if target_is_tainted {
        if let Some(standard_cconv) = self.project.get_standard_calling_convention() {
            state.remove_callee_saved_taint_if_destination_parameter(
                &address,
                pi_state,
                standard_cconv,
            );
        }
        state.remove_mem_taint_at_target(&address);
    }
    // Clean up the taints that were only added for the check above.
    for temporary_taint in &temporary_taints {
        state.remove_mem_taint_at_target(temporary_taint);
    }
    target_is_tainted
}
/// For every tainted callee saved register, taints the memory target that the register
/// evaluates to, unless that target already points to a taint.
///
/// Returns the list of targets that were newly tainted, so that the caller can remove them again.
/// If the project has no standard calling convention, nothing is tainted.
pub fn add_temporary_callee_saved_register_taints_to_mem_taints(
    &self,
    pi_state: &PointerInferenceState,
    state: &mut State,
) -> Vec<DataDomain<IntervalDomain>> {
    let standard_cconv = match self.project.get_standard_calling_convention() {
        Some(cconv) => cconv,
        None => return Vec::new(),
    };
    let mut newly_tainted_targets: Vec<DataDomain<IntervalDomain>> = Vec::new();
    let callee_saved_taints = state.get_callee_saved_register_taints(standard_cconv);
    for (register, _) in callee_saved_taints.iter() {
        let target = pi_state.eval(&Expression::Var(register.clone()));
        if state.address_points_to_taint(target.clone(), pi_state) {
            continue;
        }
        state.save_taint_to_memory(&target, Taint::Tainted(register.size));
        newly_tainted_targets.push(target);
    }
    newly_tainted_targets
}
/// Looks up whether the given def term is the last def term of its block.
/// If it is, the node index of the corresponding `BlkStart` node is returned.
///
/// Panics if the state has no current sub.
pub fn get_blk_start_node_if_last_def(
    &self,
    state: &State,
    def: &Term<Def>,
) -> Option<NodeIndex> {
    let sub = match state.get_current_sub() {
        Some(sub) => sub,
        None => panic!("Missing current Sub."),
    };
    let key = (def.tid.clone(), sub.tid.clone());
    self.block_maps.block_start_last_def_map.get(&key).copied()
}
/// Computes for each def term of a block the pointer inference state after the def was executed.
///
/// The computation starts with the pointer inference state of the given `BlkStart` node
/// and applies the defs of the block one after another.
/// If the state update for a def yields no state, this def and all following defs get no entry.
/// Returns `None` if there is no pointer inference state for the node.
///
/// Panics if the node index is not contained in the pointer inference graph.
pub fn create_pi_def_map(
    &self,
    block_start_node: NodeIndex,
) -> Option<HashMap<Tid, PointerInferenceState>> {
    let block_node = match self.get_pi_graph().node_weight(block_start_node) {
        Some(node) => node,
        None => panic!("Unexpected node index for BlkStart Node."),
    };
    let pi_value = self
        .pointer_inference_results
        .get_node_value(block_start_node)?;
    let pi_context = self.pointer_inference_results.get_context();
    let mut pi_def_map: HashMap<Tid, PointerInferenceState> = HashMap::new();
    let mut current_pi_state = pi_value.unwrap_value().clone();
    for def in block_node.get_block().term.defs.iter() {
        match pi_context.update_def(&current_pi_state, def) {
            Some(updated_state) => {
                // The map holds the state that is available after the def was executed.
                pi_def_map.insert(def.tid.clone(), updated_state.clone());
                current_pi_state = updated_state;
            }
            // Without a state there is nothing to propagate to the remaining defs.
            None => break,
        }
    }
    Some(pi_def_map)
}
/// Computes the state update for assignment and load definitions.
/// If the target variable of the def is tainted, the taint is transferred to the input expression.
/// Otherwise the state is returned unchanged.
pub fn handle_assign_and_load(
    &self,
    state: State,
    def: &Term<Def>,
    var: &Variable,
    input: &Expression,
) -> State {
    let mut new_state = state;
    let target_is_tainted = match new_state.get_register_taint(var) {
        Some(taint) => taint.is_tainted(),
        None => false,
    };
    if target_is_tainted {
        new_state.set_expression_taint_and_store_constants(
            &def.tid,
            var,
            input,
            &self.project.stack_pointer_register,
            self.runtime_memory_image,
        );
    }
    new_state
}
/// Returns the `BlkEnd` node that contains the call with the given TID
/// inside the current sub of the state.
///
/// Panics if the state has no current sub or if no matching node is known.
pub fn get_source_node(&self, state: &State, call_source: &Tid) -> NodeIndex {
    let key = (
        call_source.clone(),
        state.get_current_sub().as_ref().unwrap().tid.clone(),
    );
    match self.block_maps.jmp_to_blk_end_node_map.get(&key) {
        Some(blk_end_node) => *blk_end_node,
        None => panic!("Malformed Control Flow Graph."),
    }
}
/// Computes the state at a callsite from the target state of the call.
///
/// Non parameter taints are removed from a copy of the target state and its current sub is set to the caller.
/// If a return state is available, its callee saved register taints are merged into the new state.
/// Returns `None` if there is no target state.
pub fn update_target_state_for_callsite(
    &self,
    return_state: Option<&State>,
    target_state: Option<&State>,
    caller_sub: &Term<Sub>,
) -> Option<State> {
    let mut callsite_state = target_state?.clone();
    callsite_state.remove_non_parameter_taints_for_generic_function(self.project);
    callsite_state.set_current_sub(caller_sub);
    if let Some(state_after_return) = return_state {
        callsite_state.merge_callee_saved_taints_from_return_state(
            state_after_return,
            self.project.get_standard_calling_convention(),
        );
    }
    Some(callsite_state)
}
}
impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Context<'a> {
type Value = State;
/// Returns the graph on which the fixpoint computation operates.
fn get_graph(&self) -> &Graph<'a> {
    &*self.graph
}
/// Merge two states
fn merge(&self, state1: &State, state2: &State) -> State {
state1.merge(state2)
}
/// Computes the state before the execution of the given definition.
///
/// Returns `None` for empty states.
/// If the def is the last def of its block, the map from defs to pointer inference states
/// is created for the block and attached to the state.
/// If the def is the first def of its block, the map is dropped again.
fn update_def(&self, state: &State, def: &Term<Def>) -> Option<State> {
    // Without taint there is nothing to propagate.
    if state.is_empty() {
        return None;
    }
    let mut new_state = state.clone();
    // The last def of a block is the first one visited, so the Def Pi Map is created here.
    if let Some(blk_start_node) = self.get_blk_start_node_if_last_def(&new_state, def) {
        new_state.set_pi_def_map(self.create_pi_def_map(blk_start_node));
    }
    let mut new_state = match &def.term {
        Def::Assign { var, value: input }
        | Def::Load {
            var,
            address: input,
        } => self.handle_assign_and_load(new_state, def, var, input),
        Def::Store { address, value } => {
            new_state.taint_value_to_be_stored(
                &def.tid,
                address,
                value,
                &self.project.stack_pointer_register,
                self.runtime_memory_image,
            );
            new_state
        }
    };
    // The first def of a block is the last one visited,
    // so the Def Pi Map can be removed to save memory.
    let first_def_key = (
        def.tid.clone(),
        new_state.get_current_sub().as_ref().unwrap().tid.clone(),
    );
    if self.block_maps.block_first_def_set.contains(&first_def_key) {
        new_state.set_pi_def_map(None);
    }
    Some(new_state)
}
/// Passes a copy of the state after the jump through unchanged.
/// The jump, the untaken conditional and the jumpsite are not inspected.
fn update_jumpsite(
    &self,
    state_after_jump: &State,
    _jump: &Term<Jmp>,
    _untaken_conditional: Option<&Term<Jmp>>,
    _jumpsite: &Term<Blk>,
) -> Option<State> {
    Some(state_after_jump).cloned()
}
/// Computes the state at a callsite depending on which of the return and target states exist.
///
/// - Return state and target state: non parameter taints are removed from the target state
///   and the callee saved taints of the return state are merged into it.
/// - Return state only: a copy of the return state is returned
///   after removing all taints that are not callee saved.
/// - No return state: the target state is only processed if the TID of the sub containing
///   the taint source equals the TID of `caller_sub`. In every other case `None` is returned.
fn update_callsite(
    &self,
    target_state: Option<&State>,
    return_state: Option<&State>,
    caller_sub: &Term<Sub>,
    _call: &Term<Jmp>,
    _return_: &Term<Jmp>,
) -> Option<State> {
    match return_state {
        Some(return_) => self
            .update_target_state_for_callsite(return_state, target_state, caller_sub)
            .or_else(|| {
                // No target state: fall back to the return state without its volatile taints.
                let mut new_state = return_.clone();
                if let Some(calling_conv) = self.project.get_standard_calling_convention() {
                    new_state.remove_non_callee_saved_taint(calling_conv);
                }
                Some(new_state)
            }),
        None => match self.taint_source_sub {
            Some(source_sub) if source_sub.tid == caller_sub.tid => {
                self.update_target_state_for_callsite(return_state, target_state, caller_sub)
            }
            _ => None,
        },
    }
}
/// Forwards an unmodified copy of the combined state to the callsite.
/// The copy is used at the callsite to restore non-volatile registers.
fn split_call_stub(&self, combined_state: &State) -> Option<State> {
    Some(combined_state).cloned()
}
/// Returns a copy of the combined state whose current sub is set to the sub that was returned from.
/// If a standard calling convention is known, all register taints
/// except for taints of its return registers are removed from the copy.
fn split_return_stub(
    &self,
    combined_state: &State,
    returned_from_sub: &Term<Sub>,
) -> Option<State> {
    let mut state_at_return = combined_state.clone();
    if let Some(calling_conv) = self.project.get_standard_calling_convention() {
        let mut return_registers: HashSet<String> = HashSet::new();
        return_registers.extend(calling_conv.return_register.iter().cloned());
        state_at_return.remove_all_except_return_register_taints(return_registers);
    }
    state_at_return.set_current_sub(returned_from_sub);
    Some(state_at_return)
}
/// Computes the state before a call to an extern symbol.
///
/// Returns `None` for empty states.
/// Otherwise the taint handling for the called extern symbol is applied to the state.
///
/// Panics if the jump is not a direct call or if the call target is not a known extern symbol.
fn update_call_stub(&self, state_after_call: &State, call: &Term<Jmp>) -> Option<State> {
    if state_after_call.is_empty() {
        return None;
    }
    let target = match &call.term {
        Jmp::Call { target, .. } => target,
        _ => panic!("Malformed control flow graph encountered."),
    };
    let source_node = self.get_source_node(state_after_call, &call.tid);
    match self.symbol_maps.extern_symbol_map.get(target) {
        Some(extern_symbol) => Some(self.taint_generic_extern_symbol_parameters(
            state_after_call,
            extern_symbol,
            source_node,
        )),
        None => panic!("Extern symbol not found."),
    }
}
/// Conditions are not evaluated by this analysis, so a copy of the input state is returned.
fn specialize_conditional(
    &self,
    state: &State,
    _condition: &Expression,
    _is_true: bool,
) -> Option<State> {
    Some(state).cloned()
}
}
#[cfg(test)]
mod tests;
use petgraph::graph::NodeIndex;
use crate::intermediate_representation::Arg;
use crate::{
analysis::pointer_inference::State as PointerInferenceState, checkers::cwe_476::Taint,
};
use crate::{
analysis::{
backward_interprocedural_fixpoint::Context as _,
interprocedural_fixpoint_generic::NodeValue,
},
intermediate_representation::ExternSymbol,
utils::arguments,
};
use super::{Context, State};
impl<'a> Context<'a> {
/// Dispatches to the taint procedure matching the kind of the extern symbol.
/// String symbols take precedence over user input symbols;
/// symbols that are neither are handled by the generic procedure.
pub fn taint_generic_extern_symbol_parameters(
    &self,
    state: &State,
    symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    if self.is_string_symbol(symbol) {
        self.taint_extern_string_symbol_parameters(state, symbol, call_source_node)
    } else if self.is_user_input_symbol(symbol) {
        self.taint_user_input_symbol_parameters(state, symbol, call_source_node)
    } else {
        self.taint_other_extern_symbol_parameters(state, symbol, call_source_node)
    }
}
/// Returns `true` if the TID of the symbol is contained in the map of string symbols.
pub fn is_string_symbol(&self, symbol: &ExternSymbol) -> bool {
    let string_symbols = &self.symbol_maps.string_symbol_map;
    string_symbols.contains_key(&symbol.tid)
}
/// Returns `true` if the TID of the symbol is contained in the map of user input symbols.
pub fn is_user_input_symbol(&self, symbol: &ExternSymbol) -> bool {
    let user_input_symbols = &self.symbol_maps.user_input_symbol_map;
    user_input_symbols.contains_key(&symbol.tid)
}
/// Handles calls to user input symbols (`scanf` and `sscanf` variants).
///
/// All taints that are not callee saved are removed first.
/// The variable parameters of the call are then determined through the format string.
/// If none could be determined (or no pointer inference state is available),
/// the call is of no relevance and nothing else happens.
/// Otherwise `scanf` calls are forwarded to [`Context::process_scanf`] and
/// `sscanf` calls, together with their first parameter (the source string),
/// to [`Context::process_sscanf`].
///
/// Panics if the symbol is neither a `scanf` nor a `sscanf` variant,
/// or if a `sscanf` symbol has no parameters.
pub fn taint_user_input_symbol_parameters(
    &self,
    state: &State,
    user_input_symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    let mut new_state = state.clone();
    new_state
        .remove_non_callee_saved_taint(user_input_symbol.get_calling_convention(self.project));
    let pi_state = match self
        .pointer_inference_results
        .get_node_value(call_source_node)
    {
        Some(NodeValue::Value(pi_state)) => pi_state,
        _ => return new_state,
    };
    // TODO: Log errors that came up during the parameter parsing.
    let parameters = match arguments::get_variable_parameters(
        self.project,
        pi_state,
        user_input_symbol,
        &self.symbol_maps.format_string_index,
        self.runtime_memory_image,
    ) {
        Ok(parameters) if !parameters.is_empty() => parameters,
        _ => return new_state,
    };
    match user_input_symbol.name.as_str() {
        "scanf" | "__isoc99_scanf" => {
            self.process_scanf(call_source_node, &mut new_state, pi_state, parameters)
        }
        "sscanf" | "__isoc99_sscanf" => {
            let source_string_register = user_input_symbol.parameters.get(0).unwrap();
            self.process_sscanf(
                &mut new_state,
                pi_state,
                parameters,
                source_string_register,
            )
        }
        _ => panic!("Invalid user input symbol."),
    }
    new_state
}
/// Checks whether one of the scanf parameters points to a tainted memory position.
/// If so, a CWE warning is generated for the function containing the call
/// and all register and memory taints are removed from the state,
/// since it cannot be determined anymore where the whole input originates from.
/// Parameters that cannot be evaluated are skipped.
pub fn process_scanf(
    &self,
    call_source_node: NodeIndex,
    new_state: &mut State,
    pi_state: &PointerInferenceState,
    parameters: Vec<Arg>,
) {
    let points_to_taint = parameters.iter().any(|param| {
        match pi_state.eval_parameter_arg(
            param,
            &self.project.stack_pointer_register,
            self.runtime_memory_image,
        ) {
            Ok(address) => new_state.address_points_to_taint(address, pi_state),
            Err(_) => false,
        }
    });
    if points_to_taint {
        let call_node = self.get_graph().node_weight(call_source_node).unwrap();
        self.generate_cwe_warning(&call_node.get_sub().term.name);
        new_state.remove_all_register_taints();
        new_state.remove_all_memory_taints();
    }
}
/// Transfers taint from the return parameters of a sscanf call to its source string.
///
/// The memory taint is removed at every return parameter target that points to a tainted memory position.
/// If at least one such target was found, the target of the source string parameter is tainted.
/// Note that the return parameters and the format string input parameters are the same.
/// Parameters that cannot be evaluated are skipped.
pub fn process_sscanf(
    &self,
    new_state: &mut State,
    pi_state: &PointerInferenceState,
    format_string_parameters: Vec<Arg>,
    source_string_parameter: &Arg,
) {
    let mut tainted_target_found = false;
    for param in &format_string_parameters {
        let address = match pi_state.eval_parameter_arg(
            param,
            &self.project.stack_pointer_register,
            self.runtime_memory_image,
        ) {
            Ok(address) => address,
            Err(_) => continue,
        };
        if new_state.address_points_to_taint(address.clone(), pi_state) {
            // The return parameter points to the tainted memory region, so the taint is consumed.
            new_state.remove_mem_taint_at_target(&address);
            tainted_target_found = true;
        }
    }
    if !tainted_target_found {
        return;
    }
    if let Ok(source_address) = pi_state.eval_parameter_arg(
        source_string_parameter,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    ) {
        new_state.save_taint_to_memory(
            &source_address,
            Taint::Tainted(self.project.stack_pointer_register.size),
        );
    }
}
/// Taints the parameters of an extern symbol that is neither string nor user input related,
/// if the call is relevant to the taint analysis.
/// The call counts as relevant if a return register of the symbol is tainted.
/// In that case all taints that are not callee saved are removed
/// and, if a pointer inference state is available for the call, the parameters of the symbol are tainted.
pub fn taint_other_extern_symbol_parameters(
    &self,
    state: &State,
    symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    let mut new_state = state.clone();
    let return_registers = arguments::get_return_registers_from_symbol(symbol);
    if !new_state.check_return_registers_for_taint(return_registers) {
        // The return value carries no taint, so the call does not influence the analysis.
        return new_state;
    }
    new_state.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
    let node_value = self
        .pointer_inference_results
        .get_node_value(call_source_node);
    if let Some(NodeValue::Value(pi_state)) = node_value {
        self.taint_function_parameters(&mut new_state, pi_state, symbol.parameters.clone());
    }
    new_state
}
/// Taints the registers and stack positions of the parameters of string functions
/// such as sprintf, snprintf, etc.
///
/// All taints that are not callee saved are removed first.
/// The parameters are only tainted if the call is relevant,
/// i.e. if the first parameter points to a tainted memory position.
/// For symbols with a variable amount of parameters, the parameters determined through
/// the format string replace the fixed parameters of the symbol;
/// if they cannot be determined, no parameter is tainted.
pub fn taint_extern_string_symbol_parameters(
    &self,
    state: &State,
    string_symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    let mut new_state = state.clone();
    new_state.remove_non_callee_saved_taint(string_symbol.get_calling_convention(self.project));
    let pi_state = match self
        .pointer_inference_results
        .get_node_value(call_source_node)
    {
        Some(NodeValue::Value(pi_state)) => pi_state,
        _ => return new_state,
    };
    if !self.is_relevant_string_function_call(string_symbol, pi_state, &mut new_state) {
        return new_state;
    }
    let parameters = if string_symbol.has_var_args {
        match arguments::get_variable_parameters(
            self.project,
            pi_state,
            string_symbol,
            &self.symbol_maps.format_string_index,
            self.runtime_memory_image,
        ) {
            Ok(args) => args,
            // TODO: Log errors that came up during the parameter parsing.
            Err(_) => vec![],
        }
    } else {
        string_symbol.parameters.clone()
    };
    self.taint_function_parameters(&mut new_state, pi_state, parameters);
    new_state
}
/// Decides whether a call to a string function is relevant to the taint analysis.
/// The call is relevant if the first parameter of the symbol points to a tainted memory address.
/// As a side effect of the check the taint at that address is removed.
///
/// Panics if the symbol has no parameters.
pub fn is_relevant_string_function_call(
    &self,
    symbol: &ExternSymbol,
    pi_state: &PointerInferenceState,
    state: &mut State,
) -> bool {
    match symbol.parameters.get(0) {
        Some(first_parameter) => {
            self.first_param_points_to_memory_taint(pi_state, state, first_parameter)
        }
        None => panic!("Missing parameters for string related function!"),
    }
}
/// Taints all given function parameters.
/// Register parameters are tainted directly.
/// For stack parameters the memory target is tainted, provided that the parameter can be evaluated.
pub fn taint_function_parameters(
    &self,
    state: &mut State,
    pi_state: &PointerInferenceState,
    parameters: Vec<Arg>,
) {
    for parameter in &parameters {
        match parameter {
            Arg::Stack { size, .. } => {
                let target = pi_state.eval_parameter_arg(
                    parameter,
                    &self.project.stack_pointer_register,
                    self.runtime_memory_image,
                );
                if let Ok(address) = target {
                    state.save_taint_to_memory(&address, Taint::Tainted(*size));
                }
            }
            Arg::Register { var, .. } => {
                state.set_register_taint(var, Taint::Tainted(var.size));
            }
        }
    }
}
}
#[cfg(test)]
mod tests;
use petgraph::graph::NodeIndex;
use crate::abstract_domain::IntervalDomain;
use crate::analysis::pointer_inference::{Data, PointerInference as PointerInferenceComputation};
use crate::intermediate_representation::{
Arg, BinOpType, Bitvector, ByteSize, CallingConvention, Expression, ExternSymbol, Tid, Variable,
};
use crate::utils::binary::RuntimeMemoryImage;
use crate::{checkers::cwe_476::Taint, utils::log::CweWarning};
use super::super::tests::{bv, Setup};
use super::Context;
use std::collections::{HashMap, HashSet};
impl<'a> Context<'a> {
    /// Test helper: replaces the channel that CWE warnings are sent to.
    pub fn set_cwe_collector(&mut self, collector: crossbeam_channel::Sender<CweWarning>) {
        let _previous_collector = std::mem::replace(&mut self.cwe_collector, collector);
    }
}
/// Checks that the generic entry point applies the string symbol procedure to registered
/// string symbols and the generic procedure to all other extern symbols.
#[test]
fn tainting_generic_extern_symbol_parameters() {
    let mut setup = Setup::new();
    setup.project.calling_conventions = vec![CallingConvention::mock_with_parameter_registers(
        vec!["RDI".to_string(), "RSI".to_string()],
        vec!["XMM0".to_string()],
    )];

    let r9_reg = Variable::mock("R9", 8u64);
    let rbp_reg = Variable::mock("RBP", 8u64);
    let rdi_reg = Variable::mock("RDI", 8u64);
    let rsi_reg = Variable::mock("RSI", 8u64);
    let rax_reg = Variable::mock("RAX", 8u64);

    // Initial taints: one memory taint, one callee saved and one non callee saved register.
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    for register in [&rbp_reg, &r9_reg].iter() {
        setup
            .state
            .set_register_taint(register, Taint::Tainted(register.size));
    }

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    let string_sym = ExternSymbol::mock_string();
    let mut string_syms: HashMap<Tid, &ExternSymbol> = HashMap::new();
    string_syms.insert(Tid::new("sprintf"), &string_sym);
    let mut format_string_index: HashMap<String, usize> = HashMap::new();
    format_string_index.insert("sprintf".to_string(), 1);

    let context = Context::mock(
        &setup.project,
        string_syms,
        HashMap::new(),
        format_string_index,
        &pi_results,
        &mem_image,
    );
    let call_node = *context
        .block_maps
        .jmp_to_blk_end_node_map
        .get(&(Tid::new("call_string"), Tid::new("func")))
        .unwrap();

    // Test Case 1: String Symbol
    let mut new_state = context.taint_generic_extern_symbol_parameters(
        &setup.state,
        &ExternSymbol::mock_string(),
        call_node,
    );
    // Parameter
    assert_eq!(new_state.get_register_taint(&rdi_reg), None);
    assert_eq!(new_state.get_register_taint(&rsi_reg), None);
    // Callee Saved
    assert_eq!(
        new_state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );
    // Non Callee Saved
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(new_state.get_register_taint(&rax_reg), None);

    // Reset the register taints, this time including the return register.
    new_state.remove_all_register_taints();
    for register in [&rbp_reg, &r9_reg, &rax_reg].iter() {
        new_state.set_register_taint(register, Taint::Tainted(register.size));
    }

    // Test Case 2: Other Extern Symbol
    new_state = context.taint_generic_extern_symbol_parameters(
        &new_state,
        &ExternSymbol::mock(),
        call_node,
    );
    // Parameter
    assert_eq!(
        new_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
    // Callee Saved
    assert_eq!(
        new_state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );
    // Non Callee Saved
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(new_state.get_register_taint(&rax_reg), None);
    assert_eq!(new_state.get_register_taint(&rsi_reg), None);
}
/// Checks the taint state after applying the string symbol procedure to a string function call:
/// the taint at the target of the first parameter is removed, other memory taint
/// and callee saved register taint are kept, and the parameter registers stay untainted.
#[test]
fn tainting_extern_string_symbol_parameters() {
    let mut setup = Setup::new();
    setup.project.calling_conventions = vec![CallingConvention::mock_with_parameter_registers(
        vec!["RDI".to_string(), "RSI".to_string()],
        vec!["XMM0".to_string()],
    )];

    let rbp_reg = Variable::mock("RBP", 8u64); // callee saved -> will point to RSP
    let rdi_reg = Variable::mock("RDI", 8u64); // parameter 1 -> will point to RBP - 8
    let rsi_reg = Variable::mock("RSI", 8u64); // parameter 2

    setup
        .state
        .set_register_taint(&rbp_reg, Taint::Tainted(rbp_reg.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    setup
        .state
        .save_taint_to_memory(&setup.base_sixteen_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    let mut format_string_index: HashMap<String, usize> = HashMap::new();
    format_string_index.insert("sprintf".to_string(), 1);

    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        format_string_index,
        &pi_results,
        &mem_image,
    );
    let call_node = *context
        .block_maps
        .jmp_to_blk_end_node_map
        .get(&(Tid::new("call_string"), Tid::new("func")))
        .unwrap();

    let new_state = context.taint_extern_string_symbol_parameters(
        &setup.state,
        &ExternSymbol::mock_string(),
        call_node,
    );

    // Memory taints
    assert!(new_state.address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
    assert!(!new_state.address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
    // Parameter registers
    assert_eq!(new_state.get_register_taint(&rdi_reg), None);
    assert_eq!(new_state.get_register_taint(&rsi_reg), None);
    // Callee saved register
    assert_eq!(
        new_state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );
}
/// Runs `taint_user_input_symbol_parameters` for the mocked `scanf` symbol on a
/// manually prepared pointer inference state and expects that the CWE warning
/// channel is not empty afterwards.
#[test]
fn tainting_user_input_symbol_parameters() {
    let mut setup = Setup::new();
    let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded::<CweWarning>();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let call_source_node: NodeIndex = graph.node_indices().next().unwrap();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    // `scanf` is registered with format string index 0.
    let format_string_index: HashMap<String, usize> =
        std::iter::once(("scanf".to_string(), 0)).collect();

    // The constant address 0x500c is later written into RDI.
    let global_address = Bitvector::from_str_radix(16, "500c").unwrap();
    let string_address = IntervalDomain::new(global_address.clone(), global_address).into();

    // Work on a copy of the computed state of the call source node.
    let mut pi_result_state = pi_results
        .get_node_value(call_source_node)
        .unwrap()
        .unwrap_value()
        .clone();
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    // Store the stack target with offset -8 at the address `RSP + 0`.
    let rsp_plus_zero = Expression::BinOp {
        op: BinOpType::IntAdd,
        lhs: Box::new(Expression::Var(Variable {
            name: String::from("RSP"),
            size: ByteSize::new(8),
            is_temp: false,
        })),
        rhs: Box::new(Expression::Const(Bitvector::from_u64(0))),
    };
    let stack_target = Data::from_target(setup.pi_state.stack_id.clone(), bv(-8));
    pi_result_state
        .write_to_address(&rsp_plus_zero, &stack_target, &mem_image)
        .expect("Failed to write to address.");
    pi_result_state.set_register(&Variable::mock("RDI", 8_u64), string_address);
    pi_results.set_node_value(pi_result_state, call_source_node);

    let mut context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        format_string_index,
        &pi_results,
        &mem_image,
    );
    context.set_cwe_collector(cwe_sender);
    context.taint_source = Some(&setup.taint_source);
    context.taint_source_name = Some("system".to_string());

    context.taint_user_input_symbol_parameters(
        &setup.state,
        &ExternSymbol::mock_scanf(),
        call_source_node,
    );

    assert!(!cwe_receiver.is_empty());
}
/// Calls `process_scanf` with a single stack argument at offset 0 and expects
/// that the CWE warning channel is not empty afterwards.
#[test]
fn processing_scanf() {
    let mut setup = Setup::new();
    let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded::<CweWarning>();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let call_source_node: NodeIndex = graph.node_indices().next().unwrap();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    let mut context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );
    context.set_cwe_collector(cwe_sender);
    context.taint_source = Some(&setup.taint_source);
    context.taint_source_name = Some("system".to_string());

    // Taint the stack slot at offset -8 ...
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    // ... and store a stack target with offset -8 at the address `RSP + 0`.
    let rsp_plus_zero = Expression::BinOp {
        op: BinOpType::IntAdd,
        lhs: Box::new(Expression::Var(Variable {
            name: String::from("RSP"),
            size: ByteSize::new(8),
            is_temp: false,
        })),
        rhs: Box::new(Expression::Const(Bitvector::from_u64(0))),
    };
    let stack_target = Data::from_target(setup.pi_state.stack_id.clone(), bv(-8));
    setup
        .pi_state
        .write_to_address(&rsp_plus_zero, &stack_target, context.runtime_memory_image)
        .expect("Failed to write to address.");

    let string_arg = Arg::Stack {
        offset: 0,
        size: ByteSize::new(8),
        data_type: None,
    };
    context.process_scanf(
        call_source_node,
        &mut setup.state,
        &setup.pi_state,
        vec![string_arg],
    );

    assert!(!cwe_receiver.is_empty());
}
/// Calls `process_sscanf` with RDI as the source string parameter and one stack
/// argument. Afterwards the stack offset -16 (the value of RDI) must point to
/// taint while the stack offset -8 must not.
#[test]
fn processing_sscanf() {
    let mut setup = Setup::new();
    let rdi_reg = Variable::mock("RDI", ByteSize::new(8));
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // RDI holds the stack target with offset -16.
    setup
        .pi_state
        .set_register(&rdi_reg, setup.base_sixteen_offset.clone());

    // The address `RSP + 0` holds the stack target with offset -8.
    let rsp_plus_zero = Expression::BinOp {
        op: BinOpType::IntAdd,
        lhs: Box::new(Expression::Var(Variable {
            name: String::from("RSP"),
            size: ByteSize::new(8),
            is_temp: false,
        })),
        rhs: Box::new(Expression::Const(Bitvector::from_u64(0))),
    };
    let stack_target = Data::from_target(setup.pi_state.stack_id.clone(), bv(-8));
    setup
        .pi_state
        .write_to_address(&rsp_plus_zero, &stack_target, context.runtime_memory_image)
        .expect("Failed to write to address.");

    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let string_arg = Arg::Stack {
        offset: 0,
        size: ByteSize::new(8),
        data_type: None,
    };
    let source_string_arg = Arg::Register {
        var: rdi_reg,
        data_type: None,
    };
    context.process_sscanf(
        &mut setup.state,
        &setup.pi_state,
        vec![string_arg],
        &source_string_arg,
    );

    let sixteen_is_tainted = setup
        .state
        .address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state);
    assert!(sixteen_is_tainted);
    let eight_is_tainted = setup
        .state
        .address_points_to_taint(setup.base_eight_offset, &setup.pi_state);
    assert!(!eight_is_tainted);
}
/// Calls `taint_function_parameters` with one register argument (RDI) and one
/// stack argument (offset 24) and checks that RDI is tainted and that the stack
/// target with offset 32, which was stored at `RSP + 24`, points to taint.
#[test]
fn tainting_function_arguments() {
    let mut setup = Setup::new();
    let rdi_reg = Variable::mock("RDI", 8);
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // Store the stack target with offset 32 at the address `RSP + 24`.
    let rsp_plus_24 = Expression::BinOp {
        op: BinOpType::IntAdd,
        lhs: Box::new(Expression::Var(Variable {
            name: String::from("RSP"),
            size: ByteSize::new(8),
            is_temp: false,
        })),
        rhs: Box::new(Expression::Const(Bitvector::from_u64(24))),
    };
    let stored_target = Data::from_target(setup.pi_state.stack_id.clone(), bv(32));
    setup
        .pi_state
        .write_to_address(&rsp_plus_24, &stored_target, context.runtime_memory_image)
        .expect("Failed to write to address.");

    let register_arg = Arg::Register {
        var: rdi_reg.clone(),
        data_type: None,
    };
    let stack_arg = Arg::Stack {
        offset: 24,
        size: ByteSize::from(8),
        data_type: None,
    };
    context.taint_function_parameters(
        &mut setup.state,
        &setup.pi_state,
        vec![register_arg, stack_arg],
    );

    assert_eq!(
        setup.state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
    let expected_tainted_address = Data::from_target(setup.pi_state.stack_id.clone(), bv(32));
    assert!(setup
        .state
        .address_points_to_taint(expected_tainted_address, &setup.pi_state));
}
/// `is_string_symbol` must accept a symbol contained in the string symbol map
/// (`sprintf`) and reject one that is not contained (`memcpy`).
#[test]
fn test_is_string_symbol() {
    let setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    let sprintf_symbol = ExternSymbol::mock_string();
    let memcpy_symbol = {
        let mut symbol = ExternSymbol::mock();
        symbol.tid = Tid::new("memcpy");
        symbol
    };

    // Only `sprintf` is registered as a string symbol.
    let string_symbol_map: HashMap<Tid, &ExternSymbol> =
        std::iter::once((Tid::new("sprintf"), &sprintf_symbol)).collect();
    let context = Context::mock(
        &setup.project,
        string_symbol_map,
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    assert!(context.is_string_symbol(&sprintf_symbol));
    assert!(!context.is_string_symbol(&memcpy_symbol));
}
/// `is_user_input_symbol` must accept a symbol contained in the user input symbol
/// map (`scanf`) and reject one that is not contained (`memcpy`).
#[test]
fn test_is_user_input_symbol() {
    let setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    // Both symbols are the generic mock with a replaced TID.
    let scanf_symbol = {
        let mut symbol = ExternSymbol::mock();
        symbol.tid = Tid::new("scanf");
        symbol
    };
    let memcpy_symbol = {
        let mut symbol = ExternSymbol::mock();
        symbol.tid = Tid::new("memcpy");
        symbol
    };

    // Only `scanf` is registered as a user input symbol.
    let user_input_symbol_map: HashMap<Tid, &ExternSymbol> =
        std::iter::once((Tid::new("scanf"), &scanf_symbol)).collect();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        user_input_symbol_map,
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    assert!(context.is_user_input_symbol(&scanf_symbol));
    assert!(!context.is_user_input_symbol(&memcpy_symbol));
}
use petgraph::visit::IntoNodeReferences;
use super::*;
use crate::analysis::{backward_interprocedural_fixpoint::Context as BackwardContext, graph::Node};
use crate::{
abstract_domain::{DataDomain, SizedDomain},
analysis::pointer_inference::{Data, State as PointerInferenceState, ValueDomain},
intermediate_representation::{Expression, Variable},
};
/// Shorthand that wraps the signed 64-bit `value` into a `Bitvector` and converts
/// it into a `ValueDomain`.
pub fn bv(value: i64) -> ValueDomain {
    let bitvector = Bitvector::from_i64(value);
    bitvector.into()
}
impl ExternSymbol {
pub fn mock_string() -> Self {
ExternSymbol {
tid: Tid::new("sprintf"),
addresses: vec!["UNKNOWN".to_string()],
name: "sprintf".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("RDI", 8), Arg::mock_register("RSI", 8)],
return_values: vec![Arg::mock_register("RAX", 8)],
no_return: false,
has_var_args: true,
}
}
pub fn mock_scanf() -> Self {
ExternSymbol {
tid: Tid::new("scanf"),
addresses: vec!["UNKNOWN".to_string()],
name: "scanf".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("RDI", 8)],
return_values: vec![Arg::mock_register("RAX", 8)],
no_return: false,
has_var_args: true,
}
}
pub fn mock_sscanf() -> Self {
ExternSymbol {
tid: Tid::new("sscanf"),
addresses: vec!["UNKNOWN".to_string()],
name: "sscanf".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("RDI", 8), Arg::mock_register("RSI", 8)],
return_values: vec![Arg::mock_register("RAX", 8)],
no_return: false,
has_var_args: true,
}
}
}
/// Bundle of mock objects shared by the unit tests in this module.
/// All fields are filled by `Setup::new`.
pub struct Setup {
    /// Mock project containing the sub `func` with the blocks `block1` and `block2`.
    pub project: Project,
    /// Taint state returned by `State::mock_with_pi_state`.
    pub state: State,
    /// Pointer inference state returned together with `state`.
    pub pi_state: PointerInferenceState,
    /// Call to `system` (TID `taint_source`) without a return target.
    pub taint_source: Term<Jmp>,
    /// Value built from the stack id of `pi_state` and the offset -8.
    pub base_eight_offset: DataDomain<ValueDomain>,
    /// Value built from the stack id of `pi_state` and the offset -16.
    pub base_sixteen_offset: DataDomain<ValueDomain>,
}
impl Setup {
    /// Creates the mock environment used by the tests of this module.
    ///
    /// The project contains one sub `func` (also registered as entry point) with
    /// two blocks:
    /// - `block1` holds the definitions `def1` (`RBP = RSP`), `def2` (`RDI = RBP - 8`)
    ///   and `def3` (`RSI = 0x3002`) followed by the call `call_string` to `sprintf`
    ///   which returns to `block2`.
    /// - `block2` is left as created by `Blk::mock_with_tid`.
    ///
    /// The extern symbols `ExternSymbol::mock_string()` and `ExternSymbol::mock()`
    /// are registered under their own TIDs and `CallingConvention::mock()` is added
    /// to the calling conventions of the project.
    pub fn new() -> Self {
        let (state, pi_state) = State::mock_with_pi_state();
        let stack_id = pi_state.stack_id.clone();
        let taint_source = Term {
            tid: Tid::new("taint_source"),
            term: Jmp::Call {
                target: Tid::new("system"),
                return_: None,
            },
        };

        let mut project = Project::mock_empty();
        let mut sub = Sub::mock("func");
        let mut block1 = Blk::mock_with_tid("block1");
        let block2 = Blk::mock_with_tid("block2");

        let def1 = Def::assign(
            "def1",
            Variable::mock("RBP", 8_u64),
            Expression::var("RSP", 8),
        );
        let def2 = Def::assign(
            "def2",
            Variable::mock("RDI", 8_u64),
            Expression::var("RBP", 8).plus_const(-8),
        );
        let def3 = Def::assign(
            "def3",
            Variable::mock("RSI", 8_u64),
            Expression::Const(Bitvector::from_str_radix(16, "3002").unwrap()),
        );
        let jump = Jmp::call("call_string", "sprintf", Some("block2"));

        block1.term.defs.push(def1);
        block1.term.defs.push(def2);
        block1.term.defs.push(def3);
        // The jump is not needed afterwards, so it is moved instead of cloned.
        block1.term.jmps.push(jump);
        sub.term.blocks.push(block1);
        sub.term.blocks.push(block2);

        // Build each mock symbol once and register it under its own TID.
        let string_symbol = ExternSymbol::mock_string();
        project
            .program
            .term
            .extern_symbols
            .insert(string_symbol.tid.clone(), string_symbol);
        let generic_symbol = ExternSymbol::mock();
        project
            .program
            .term
            .extern_symbols
            .insert(generic_symbol.tid.clone(), generic_symbol);

        project.program.term.subs.push(sub);
        project.program.term.entry_points.push(Tid::new("func"));
        project.calling_conventions.push(CallingConvention::mock());

        Setup {
            project,
            state,
            pi_state,
            taint_source,
            base_eight_offset: Data::from_target(stack_id.clone(), bv(-8)),
            // Last use of `stack_id`: move it instead of cloning.
            base_sixteen_offset: Data::from_target(stack_id, bv(-16)),
        }
    }
}
impl<'a> Context<'a> {
    /// Builds a `Context` for unit tests.
    ///
    /// The graph is a reversed copy of the graph of `pi_results`. The extern symbol
    /// map is filled with all extern symbols of `project`, and the block maps are
    /// derived from the nodes of the reversed graph:
    /// - for every `BlkStart` node of a block with at least one definition, the
    ///   first definition is added to the first-def set and the last definition is
    ///   mapped to the node,
    /// - for every `BlkEnd` node, each jump of the block is mapped to the node.
    ///
    /// The receiving end of the internally created CWE warning channel is dropped.
    pub fn mock(
        project: &'a Project,
        string_symbols: HashMap<Tid, &'a ExternSymbol>,
        user_input_symbols: HashMap<Tid, &'a ExternSymbol>,
        format_string_index: HashMap<String, usize>,
        pi_results: &'a PointerInferenceComputation<'a>,
        mem_image: &'a RuntimeMemoryImage,
    ) -> Self {
        let (cwe_sender, _) = crossbeam_channel::unbounded();

        let mut graph = pi_results.get_graph().clone();
        graph.reverse();

        let extern_symbol_map: HashMap<_, _> = project
            .program
            .term
            .extern_symbols
            .iter()
            .map(|(tid, symbol)| (tid.clone(), symbol))
            .collect();

        let mut block_first_def_set: HashSet<(Tid, Tid)> = HashSet::new();
        let mut block_start_last_def_map = HashMap::new();
        let mut jmp_to_blk_end_node_map = HashMap::new();
        for (node_id, node) in graph.node_references() {
            match node {
                Node::BlkStart(block, sub) => {
                    // Blocks without definitions contribute nothing.
                    let defs = &block.term.defs;
                    if let (Some(first_def), Some(last_def)) = (defs.first(), defs.last()) {
                        block_first_def_set.insert((first_def.tid.clone(), sub.tid.clone()));
                        block_start_last_def_map
                            .insert((last_def.tid.clone(), sub.tid.clone()), node_id);
                    }
                }
                Node::BlkEnd(block, sub) => {
                    for jmp in block.term.jmps.iter() {
                        let key = (jmp.tid.clone(), sub.tid.clone());
                        jmp_to_blk_end_node_map.insert(key, node_id);
                    }
                }
                _ => (),
            }
        }

        let block_maps: BlockMaps = BlockMaps {
            block_first_def_set,
            block_start_last_def_map,
            jmp_to_blk_end_node_map,
        };
        let symbol_maps: SymbolMaps = SymbolMaps {
            string_symbol_map: string_symbols,
            user_input_symbol_map: user_input_symbols,
            extern_symbol_map,
            format_string_index,
        };

        Context::new(
            project,
            mem_image,
            std::sync::Arc::new(graph),
            pi_results,
            std::sync::Arc::new(symbol_maps),
            std::sync::Arc::new(block_maps),
            cwe_sender,
        )
    }
}
/// `set_taint_source` must store the taint source term, the name of the taint
/// source and the sub containing it in the context.
#[test]
fn setting_taint_source() {
    let setup = Setup::new();
    let current_sub = Sub::mock("func");
    // One memory image is shared by the pointer inference mock and the context.
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    let mut context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    context.set_taint_source(&setup.taint_source, &String::from("system"), &current_sub);

    assert_eq!(context.taint_source, Some(&setup.taint_source));
    assert_eq!(context.taint_source_name, Some(String::from("system")));
    assert_eq!(context.taint_source_sub, Some(&current_sub));
}
/// With RBP and RCX both tainted and pointing to stack targets,
/// `add_temporary_callee_saved_register_taints_to_mem_taints` is expected to
/// return exactly one address, and that address must point to taint afterwards.
#[test]
fn adding_temporary_callee_saved_register_taints_to_mem_taints() {
    let mut setup = Setup::new();
    let rbp_reg = Variable::mock("RBP", 8_u64);
    let rcx_reg = Variable::mock("RCX", 8_u64);

    // Pointer inference state: RBP -> stack offset -8, RCX -> stack offset -16.
    setup
        .pi_state
        .set_register(&rbp_reg, setup.base_eight_offset.clone());
    setup
        .pi_state
        .set_register(&rcx_reg, setup.base_sixteen_offset.clone());
    // Taint state: both registers are tainted.
    for &register in [&rbp_reg, &rcx_reg].iter() {
        setup
            .state
            .set_register_taint(register, Taint::Tainted(register.size));
    }

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    let result = context.add_temporary_callee_saved_register_taints_to_mem_taints(
        &setup.pi_state,
        &mut setup.state,
    );

    assert!(result.len() == 1);
    let temporary_address = result.get(0).unwrap().clone();
    assert!(setup
        .state
        .address_points_to_taint(temporary_address, &setup.pi_state));
}
/// With RDI pointing to the tainted stack offset -8,
/// `first_param_points_to_memory_taint` must return `true`, and the stack offset
/// -8 must no longer point to taint after the call.
#[test]
fn first_param_pointing_to_memory_taint() {
    let mut setup = Setup::new();
    let rdi_reg = Variable::mock("RDI", 8_u64);

    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    setup
        .pi_state
        .set_register(&rdi_reg, setup.base_eight_offset.clone());

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    let first_param = Arg::Register {
        var: rdi_reg,
        data_type: None,
    };
    let points_to_taint = context.first_param_points_to_memory_taint(
        &setup.pi_state,
        &mut setup.state,
        &first_param,
    );
    assert!(points_to_taint);

    let still_tainted = setup
        .state
        .address_points_to_taint(setup.base_eight_offset, &setup.pi_state);
    assert!(!still_tainted);
}
/// Creates the pointer inference definition map for the block start node of
/// `block1` and checks the states recorded for `def1` (RSI is still top) and
/// `def2` (RDI holds the stack target with offset -8).
/// Entries for other definitions are not inspected.
#[test]
fn creating_pi_def_map() {
    let setup = Setup::new();
    let rsi_reg = Variable::mock("RSI", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let def1 = Tid::new("def1");
    let def2 = Tid::new("def2");
    let def3 = Tid::new("def3");
    let stack_id = setup.pi_state.stack_id.clone();

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // `def3` is the last definition of the block, so it identifies the start node.
    let current_sub = setup.project.program.term.subs.get(0).unwrap();
    let last_def_key = (def3, current_sub.tid.clone());
    let start_node = *context
        .block_maps
        .block_start_last_def_map
        .get(&last_def_key)
        .unwrap();

    let pi_def_map = context.create_pi_def_map(start_node).unwrap();
    for (def_tid, pi_state) in pi_def_map.iter() {
        match def_tid {
            tid if *tid == def1 => {
                assert_eq!(pi_state.get_register(&rsi_reg), Data::new_top(rsi_reg.size));
            }
            tid if *tid == def2 => {
                assert_eq!(
                    pi_state.get_register(&rdi_reg),
                    Data::from_target(stack_id.clone(), bv(-8)),
                );
            }
            _ => (),
        }
    }
}
/// `get_blk_start_node_if_last_def` must return `None` for `def1` and `def2` and
/// the block start node registered for `def3`, the last definition of the block.
#[test]
fn getting_blk_start_node_if_last_def() {
    let mut setup = Setup::new();
    let def1 = Def::assign(
        "def1",
        Variable::mock("RBP", 8_u64),
        Expression::var("RSP", 8),
    );
    let def2 = Def::assign(
        "def2",
        Variable::mock("RDI", 8_u64),
        Expression::var("RBP", 8).plus_const(-8),
    );
    let def3 = Def::assign(
        "def3",
        Variable::mock("RDI", 8_u64),
        Expression::Const(Bitvector::from_str_radix(16, "3002").unwrap()),
    );

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    let current_sub = setup.project.program.term.subs.get(0).unwrap();
    setup.state.set_current_sub(current_sub);

    let last_def_key = (def3.tid.clone(), current_sub.tid.clone());
    let start_node = *context
        .block_maps
        .block_start_last_def_map
        .get(&last_def_key)
        .unwrap();

    // Definitions that are not the last one of their block yield no node.
    for &def in [&def1, &def2].iter() {
        assert_eq!(
            context.get_blk_start_node_if_last_def(&setup.state, def),
            None
        );
    }
    assert_eq!(
        context.get_blk_start_node_if_last_def(&setup.state, &def3),
        Some(start_node)
    );
}
/// `get_source_node` must return the block end node that is registered for the
/// `call_string` jump of the current sub.
#[test]
fn getting_source_node() {
    let mut setup = Setup::new();
    let call_tid = Tid::new("call_string");

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    let current_sub = setup.project.program.term.subs.get(0).unwrap();
    setup.state.set_current_sub(current_sub);

    let jump_key = (call_tid.clone(), current_sub.tid.clone());
    let expected_node = *context
        .block_maps
        .jmp_to_blk_end_node_map
        .get(&jump_key)
        .unwrap();

    assert_eq!(
        context.get_source_node(&setup.state, &call_tid),
        expected_node
    );
}
/// Exercises `update_target_state_for_callsite` without any state, with only a
/// target state, and with both a return state and a target state.
#[test]
fn updating_target_state_for_callsite() {
    let mut setup = Setup::new();
    let caller_sub = Sub::mock("caller");
    let r9_reg = Variable::mock("R9", 8_u64);
    let rbp_reg = Variable::mock("RBP", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // The return state is a copy of the state before any register gets tainted.
    let mut return_state = setup.state.clone();

    // Test Case 1: No target state
    let without_states = context.update_target_state_for_callsite(None, None, &caller_sub);
    assert_eq!(without_states, None);

    // Test Case 2: Target state but no return state
    for &register in [&r9_reg, &rdi_reg].iter() {
        setup
            .state
            .set_register_taint(register, Taint::Tainted(register.size));
    }
    let target_only_state = context
        .update_target_state_for_callsite(None, Some(&setup.state), &caller_sub)
        .unwrap();
    assert_eq!(target_only_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        target_only_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
    assert_eq!(
        *target_only_state.get_current_sub().as_ref().unwrap(),
        caller_sub
    );

    // Test Case 3: Target state and return state
    return_state.set_register_taint(&rbp_reg, Taint::Tainted(rbp_reg.size));
    let combined_state = context
        .update_target_state_for_callsite(Some(&return_state), Some(&setup.state), &caller_sub)
        .unwrap();
    assert_eq!(combined_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        combined_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
    assert_eq!(
        combined_state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );
    assert_eq!(
        *combined_state.get_current_sub().as_ref().unwrap(),
        caller_sub
    );
}
/// Runs `update_def` on a register assignment, on a state without register
/// taints, on an assignment of RSP and on a load, checking how the taint of R9
/// is handled in each case.
#[test]
fn handling_assign_and_load() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let stack_id = setup.pi_state.stack_id.clone();

    // `R9 = RDI`, `R9 = RSP` and `R9 = load(RDI)`.
    let mock_assign_register = Def::assign("assign", r9_reg.clone(), Expression::var("RDI", 8));
    let mock_assign_stack =
        Def::assign("stack_assign", r9_reg.clone(), Expression::var("RSP", 8));
    let mock_load = Def::load("load", r9_reg.clone(), Expression::var("RDI", 8));

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );
    let current_sub = setup.project.program.term.subs.get(0).unwrap();
    setup.state.set_current_sub(current_sub);

    // Test Case: Assign R9 Register
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    let mut new_state = context
        .update_def(&setup.state, &mock_assign_register)
        .unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );

    // Test Case: None State
    new_state.remove_all_register_taints();
    assert_eq!(context.update_def(&new_state, &mock_assign_register), None);

    // Test Case: Assign RSP Register
    let pi_map: HashMap<Tid, PointerInferenceState> =
        std::iter::once((Tid::new("stack_assign"), setup.pi_state.clone())).collect();
    new_state.set_pointer_inference_map(pi_map);
    new_state.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    new_state = context.update_def(&new_state, &mock_assign_stack).unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    let stack_base = Data::from_target(stack_id.clone(), bv(0));
    assert!(new_state.address_points_to_taint(stack_base, &setup.pi_state));

    // Test Case: Load
    new_state.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    new_state = context.update_def(&new_state, &mock_load).unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
}
/// Runs `update_def` on a register assignment, on a state without register
/// taints, on an assignment of RSP, on a load and on a store, checking the
/// register and memory taints after each step.
#[test]
fn updating_def() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let stack_id = setup.pi_state.stack_id.clone();

    // `R9 = RDI`, `R9 = RSP`, `R9 = load(RDI)` and a store built from R9 and RDI.
    let mock_assign_register = Def::assign("assign", r9_reg.clone(), Expression::var("RDI", 8));
    let mock_assign_stack =
        Def::assign("stack_assign", r9_reg.clone(), Expression::var("RSP", 8));
    let mock_load = Def::load("load", r9_reg.clone(), Expression::var("RDI", 8));
    let mock_store = Def::store("store", Expression::var("R9", 8), Expression::var("RDI", 8));

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );
    let current_sub = setup.project.program.term.subs.get(0).unwrap();
    setup.state.set_current_sub(current_sub);

    // Test Case: Assign R9 Register
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    let mut new_state = context
        .update_def(&setup.state, &mock_assign_register)
        .unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );

    // Test Case: None State
    new_state.remove_all_register_taints();
    assert_eq!(context.update_def(&new_state, &mock_assign_register), None);

    // Test Case: Assign RSP Register
    let pi_map: HashMap<Tid, PointerInferenceState> =
        std::iter::once((Tid::new("stack_assign"), setup.pi_state.clone())).collect();
    new_state.set_pointer_inference_map(pi_map);
    new_state.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    new_state = context.update_def(&new_state, &mock_assign_stack).unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    let stack_base = Data::from_target(stack_id.clone(), bv(0));
    assert!(new_state.address_points_to_taint(stack_base, &setup.pi_state));

    // Test Case: Load
    new_state.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    new_state = context.update_def(&new_state, &mock_load).unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );

    // Test Case: Store
    new_state.save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    // R9 must point to the tainted stack slot before the state is attached to the store.
    setup
        .pi_state
        .set_register(&r9_reg, setup.base_eight_offset.clone());
    let store_tid = Tid::new("store");
    new_state.set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &store_tid);
    new_state = context.update_def(&new_state, &mock_store).unwrap();
    assert_eq!(
        new_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
    assert!(!new_state.address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
}
/// `update_jumpsite` must keep both the register taint of R9 and the memory
/// taint at stack offset -8.
#[test]
fn updating_jumpsite() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // The same branch is passed as the first jump argument and as the optional second one.
    let branch = Jmp::branch("jump", "block2");
    let mut new_state = context
        .update_jumpsite(&setup.state, &branch, Some(&branch), &Blk::mock())
        .unwrap();

    // Attach a pointer inference state so that memory taints can be looked up.
    let initial_tid = Tid::new("initial");
    let pi_map: HashMap<Tid, PointerInferenceState> =
        std::iter::once((initial_tid.clone(), setup.pi_state)).collect();
    new_state.set_pointer_inference_map(pi_map);

    assert_eq!(
        new_state.get_register_taint(&r9_reg),
        Some(&Taint::Tainted(r9_reg.size))
    );
    let attached_pi_state = new_state
        .get_pointer_inference_state_at_def(&initial_tid)
        .unwrap();
    assert!(new_state.address_points_to_taint(setup.base_eight_offset, attached_pi_state));
}
/// Exercises `update_callsite` without a return state, with only a return state,
/// and with both a return state and a target state.
#[test]
fn updating_callsite() {
    let mut setup = Setup::new();
    let mut return_state: Option<&State> = None;
    let mut target_state: Option<&State> = None;
    let jump_term = Jmp::call("call_string", "sprintf", Some("block2"));
    let r9_reg = Variable::mock("R9", 8_u64);
    let rbp_reg = Variable::mock("RBP", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let rax_reg = Variable::mock("RAX", 8_u64);
    let caller_sub = Sub::mock("caller");

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // Test Case: No return state
    assert_eq!(
        context.update_callsite(
            target_state,
            return_state,
            &caller_sub,
            &jump_term,
            &jump_term
        ),
        None
    );

    // Test Case: Return state but no target state
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    setup
        .state
        .set_register_taint(&rbp_reg, Taint::Tainted(rbp_reg.size));
    // The return state is a snapshot; `setup.state` is modified again below.
    let cloned_state = setup.state.clone();
    return_state = Some(&cloned_state);
    let mut new_state = context
        .update_callsite(
            target_state,
            return_state,
            &caller_sub,
            &jump_term,
            &jump_term,
        )
        .unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );

    // Test Case: Return and target state
    setup.state.remove_all_register_taints();
    setup
        .state
        .set_register_taint(&rdi_reg, Taint::Tainted(rdi_reg.size));
    setup
        .state
        .set_register_taint(&rax_reg, Taint::Tainted(rax_reg.size));
    target_state = Some(&setup.state);
    new_state = context
        .update_callsite(
            target_state,
            return_state,
            &caller_sub,
            &jump_term,
            &jump_term,
        )
        .unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(new_state.get_register_taint(&rax_reg), None);
    // Each register is compared against a taint of its own size.
    assert_eq!(
        new_state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );
    assert_eq!(
        new_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
}
/// `split_call_stub` must keep both the register taint of R9 and the memory
/// taint at stack offset -8.
#[test]
fn splitting_call_stub() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    let mut new_state = context.split_call_stub(&setup.state).unwrap();

    // Set pi_state to check for memory pointers
    let initial_tid = Tid::new("initial");
    let pi_map: HashMap<Tid, PointerInferenceState> =
        std::iter::once((initial_tid.clone(), setup.pi_state)).collect();
    new_state.set_pointer_inference_map(pi_map);

    assert_eq!(
        new_state.get_register_taint(&r9_reg),
        Some(&Taint::Tainted(r9_reg.size))
    );
    let attached_pi_state = new_state
        .get_pointer_inference_state_at_def(&initial_tid)
        .unwrap();
    assert!(new_state.address_points_to_taint(setup.base_eight_offset, attached_pi_state));
}
/// `split_return_stub` must drop the register taint of R9 while keeping the
/// register taint of RAX and the memory taint at stack offset -8.
#[test]
fn splitting_return_stub() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    let rax_reg = Variable::mock("RAX", 8_u64);
    let called_sub = Sub::mock("called");

    for &register in [&r9_reg, &rax_reg].iter() {
        setup
            .state
            .set_register_taint(register, Taint::Tainted(register.size));
    }
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    let mut new_state = context
        .split_return_stub(&setup.state, &called_sub)
        .unwrap();

    // Set pi_state to check for memory pointers
    let initial_tid = Tid::new("initial");
    let pi_map: HashMap<Tid, PointerInferenceState> =
        std::iter::once((initial_tid.clone(), setup.pi_state)).collect();
    new_state.set_pointer_inference_map(pi_map);

    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rax_reg),
        Some(&Taint::Tainted(rax_reg.size))
    );
    let attached_pi_state = new_state
        .get_pointer_inference_state_at_def(&initial_tid)
        .unwrap();
    assert!(new_state.address_points_to_taint(setup.base_eight_offset, attached_pi_state));
}
/// Checks `update_call_stub` for a call to the mocked `sprintf` string symbol.
///
/// After the update the memory taint at stack offset -16 is still present,
/// the memory taint at stack offset -8 is gone,
/// RDI, RSI and R9 carry no taint and the taint of RBP is kept.
#[test]
fn updating_call_stub() {
    let mut setup = Setup::new();
    setup.project.calling_conventions = vec![CallingConvention::mock_with_parameter_registers(
        vec!["RDI".to_string(), "RSI".to_string()],
        vec!["XMM0".to_string()],
    )];
    let r9_reg = Variable::mock("R9", 8_u64); // non callee saved
    let rbp_reg = Variable::mock("RBP", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let rsi_reg = Variable::mock("RSI", 8_u64);
    let mock_call = Jmp::call("call_string", "sprintf", Some("block2"));
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    setup
        .state
        .set_register_taint(&rbp_reg, Taint::Tainted(rbp_reg.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    setup
        .state
        .save_taint_to_memory(&setup.base_sixteen_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    // Register `sprintf` as a string symbol whose format string is parameter index 1.
    let mut string_symbols: HashMap<Tid, &ExternSymbol> = HashMap::new();
    let sprintf = &ExternSymbol::mock_string();
    string_symbols.insert(Tid::new("sprintf"), sprintf);
    let mut format_string_index: HashMap<String, usize> = HashMap::new();
    format_string_index.insert("sprintf".to_string(), 1);
    let context = Context::mock(
        &setup.project,
        string_symbols,
        HashMap::new(),
        format_string_index,
        &pi_results,
        &mem_image,
    );
    let current_sub = Sub::mock("func");
    setup.state.set_current_sub(&current_sub);

    let new_state = context.update_call_stub(&setup.state, &mock_call).unwrap();

    assert!(new_state.address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
    assert!(!new_state.address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
    assert_eq!(new_state.get_register_taint(&rdi_reg), None);
    assert_eq!(new_state.get_register_taint(&rsi_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
}
/// Checks that the register taint of R9 and the memory taint at stack offset -8
/// are still present in the state returned by `split_call_stub`.
///
/// NOTE(review): despite its name, this test only calls `split_call_stub`
/// and never exercises the handling of conditional jumps.
/// Confirm whether a dedicated check of the conditional specialization was intended.
#[test]
fn specializing_conditional() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    let mut new_state = context.split_call_stub(&setup.state).unwrap();
    let mut pi_map: HashMap<Tid, PointerInferenceState> = HashMap::new();
    pi_map.insert(Tid::new("initial"), setup.pi_state);
    new_state.set_pointer_inference_map(pi_map);

    assert_eq!(
        new_state.get_register_taint(&r9_reg),
        Some(&Taint::Tainted(r9_reg.size))
    );
    assert!(new_state.address_points_to_taint(
        setup.base_eight_offset,
        new_state
            .get_pointer_inference_state_at_def(&Tid::new("initial"))
            .unwrap()
    ));
}
use std::collections::{HashMap, HashSet};
use crate::{
abstract_domain::{
AbstractDomain, AbstractIdentifier, DataDomain, IntervalDomain, MemRegion, SizedDomain,
TryToBitvec,
},
analysis::pointer_inference::{Data, State as PointerInferenceState},
checkers::cwe_476::Taint,
intermediate_representation::{
Arg, CallingConvention, Expression, ExternSymbol, Project, Sub, Variable,
},
prelude::*,
utils::binary::RuntimeMemoryImage,
};
/// The taint state of the analysis:
/// it tracks tainted registers and memory positions
/// together with string constants collected along the way.
#[derive(Serialize, Deserialize, Debug, Eq, Clone)]
pub struct State {
    /// The set of currently tainted registers.
    register_taint: HashMap<Variable, Taint>,
    /// The Taint contained in memory objects
    memory_taint: HashMap<AbstractIdentifier, MemRegion<Taint>>,
    /// The set of string constants that were read from global memory addresses
    /// (the strings themselves are stored, not their addresses).
    string_constants: HashSet<String>,
    /// A map from Def Tids to their corresponding pointer inference state.
    /// The pointer inference states are calculated in a forward manner
    /// from the BlkStart node when entering a BlkEnd node through a jump.
    /// This field is skipped when serializing the state.
    #[serde(skip_serializing)]
    pi_def_map: Option<HashMap<Tid, PointerInferenceState>>,
    /// Holds the currently analyzed subroutine term
    current_sub: Option<Term<Sub>>,
}
impl PartialEq for State {
    /// Two states compare equal if their register taints, memory taints
    /// and collected string constants are equal.
    ///
    /// The `pi_def_map` field only holds an intermediate value
    /// and is not part of the comparison.
    /// The `current_sub` field is not compared either.
    fn eq(&self, other: &Self) -> bool {
        let own_values = (
            &self.register_taint,
            &self.memory_taint,
            &self.string_constants,
        );
        let other_values = (
            &other.register_taint,
            &other.memory_taint,
            &other.string_constants,
        );
        own_values == other_values
    }
}
impl AbstractDomain for State {
    /// Merge two states.
    /// Any value tainted in at least one input state is also tainted in the merged state.
    /// The string constants of the merged state are the union of the string constants of both states.
    ///
    /// The used algorithm for merging the taints contained in memory regions is unsound
    /// when merging taints that intersect only partially.
    /// However, this should not have an effect in practice,
    /// since these values are usually unsound and unused by the program anyway.
    ///
    /// The merged state has no `pi_def_map` and takes its `current_sub` from `self`.
    fn merge(&self, other: &Self) -> Self {
        let mut register_taint = self.register_taint.clone();
        for (var, other_taint) in other.register_taint.iter() {
            let merged_taint = match self.register_taint.get(var) {
                Some(taint) => taint.merge(other_taint),
                None => *other_taint,
            };
            register_taint.insert(var.clone(), merged_taint);
        }

        let mut memory_taint = self.memory_taint.clone();
        for (tid, other_mem_region) in other.memory_taint.iter() {
            if let Some(mem_region) = memory_taint.get_mut(tid) {
                for (index, taint) in other_mem_region.iter() {
                    // Unsound in theory for partially intersecting taints. Should not matter in practice.
                    mem_region.insert_at_byte_index(*taint, *index);
                }
            } else {
                memory_taint.insert(tid.clone(), other_mem_region.clone());
            }
        }

        // `HashSet::union` only returns a lazy iterator and does not modify the set,
        // so the constants of `other` have to be inserted explicitly.
        let mut string_constants = self.string_constants.clone();
        string_constants.extend(other.string_constants.iter().cloned());

        State {
            register_taint,
            memory_taint,
            string_constants,
            pi_def_map: None, // At nodes this intermediate value can be safely forgotten.
            current_sub: self.current_sub.clone(),
        }
    }

    /// The state has no explicit Top element.
    fn is_top(&self) -> bool {
        false
    }
}
impl State {
    /// Get a new state in which only the parameter values of the given extern symbol are tainted.
    ///
    /// Register parameters are tainted directly.
    /// Stack parameters are only tainted if a pointer inference state is given,
    /// since it is used to evaluate the address of the parameter on the stack.
    pub fn new(
        taint_source: &ExternSymbol,
        stack_pointer_register: &Variable,
        pi_state: Option<&PointerInferenceState>,
        current_sub: &Term<Sub>,
    ) -> State {
        let mut state = State {
            register_taint: HashMap::new(),
            memory_taint: HashMap::new(),
            string_constants: HashSet::new(),
            pi_def_map: None,
            current_sub: Some(current_sub.clone()),
        };
        for parameter in taint_source.parameters.iter() {
            match parameter {
                Arg::Register { var, .. } => {
                    state
                        .register_taint
                        .insert(var.clone(), Taint::Tainted(var.size));
                }
                Arg::Stack { offset, size, .. } => {
                    if let Some(pi_state) = pi_state {
                        let address_exp =
                            Expression::Var(stack_pointer_register.clone()).plus_const(*offset);
                        let address = pi_state.eval(&address_exp);
                        state.save_taint_to_memory(&address, Taint::Tainted(*size));
                    }
                }
            }
        }
        state
    }

    /// Mark the value at the given address with the given taint.
    ///
    /// If the address points to more than one object,
    /// we merge the taint object with the object at the targets,
    /// possibly tainting all possible targets.
    ///
    /// Targets whose offset cannot be converted to a bitvector are skipped.
    pub fn save_taint_to_memory(&mut self, address: &Data, taint: Taint) {
        if let Some((mem_id, offset)) = address.get_if_unique_target() {
            if let Ok(position) = offset.try_to_bitvec() {
                if let Some(mem_region) = self.memory_taint.get_mut(mem_id) {
                    mem_region.add(taint, position);
                } else {
                    let mut mem_region = MemRegion::new(address.bytesize());
                    mem_region.add(taint, position);
                    self.memory_taint.insert(mem_id.clone(), mem_region);
                }
            }
        } else {
            for (mem_id, offset) in address.get_relative_values() {
                if let Ok(position) = offset.try_to_bitvec() {
                    if let Some(mem_region) = self.memory_taint.get_mut(mem_id) {
                        // The target is not unique, so the existing taint is merged instead of replaced.
                        let old_taint = mem_region.get(position.clone(), taint.bytesize());
                        mem_region.add(old_taint.merge(&taint), position);
                    } else {
                        let mut mem_region = MemRegion::new(address.bytesize());
                        mem_region.add(taint, position);
                        self.memory_taint.insert(mem_id.clone(), mem_region);
                    }
                }
            }
        }
    }

    /// Returns the sub of the currently analysed nodes.
    pub fn get_current_sub(&self) -> &Option<Term<Sub>> {
        &self.current_sub
    }

    /// Set the current sub to locate the analysis.
    pub fn set_current_sub(&mut self, current_sub: &Term<Sub>) {
        self.current_sub = Some(current_sub.clone());
    }

    /// Sets the pointer inference to definition map for the current state.
    pub fn set_pi_def_map(&mut self, pi_def_map: Option<HashMap<Tid, PointerInferenceState>>) {
        self.pi_def_map = pi_def_map;
    }

    /// Gets the taint state of a register if there is one.
    pub fn get_register_taint(&self, var: &Variable) -> Option<&Taint> {
        self.register_taint.get(var)
    }

    /// Returns an iterator over currently tainted registers.
    pub fn get_register_taints(&self) -> std::collections::hash_map::Iter<Variable, Taint> {
        self.register_taint.iter()
    }

    /// Remove all memory taints
    pub fn remove_all_memory_taints(&mut self) {
        self.memory_taint = HashMap::new();
    }

    /// Remove all register taints
    pub fn remove_all_register_taints(&mut self) {
        self.register_taint = HashMap::new();
    }

    /// Gets the callee saved taints from the register taints.
    ///
    /// A register counts as callee saved if its name is contained
    /// in the callee saved register list of the given calling convention.
    pub fn get_callee_saved_register_taints(
        &self,
        calling_conv: &CallingConvention,
    ) -> HashMap<Variable, Taint> {
        self.register_taint
            .iter()
            .filter(|(register, _)| calling_conv.callee_saved_register.contains(&register.name))
            .map(|(register, taint)| (register.clone(), *taint))
            .collect()
    }

    /// Gets the string constant saved at the given address and saves it to the string constants field.
    ///
    /// Nothing happens if the constant is not a global memory address
    /// or if no string could be read from the address.
    pub fn evaluate_constant(
        &mut self,
        runtime_memory_image: &RuntimeMemoryImage,
        constant: Bitvector,
    ) {
        if runtime_memory_image.is_global_memory_address(&constant) {
            // TODO: Log the error case instead of silently ignoring it.
            if let Ok(format_string) =
                runtime_memory_image.read_string_until_null_terminator(&constant)
            {
                self.string_constants.insert(format_string.to_string());
            }
        }
    }

    /// Taints input registers and evaluates constant memory addresses for simple assignments
    /// and taints memory if a pointer is overwritten.
    /// The taint on the result register is removed
    /// before the inputs of the expression are handled.
    pub fn set_expression_taint_and_store_constants(
        &mut self,
        def_tid: &Tid,
        result: &Variable,
        expression: &Expression,
        stack_pointer_register: &Variable,
        runtime_memory_image: &RuntimeMemoryImage,
    ) {
        self.remove_register_taint(result);
        match expression {
            Expression::Const(constant) => {
                self.evaluate_constant(runtime_memory_image, constant.clone())
            }
            Expression::Var(var) => self.taint_variable_input(var, stack_pointer_register, def_tid),
            Expression::BinOp { .. } => {
                // Taint the memory that the result register points to
                // according to the pointer inference state of the definition.
                if let Some(pid_map) = self.pi_def_map.as_ref() {
                    if let Some(pi_state) = pid_map.get(def_tid) {
                        let address = pi_state.get_register(result);
                        self.save_taint_to_memory(&address, Taint::Tainted(result.size));
                    }
                }
            }
            Expression::UnOp { arg, .. }
            | Expression::Cast { arg, .. }
            | Expression::Subpiece { arg, .. } => self.taint_def_input_register(
                arg,
                stack_pointer_register,
                def_tid,
                runtime_memory_image,
            ),
            _ => (),
        }
    }

    /// Taints the input register of a store instruction and removes the memory taint at the target address.
    ///
    /// This only happens if a pointer inference state is known for the definition
    /// and the target address points to tainted memory.
    pub fn taint_value_to_be_stored(
        &mut self,
        def_tid: &Tid,
        target: &Expression,
        value: &Expression,
        stack_pointer_register: &Variable,
        runtime_memory_image: &RuntimeMemoryImage,
    ) {
        if let Some(pid_map) = self.pi_def_map.as_ref() {
            if let Some(pi_state) = pid_map.get(def_tid) {
                let address = pi_state.eval(target);
                if self.address_points_to_taint(address.clone(), pi_state) {
                    self.taint_def_input_register(
                        value,
                        stack_pointer_register,
                        def_tid,
                        runtime_memory_image,
                    );
                    self.remove_mem_taint_at_target(&address);
                }
            }
        }
    }

    /// Taints all input register of an expression.
    ///
    /// Constants contained in the expression are evaluated as possible string constants.
    pub fn taint_def_input_register(
        &mut self,
        expr: &Expression,
        stack_pointer_register: &Variable,
        def_tid: &Tid,
        runtime_memory_image: &RuntimeMemoryImage,
    ) {
        match expr {
            Expression::Const(constant) => {
                self.evaluate_constant(runtime_memory_image, constant.clone())
            }
            Expression::Var(var) => self.taint_variable_input(var, stack_pointer_register, def_tid),
            Expression::BinOp { lhs, rhs, .. } => {
                self.taint_def_input_register(
                    lhs,
                    stack_pointer_register,
                    def_tid,
                    runtime_memory_image,
                );
                self.taint_def_input_register(
                    rhs,
                    stack_pointer_register,
                    def_tid,
                    runtime_memory_image,
                );
            }
            Expression::UnOp { arg, .. }
            | Expression::Cast { arg, .. }
            | Expression::Subpiece { arg, .. } => self.taint_def_input_register(
                arg,
                stack_pointer_register,
                def_tid,
                runtime_memory_image,
            ),
            _ => (),
        }
    }

    /// Either taints the input register or a memory position if it is the stack pointer register.
    pub fn taint_variable_input(
        &mut self,
        var: &Variable,
        stack_pointer_register: &Variable,
        def_tid: &Tid,
    ) {
        if var.name == stack_pointer_register.name {
            if let Some(pid_map) = self.pi_def_map.as_ref() {
                if let Some(pi_state) = pid_map.get(def_tid) {
                    let address = pi_state.get_register(stack_pointer_register);
                    self.save_taint_to_memory(
                        &address,
                        Taint::Tainted(stack_pointer_register.size),
                    );
                }
            }
        } else {
            self.set_register_taint(var, Taint::Tainted(var.size));
        }
    }

    /// Remove the taint in the specified memory regions at the specified offsets.
    pub fn remove_mem_taint_at_target(&mut self, address: &Data) {
        for (mem_id, offset) in address.get_relative_values() {
            if let (Some(mem_region), Ok(position)) =
                (self.memory_taint.get_mut(mem_id), offset.try_to_bitvec())
            {
                if let Some(taint) = mem_region.get_unsized(position.clone()) {
                    mem_region.remove(position, Bitvector::from_u64(u64::from(taint.bytesize())));
                }
            }
        }
    }

    /// Set the taint of a register.
    /// A `Top` taint removes the register from the set of tainted registers.
    pub fn set_register_taint(&mut self, register: &Variable, taint: Taint) {
        if taint.is_top() {
            self.register_taint.remove(register);
        } else {
            self.register_taint.insert(register.clone(), taint);
        }
    }

    /// Removes a specified register taint
    pub fn remove_register_taint(&mut self, register: &Variable) {
        self.register_taint.remove(register);
    }

    /// Return true if the memory object with the given ID contains a tainted value.
    pub fn check_mem_id_for_taint(&self, id: &AbstractIdentifier) -> bool {
        if let Some(mem_object) = self.memory_taint.get(id) {
            for elem in mem_object.values() {
                if elem.is_tainted() {
                    return true;
                }
            }
        }
        false
    }

    /// If the given address points to the stack,
    /// return true if and only if the value at that stack position is tainted.
    /// If the given address points to a non-stack memory object,
    /// return true if the memory object contains any tainted value (at any position).
    pub fn address_points_to_taint(&self, address: Data, pi_state: &PointerInferenceState) -> bool {
        use crate::analysis::pointer_inference::object::ObjectType;
        for (target, offset) in address.get_relative_values() {
            if let Ok(Some(ObjectType::Stack)) = pi_state.memory.get_object_type(target) {
                // Only check if the value at the address is tainted
                if let (Some(mem_object), Ok(target_offset)) =
                    (self.memory_taint.get(target), offset.try_to_bitvec())
                {
                    if let Some(taint) = mem_object.get_unsized(target_offset) {
                        if taint.is_tainted() {
                            return true;
                        }
                    }
                }
            } else {
                // Check whether the memory object contains any taint.
                if self.check_mem_id_for_taint(target) {
                    return true;
                }
            }
        }
        false
    }

    /// Removes all taints of registers that are not generic function parameters.
    /// Since we don't know the actual calling convention of the call,
    /// we approximate the parameters with all parameter registers of the standard calling convention of the project.
    ///
    /// If the project has no standard calling convention, no taint is removed.
    pub fn remove_non_parameter_taints_for_generic_function(&mut self, project: &Project) {
        if let Some(calling_conv) = project.get_standard_calling_convention() {
            let register_names: HashSet<String> = calling_conv
                .integer_parameter_register
                .iter()
                .chain(calling_conv.float_parameter_register.iter())
                .cloned()
                .collect();
            self.register_taint
                .retain(|register, _| register_names.contains(&register.name));
        }
    }

    /// Removes the taint of a callee saved register if it was identified as the return target of
    /// a string symbol.
    pub fn remove_callee_saved_taint_if_destination_parameter(
        &mut self,
        destination_address: &DataDomain<IntervalDomain>,
        pi_state: &PointerInferenceState,
        standard_cconv: &CallingConvention,
    ) {
        for (var, _) in self.get_callee_saved_register_taints(standard_cconv).iter() {
            let callee_saved_address = pi_state.eval(&Expression::Var(var.clone()));
            if callee_saved_address == *destination_address {
                self.remove_register_taint(var);
            }
        }
    }

    /// Remove the taint from all registers not contained in the callee-saved register list of the given calling convention.
    pub fn remove_non_callee_saved_taint(&mut self, calling_conv: &CallingConvention) {
        self.register_taint
            .retain(|register, _| calling_conv.callee_saved_register.contains(&register.name));
    }

    /// Remove all register taints except for the return register taints if available
    /// This clears the state on the return stub edge
    pub fn remove_all_except_return_register_taints(&mut self, return_registers: HashSet<String>) {
        self.register_taint
            .retain(|register, _| return_registers.contains(&register.name));
    }

    /// Check whether `self` contains any taint at all.
    /// The collected string constants are not taken into account.
    pub fn is_empty(&self) -> bool {
        self.memory_taint.is_empty() && self.register_taint.is_empty()
    }

    /// Checks whether the return registers are contained in the current tainted registers
    pub fn check_return_registers_for_taint(&self, register_list: Vec<String>) -> bool {
        self.register_taint
            .iter()
            .any(|(register, taint)| register_list.contains(&register.name) && !taint.is_top())
    }

    /// Merges callee saved register taints into the current state
    ///
    /// The taints of callee saved registers in `return_state` overwrite
    /// the taints of the same registers in `self`.
    /// Nothing happens if no calling convention is given.
    pub fn merge_callee_saved_taints_from_return_state(
        &mut self,
        return_state: &State,
        calling_convention: Option<&CallingConvention>,
    ) {
        if let Some(calling_conv) = calling_convention {
            let callee_saved_registers: HashSet<String> =
                calling_conv.callee_saved_register.iter().cloned().collect();
            for (variable, taint) in return_state.get_register_taints() {
                if callee_saved_registers.contains(&variable.name) {
                    self.set_register_taint(variable, *taint);
                }
            }
        }
    }
}
#[cfg(test)]
mod tests;
use crate::analysis::pointer_inference::ValueDomain;
use crate::{abstract_domain::DataDomain, intermediate_representation::CastOpType};
use super::*;
/// Builds an extern symbol with the given name, return values and `has_var_args` flag.
/// The symbol has no addresses, no calling convention and no parameters.
fn extern_symbol(name: &str, return_args: Vec<Arg>, has_var_args: bool) -> ExternSymbol {
    let tid = Tid::new(name.to_string());
    ExternSymbol {
        tid,
        addresses: vec![],
        name: name.into(),
        calling_convention: None,
        parameters: Vec::new(),
        return_values: return_args,
        no_return: false,
        has_var_args,
    }
}
/// Shorthand for converting a signed 64-bit integer into a `ValueDomain`.
fn bv(value: i64) -> ValueDomain {
    let bitvector = Bitvector::from_i64(value);
    ValueDomain::from(bitvector)
}
impl State {
    /// Creates a mock taint state for the extern symbol `system`
    /// together with the pointer inference state that was used to create it.
    /// The `pi_def_map` of the returned state is initialised as an empty map.
    pub fn mock_with_pi_state() -> (State, PointerInferenceState) {
        let stack_register = Variable::mock("RSP", 8_u64);
        let return_arg = Arg::Register {
            var: Variable::mock("RAX", 8_u64),
            data_type: None,
        };
        let pi_state = PointerInferenceState::new(&stack_register, Tid::new("func"));
        let symbol = extern_symbol("system", vec![return_arg], false);
        let current_sub = Sub::mock("current");
        let mut state = State::new(&symbol, &stack_register, Some(&pi_state), &current_sub);
        state.pi_def_map = Some(HashMap::new());
        (state, pi_state)
    }

    /// Registers the given pointer inference state for the given Def Tid.
    /// Nothing happens if no state is given or if the `pi_def_map` is not set.
    pub fn set_pointer_inference_state_for_def(
        &mut self,
        pi_state: Option<PointerInferenceState>,
        def_tid: &Tid,
    ) {
        if let (Some(pi_state), Some(pid_map)) = (pi_state, self.pi_def_map.as_mut()) {
            pid_map.insert(def_tid.clone(), pi_state);
        }
    }

    /// Replaces the `pi_def_map` with the given map.
    pub fn set_pointer_inference_map(&mut self, pi_state_map: HashMap<Tid, PointerInferenceState>) {
        self.pi_def_map = Some(pi_state_map);
    }

    /// Returns the pointer inference state registered for the given Def Tid, if there is one.
    pub fn get_pointer_inference_state_at_def(
        &self,
        def_tid: &Tid,
    ) -> Option<&PointerInferenceState> {
        self.pi_def_map
            .as_ref()
            .and_then(|pid_map| pid_map.get(def_tid))
    }
}
/// Shared fixture for the state tests.
struct Setup {
    /// The mock taint state under test.
    state: State,
    /// The pointer inference state that belongs to the mock taint state.
    pi_state: PointerInferenceState,
    /// Mock of the 8 byte register RDI.
    rdi: Variable,
    /// Mock of the 8 byte register RSI.
    rsi: Variable,
    /// Mock of the 8 byte register RSP.
    rsp: Variable,
    /// The string constant that the tests expect to be read from `constant_address`.
    constant: String,
    /// An address that is looked up in the mocked runtime memory image.
    constant_address: Bitvector,
    /// The Def Tid used to register pointer inference states.
    def_tid: Tid,
    /// Pointer to offset 0 of the stack object of `pi_state`.
    stack_pointer: DataDomain<ValueDomain>,
    /// Pointer to offset -8 of the stack object of `pi_state`.
    base_eight_offset: DataDomain<ValueDomain>,
    /// Pointer to offset -16 of the stack object of `pi_state`.
    base_sixteen_offset: DataDomain<ValueDomain>,
}
impl Setup {
    /// Creates the fixture from the mock taint state and its pointer inference state.
    fn new() -> Self {
        let (state, pi_state) = State::mock_with_pi_state();
        let stack_id = pi_state.stack_id.clone();
        // All pointers of the fixture target the stack object of the pointer inference state.
        let stack_pointer = Data::from_target(stack_id.clone(), bv(0));
        let base_eight_offset = Data::from_target(stack_id.clone(), bv(-8));
        let base_sixteen_offset = Data::from_target(stack_id, bv(-16));
        Setup {
            state,
            pi_state,
            rdi: Variable::mock("RDI", 8_u64),
            rsi: Variable::mock("RSI", 8_u64),
            rsp: Variable::mock("RSP", 8_u64),
            constant: String::from("Hello World"),
            constant_address: Bitvector::from_u32(12290),
            def_tid: Tid::new("def"),
            stack_pointer,
            base_eight_offset,
            base_sixteen_offset,
        }
    }
}
/// Checks `set_expression_taint_and_store_constants` for constant, variable,
/// stack pointer, binary operation and cast/subpiece expressions.
#[test]
fn setting_expression_and_constants() {
    let mut setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    setup
        .pi_state
        .set_register(&setup.rdi, setup.base_eight_offset.clone());
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);

    // Test Case 1: Constants
    // The taint of the result register is removed and the string constant is collected.
    let copy_const_expr = Expression::const_from_apint(setup.constant_address);
    setup
        .state
        .set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &copy_const_expr,
        &setup.rsp,
        &mem_image,
    );
    assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
    assert_eq!(setup.state.string_constants.len(), 1);
    assert!(setup.state.string_constants.contains(&setup.constant));

    // Test Case 2: Variables
    // The taint moves from the result register to the input register.
    let copy_var_expr = Expression::var("RSI", 8);
    setup
        .state
        .set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &copy_var_expr,
        &setup.rsp,
        &mem_image,
    );
    assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
    assert_eq!(
        setup.state.get_register_taint(&setup.rsi),
        Some(&Taint::Tainted(setup.rsi.size))
    );

    // Test Case 2.5: Stack Pointer Assignment
    // The memory that the stack pointer points to gets tainted.
    let stack_expression = Expression::var("RSP", 8);
    setup
        .state
        .set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &stack_expression,
        &setup.rsp,
        &mem_image,
    );
    assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
    assert!(setup
        .state
        .address_points_to_taint(setup.stack_pointer, &setup.pi_state));

    // Test Case 3: Bin Ops
    // The memory that the result register points to gets tainted.
    let bin_op_expr = Expression::var("RBP", 8).plus_const(-8);
    setup
        .state
        .set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &bin_op_expr,
        &setup.rsp,
        &mem_image,
    );
    assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
    assert!(setup
        .state
        .address_points_to_taint(setup.base_eight_offset, &setup.pi_state));

    // Test Case 4: Any other Expression
    // The input register equals the result register, so it is tainted again.
    let cast_expr = Expression::var("RDI", 8)
        .subpiece(ByteSize::new(0), ByteSize::new(4))
        .cast(CastOpType::IntZExt);
    setup
        .state
        .set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &cast_expr,
        &setup.rsp,
        &mem_image,
    );
    assert_eq!(
        setup.state.get_register_taint(&setup.rdi),
        Some(&Taint::Tainted(setup.rdi.size))
    );
}
/// Checks that `taint_value_to_be_stored` taints the stored value
/// only if the store target points to tainted memory.
#[test]
fn tainting_values_to_be_stored() {
    let mut setup = Setup::new();
    let stack_pointer = Variable::mock("RSP", 8_u64);
    let mem_image = RuntimeMemoryImage::mock();

    // Test Case: Memory target is tainted. --> Taint the input register
    setup
        .pi_state
        .set_register(&setup.rdi, setup.base_eight_offset.clone());
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    setup.state.taint_value_to_be_stored(
        &setup.def_tid,
        &Expression::var("RDI", 8),
        &Expression::var("RSI", 8),
        &stack_pointer,
        &mem_image,
    );
    // The memory taint at the target is removed and moved to the stored register.
    assert!(!setup
        .state
        .address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
    assert_eq!(
        setup.state.get_register_taint(&setup.rsi),
        Some(&Taint::Tainted(setup.rsi.size))
    );

    // Test Case: Memory target is not tainted. --> Do nothing
    setup.state.register_taint.remove(&setup.rsi);
    setup
        .pi_state
        .set_register(&setup.rdi, setup.base_sixteen_offset.clone());
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);
    setup.state.taint_value_to_be_stored(
        &setup.def_tid,
        &Expression::var("RDI", 8),
        &Expression::var("RSI", 8),
        &stack_pointer,
        &mem_image,
    );
    assert_eq!(setup.state.get_register_taint(&setup.rsi), None);
}
/// Checks `taint_def_input_register` for variable, stack pointer,
/// binary operation and cast expressions.
#[test]
fn tainting_def_input_register() {
    let mut setup = Setup::new();
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let stack_pointer = Variable::mock("RSP", 8_u64);
    let mem_image = RuntimeMemoryImage::mock();
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);

    // Test Case 1: Variable input
    setup.state.taint_def_input_register(
        &Expression::var("RDI", 8),
        &stack_pointer,
        &setup.def_tid,
        &mem_image,
    );
    assert_eq!(
        setup.state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );

    // Test Case 2: Stack Pointer input
    setup.state.taint_def_input_register(
        &Expression::var("RSP", 8),
        &stack_pointer,
        &setup.def_tid,
        &mem_image,
    );
    assert!(setup
        .state
        .address_points_to_taint(setup.stack_pointer.clone(), &setup.pi_state));
    setup.state.remove_all_register_taints();

    // Test Case 3: Bin Op Input
    setup.state.taint_def_input_register(
        &Expression::var("RDI", 8).plus_const(8),
        &stack_pointer,
        &setup.def_tid,
        &mem_image,
    );
    assert_eq!(
        setup.state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
    setup.state.remove_all_register_taints();

    // Test Case 4: Cast Op Input
    setup.state.taint_def_input_register(
        &Expression::var("RDI", 8).cast(CastOpType::IntZExt),
        &stack_pointer,
        &setup.def_tid,
        &mem_image,
    );
    assert_eq!(
        setup.state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
}
/// Checks that `taint_variable_input` taints a regular register directly
/// and taints the memory at the stack pointer if the variable is the stack pointer register.
#[test]
fn tainting_variable_input() {
    let mut setup = Setup::new();
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let stack_pointer = Variable::mock("RSP", 8_u64);
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);

    // Test Case 1: Register input
    setup
        .state
        .taint_variable_input(&rdi_reg, &stack_pointer, &setup.def_tid);
    assert_eq!(
        setup.state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );

    // Test Case 2: Stack Pointer input
    setup
        .state
        .taint_variable_input(&stack_pointer, &stack_pointer, &setup.def_tid);
    assert!(setup
        .state
        .address_points_to_taint(setup.stack_pointer.clone(), &setup.pi_state));
}
/// Checks that `remove_mem_taint_at_target` removes an existing memory taint
/// and leaves untainted memory untainted.
#[test]
fn removing_memory_taint_at_target() {
    let mut setup = Setup::new();

    // Test Case: Memory was tainted and taint is removed
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    assert!(setup
        .state
        .address_points_to_taint(setup.base_eight_offset.clone(), &setup.pi_state));
    setup
        .state
        .remove_mem_taint_at_target(&setup.base_eight_offset);
    assert!(!setup
        .state
        .address_points_to_taint(setup.base_eight_offset, &setup.pi_state));

    // Test Case: Memory was not tainted and nothing happens
    assert!(!setup
        .state
        .address_points_to_taint(setup.base_sixteen_offset.clone(), &setup.pi_state));
    setup
        .state
        .remove_mem_taint_at_target(&setup.base_sixteen_offset);
    assert!(!setup
        .state
        .address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
}
/// Checks that `save_taint_to_memory` taints a previously untainted stack position.
#[test]
fn saving_taint_to_memory() {
    let mut setup = Setup::new();
    assert!(!setup
        .state
        .address_points_to_taint(setup.base_eight_offset.clone(), &setup.pi_state));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    assert!(setup
        .state
        .address_points_to_taint(setup.base_eight_offset.clone(), &setup.pi_state));
}
/// Checks `remove_non_parameter_taints_for_generic_function` with the mock calling convention:
/// the taint of RDI is expected to stay, the taint of RSI is expected to be removed.
#[test]
fn removing_non_parameter_taints_for_generic_function() {
    let mut project = Project::mock_empty();
    project.calling_conventions.push(CallingConvention::mock());

    let mut setup = Setup::new();
    let rdi_taint = Taint::Tainted(setup.rdi.size);
    let rsi_taint = Taint::Tainted(setup.rsi.size);
    setup.state.set_register_taint(&setup.rdi, rdi_taint);
    setup.state.set_register_taint(&setup.rsi, rsi_taint);

    setup
        .state
        .remove_non_parameter_taints_for_generic_function(&project);

    assert_eq!(setup.state.get_register_taint(&setup.rdi), Some(&rdi_taint));
    assert_eq!(setup.state.get_register_taint(&setup.rsi), None);
}
/// Checks `remove_non_callee_saved_taint` with the mock calling convention:
/// the taint of RDI is expected to be removed, the taint of RBP is expected to stay.
#[test]
fn removing_non_callee_saved_taint() {
    let mut setup = Setup::new();
    let cconv = CallingConvention::mock();
    let rbp_reg = Variable::mock("RBP", 8_u64);
    setup
        .state
        .set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
    setup
        .state
        .set_register_taint(&rbp_reg, Taint::Tainted(rbp_reg.size));

    setup.state.remove_non_callee_saved_taint(&cconv);

    assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
    assert_eq!(
        setup.state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );
}
/// Checks that `remove_all_except_return_register_taints` only keeps
/// the taint of registers contained in the given set of return registers.
#[test]
fn removing_all_but_return() {
    let mut setup = Setup::new();
    let mut return_regs: HashSet<String> = HashSet::new();
    return_regs.insert("RAX".to_string());
    let rax_reg = Variable::mock("RAX", 8_u64);
    setup
        .state
        .set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
    setup
        .state
        .set_register_taint(&rax_reg, Taint::Tainted(rax_reg.size));

    setup
        .state
        .remove_all_except_return_register_taints(return_regs);

    assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
    assert_eq!(
        setup.state.get_register_taint(&rax_reg),
        Some(&Taint::Tainted(rax_reg.size))
    );
}
/// Checks that `address_points_to_taint` only reports taint
/// for the stack position that was actually tainted.
#[test]
fn checking_if_address_points_to_taint() {
    let mut setup = Setup::new();
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    assert!(setup
        .state
        .address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
    assert!(!setup
        .state
        .address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
}
/// Checks that `check_return_registers_for_taint` only returns true
/// if one of the given return registers is tainted.
#[test]
fn checking_return_registers_for_taint() {
    let mut setup = Setup::new();
    let rax_reg = Variable::mock("RAX", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);

    // Test Case: Empty Taint
    assert!(!setup
        .state
        .check_return_registers_for_taint(vec!["RAX".to_string()]));

    // Test Case: No return register tainted
    setup
        .state
        .set_register_taint(&rdi_reg, Taint::Tainted(rdi_reg.size));
    assert!(!setup
        .state
        .check_return_registers_for_taint(vec!["RAX".to_string()]));

    // Test Case: Return register tainted
    setup
        .state
        .set_register_taint(&rax_reg, Taint::Tainted(rax_reg.size));
    assert!(setup
        .state
        .check_return_registers_for_taint(vec!["RAX".to_string()]));
}
......@@ -171,6 +171,27 @@ mod tests {
data_type: None,
}
}
/// Build a mock register argument from a register name, a size in bytes
/// and an optional data type that is stored in the argument as given.
pub fn mock_register_with_data_type(
    name: impl ToString,
    size_in_bytes: impl Into<ByteSize>,
    data_type: Option<Datatype>,
) -> Arg {
    let var = Variable::mock(name.to_string(), size_in_bytes);
    Arg::Register { var, data_type }
}
/// Build a mock register argument from a register name and a size in bytes
/// whose data type is fixed to `Datatype::Pointer`.
pub fn mock_pointer_register(
    name: impl ToString,
    size_in_bytes: impl Into<ByteSize>,
) -> Arg {
    let var = Variable::mock(name.to_string(), size_in_bytes);
    let data_type = Some(Datatype::Pointer);
    Arg::Register { var, data_type }
}
}
impl ExternSymbol {
......@@ -186,5 +207,18 @@ mod tests {
has_var_args: false,
}
}
/// Build a mock extern symbol named `sprintf` that is flagged as taking variable arguments.
///
/// The mock declares the registers `RDI` and `RSI` as parameters and `RAX` as return value.
pub fn mock_string() -> Self {
    let symbol_name = "sprintf";
    let parameters = vec![Arg::mock_register("RDI", 8), Arg::mock_register("RSI", 8)];
    let return_values = vec![Arg::mock_register("RAX", 8)];
    ExternSymbol {
        tid: Tid::new(symbol_name),
        addresses: vec![String::from("UNKNOWN")],
        name: String::from(symbol_name),
        calling_convention: Some(String::from("__stdcall")),
        parameters,
        return_values,
        no_return: false,
        has_var_args: true,
    }
}
}
}
......@@ -60,8 +60,11 @@ You can find out more information about each check, including known false positi
by reading the check-specific module documentation in the [`checkers`] module.
*/
use abstract_domain::BricksDomain;
use crate::analysis::graph::Graph;
use crate::analysis::pointer_inference::PointerInference;
use crate::analysis::string_abstraction::StringAbstraction;
use crate::intermediate_representation::Project;
use crate::utils::binary::RuntimeMemoryImage;
use crate::utils::log::{CweWarning, LogMessage};
......@@ -140,6 +143,8 @@ pub struct AnalysisResults<'a> {
pub project: &'a Project,
/// The result of the pointer inference analysis if already computed.
pub pointer_inference: Option<&'a PointerInference<'a>>,
/// The result of the string abstraction if already computed.
pub string_abstraction: Option<&'a StringAbstraction<'a, BricksDomain>>,
}
impl<'a> AnalysisResults<'a> {
......@@ -156,6 +161,7 @@ impl<'a> AnalysisResults<'a> {
control_flow_graph,
project,
pointer_inference: None,
string_abstraction: None,
}
}
......@@ -186,4 +192,33 @@ impl<'a> AnalysisResults<'a> {
..self
}
}
/// Compute the string abstraction.
///
/// The string abstraction depends on the pointer inference.
/// The pointer inference results are not computed by this method,
/// they have to be provided by the caller through `pi_results`.
/// The result gets returned, but not saved to the `AnalysisResults` struct itself.
///
/// # Panics
///
/// Panics if `pi_results` is `None` or if `config` cannot be deserialized
/// into the configuration expected by the string abstraction analysis.
pub fn compute_string_abstraction(
    &'a self,
    config: &serde_json::Value,
    pi_results: Option<&'a PointerInference<'a>>,
) -> StringAbstraction<BricksDomain> {
    // Fail with a descriptive message instead of a bare `unwrap` panic.
    let pointer_inference = pi_results
        .expect("The string abstraction requires the results of the pointer inference.");
    crate::analysis::string_abstraction::run(
        self.project,
        self.runtime_memory_image,
        self.control_flow_graph,
        pointer_inference,
        serde_json::from_value(config.clone())
            .expect("Could not deserialize the configuration of the string abstraction."),
    )
}
/// Create a new `AnalysisResults` struct containing the given string abstraction results.
///
/// Consumes `self`. Only the `string_abstraction` field is replaced by the given value,
/// all other fields are taken over unchanged from `self`.
/// Passing `None` yields a struct without string abstraction results.
pub fn set_string_abstraction<'b: 'a>(
self,
string_abstraction: Option<&'b StringAbstraction<'a, BricksDomain>>,
) -> AnalysisResults<'b> {
AnalysisResults {
string_abstraction,
..self
}
}
}
......@@ -565,8 +565,8 @@ impl ExternSymbol {
calling_convention: self.calling_convention,
parameters,
return_values,
no_return: self.no_return,
has_var_args: self.has_var_args,
no_return: symbol.no_return,
has_var_args: symbol.has_var_args,
}
}
}
......
......@@ -118,7 +118,7 @@ pub fn parse_format_string_parameters(
Ok(datatype_map)
}
/// Returns an argument vector of detected variable parameters if they are of type string.
/// Returns an argument vector of detected variable parameters.
pub fn get_variable_parameters(
project: &Project,
pi_state: &PointerInferenceState,
......@@ -140,15 +140,20 @@ pub fn get_variable_parameters(
);
if let Ok(format_string) = format_string_results.as_ref() {
if let Ok(parameters) =
parse_format_string_parameters(format_string, &project.datatype_properties)
{
let parameter_result =
parse_format_string_parameters(format_string, &project.datatype_properties);
match parameter_result {
Ok(parameters) => {
return Ok(calculate_parameter_locations(
parameters,
extern_symbol.get_calling_convention(project),
format_string_index,
));
}
Err(e) => {
return Err(anyhow!("Could not parse variable parameters: {}", e));
}
}
}
Err(anyhow!(
......
......@@ -216,6 +216,7 @@ mod tests {
mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_skipped(&mut tests, "x86", "gcc");
mark_skipped(&mut tests, "x86", "clang"); // Return value detection insufficient for x86
mark_skipped(&mut tests, "arm", "clang"); // Loss of stack pointer position
mark_skipped(&mut tests, "aarch64", "clang"); // Loss of stack pointer position
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment