Commit 8e575abc by Enkelmann, committed by GitHub (unverified)

Rust interface (#70)

Added brand new (and still experimental) checks for CWEs 415 and 416 together with a new interprocedural data-flow analysis engine written in Rust. Use the `-partial=Memory` command line flag to try out the new checks.
parent fd91fb5c
...@@ -227,3 +227,15 @@ test/artificial_samples/dockcross*
.#*
.sconsign.dblite
### Rust ###
# Generated by Cargo
# will have compiled files and executables
/target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
#!/bin/bash
docker run --rm -t cwe-checker cargo test && docker run --rm -t cwe-checker dune runtest && pytest
...@@ -4,8 +4,10 @@ dev
- Added a lot more test cases to acceptance tests (PR #46)
- Reworked CWE-476 check to track stack variables (PR #47)
- Switched to BAP 2.0 (PR #49)
- Several internal code improvements (PRs #51, #58, #62, #67)
- Added deprecation warnings to the emulation based checks (PR #66)
- Added a new (still experimental) engine for data-flow analysis written in Rust (PR #70)
- Added new, data-flow based checks for CWEs 415 and 416 (PR #70)
0.3 (2019-12)
====
...
[workspace]
members = ["cwe_checker_rs"]
...@@ -4,8 +4,9 @@ COPY . /home/bap/cwe_checker/
RUN sudo chown -R bap:bap /home/bap/cwe_checker \
&& cd /home/bap/cwe_checker \
&& make clean && make all
WORKDIR /home/bap/cwe_checker
ENTRYPOINT ["opam", "config", "exec", "--"]
CMD cwe_checker /tmp/input
.PHONY: all clean test uninstall docker
all:
cargo build --release
cp target/release/libcwe_checker_rs.a src/libcwe_checker_rs.a
cp target/release/libcwe_checker_rs.so src/dllcwe_checker_rs.so
dune build
dune install
cd plugins/cwe_checker; make all; cd ../..
cd plugins/cwe_checker_emulation; make all; cd ../..
cd plugins/cwe_checker_type_inference; make all; cd ../..
cd plugins/cwe_checker_type_inference_print; make all; cd ../..
cd plugins/cwe_checker_pointer_inference_debug; make all; cd ../..
test:
cargo test
cd test/unit/ && ./specify_test_files_for_compilation.sh
dune runtest
cd test/artificial_samples; scons; cd ../..
pytest -v --ignore=_build
clean:
cargo clean
rm -f src/libcwe_checker_rs.a
rm -f src/dllcwe_checker_rs.so
dune clean
bapbuild -clean
rm -f -r doc/html
...@@ -22,6 +30,7 @@ clean:
cd plugins/cwe_checker_emulation; make clean; cd ../..
cd plugins/cwe_checker_type_inference; make clean; cd ../..
cd plugins/cwe_checker_type_inference_print; make clean; cd ../..
cd plugins/cwe_checker_pointer_inference_debug; make clean; cd ../..
uninstall:
dune uninstall
...@@ -29,6 +38,7 @@ uninstall:
cd plugins/cwe_checker_emulation; make uninstall; cd ../..
cd plugins/cwe_checker_type_inference; make uninstall; cd ../..
cd plugins/cwe_checker_type_inference_print; make uninstall; cd ../..
cd plugins/cwe_checker_pointer_inference_debug; make uninstall; cd ../..
documentation:
dune build @doc
...
...@@ -11,51 +11,26 @@
## What is cwe_checker? ##
*cwe_checker* is a suite of tools to detect common bug classes such as use of dangerous functions and simple integer overflows. These bug classes are formally known as [Common Weakness Enumerations](https://cwe.mitre.org/) (CWEs). Its main goal is to aid analysts to quickly find vulnerable code paths.
Its main focus is ELF binaries that are commonly found on Linux and Unix operating systems. *cwe_checker* is built on top of [BAP](https://github.com/BinaryAnalysisPlatform/bap) (Binary Analysis Platform). By using BAP, we are not restricted to a single low level instruction set architecture like Intel x86. BAP lifts several of them to one common intermediate representation (IR), and cwe_checker implements its analyses on this IR. At the time of writing, BAP 2.1 supports Intel x86/x64, ARM, MIPS, and PPC amongst others. This makes *cwe_checker* a valuable tool for firmware analysis.
## Why use cwe_checker? ##
The following arguments should convince you to give *cwe_checker* a try:
- it is very easy to set up, just build the Docker container!
- it analyzes ELF binaries of several CPU architectures including x86, ARM, MIPS, and PPC
- it is extensible due to its plugin-based architecture
- it is configurable, e.g. apply analyses to new APIs
- view results annotated in IDA Pro and Ghidra
- *cwe_checker* can be integrated as a plugin into [FACT](https://github.com/fkie-cad/FACT_core)
## Installation ##
The simplest way is to pull the latest Docker image from [dockerhub](https://hub.docker.com/r/fkiecad/cwe_checker):
- `docker pull fkiecad/cwe_checker:latest` yields an image based on the current master branch.
- `docker pull fkiecad/cwe_checker:stable` yields an image based on the latest stable release version.
If you want to build the docker image yourself, just run `docker build -t cwe_checker .`
Another way is to get cwe_checker from the Ocaml package manager Opam. You can install cwe_checker via the package [cwe_checker](https://opam.ocaml.org/packages/cwe_checker/) (`opam install cwe_checker`). This gives you the latest stable release version of the *cwe_checker*.
If you plan to develop *cwe_checker*, it is recommended to build it using the provided `Makefile`. In this case you must ensure that all dependencies are fulfilled:
- Ocaml 4.07.1
- Opam 2.0.2
- dune >= 1.6
...@@ -67,15 +42,57 @@ If you plan to develop cwe_checker, it is recommended to build it using the prov
- pytest >= 3.5.1 (for tests)
- SCons >= 3.0.5 (for tests)
- odoc >= 1.4 (for documentation)
- [Rust](https://www.rust-lang.org) >= 1.44.1
Just run `make all` to compile and register the plugin with BAP. You can run the test suite via `make test`. Documentation can be built via `make documentation`.
## Usage ##
The *cwe_checker* takes as input a binary file, runs several [checks](#checks) based on static analysis on the binary and then outputs a list of CWE warnings that have been found during the analysis.
If you use the official docker image, just run
```bash
docker run --rm -v /PATH/TO/BINARY:/tmp/input fkiecad/cwe_checker cwe_checker /tmp/input
```
If you installed the *cwe_checker* locally (e.g. via the Opam package), run
```bash
cwe_checker BINARY
``` ```
You can adjust the behavior of most checks via a configuration file located at `src/config.json`. If you modify it, add the command line flag `-config=src/config.json` to tell the *cwe_checker* to use the modified file.
For more information on usage instructions and available command line flags, see the [online documentation](https://fkie-cad.github.io/cwe_checker/doc/html/cwe_checker/index.html). You can also build the documentation locally via `make documentation` and then browse it in the *doc/html/* folder.
## Implemented Checks <a name=checks></a> ##
So far the following analyses are implemented:
- [CWE-125](https://cwe.mitre.org/data/definitions/125.html): Out-of-bounds read (via emulation)
- [CWE-190](https://cwe.mitre.org/data/definitions/190.html): Integer Overflow or Wraparound
- [CWE-215](https://cwe.mitre.org/data/definitions/215.html): Information Exposure Through Debug Information
- [CWE-243](https://cwe.mitre.org/data/definitions/243.html): Creation of chroot Jail Without Changing Working Directory
- [CWE-248](https://cwe.mitre.org/data/definitions/248.html): Uncaught Exception
- [CWE-332](https://cwe.mitre.org/data/definitions/332.html): Insufficient Entropy in PRNG
- [CWE-367](https://cwe.mitre.org/data/definitions/367.html): Time-of-check Time-of-use (TOCTOU) Race Condition
- [CWE-415](https://cwe.mitre.org/data/definitions/415.html): Double Free *(still experimental)*
- [CWE-416](https://cwe.mitre.org/data/definitions/416.html): Use After Free *(still experimental)*
- [CWE-426](https://cwe.mitre.org/data/definitions/426.html): Untrusted Search Path
- [CWE-457](https://cwe.mitre.org/data/definitions/457.html): Use of Uninitialized Variable
- [CWE-467](https://cwe.mitre.org/data/definitions/467.html): Use of sizeof() on a Pointer Type
- [CWE-476](https://cwe.mitre.org/data/definitions/476.html): NULL Pointer Dereference
- [CWE-560](https://cwe.mitre.org/data/definitions/560.html): Use of umask() with chmod-style Argument
- [CWE-676](https://cwe.mitre.org/data/definitions/676.html): Use of Potentially Dangerous Function
- [CWE-782](https://cwe.mitre.org/data/definitions/782.html): Exposed IOCTL with Insufficient Access Control
Please note that some of the above analyses are only partially implemented at the moment. Furthermore, false positives are to be expected due to implementation shortcuts and the over-approximating nature of static analysis. For more information about the individual checks you can look at the [online documentation](https://fkie-cad.github.io/cwe_checker/doc/html/cwe_checker/index.html).
**NOTE:** The new memory checks for CWEs 415 and 416 are still very experimental and are disabled on a standard run. You can try them out using the `-partial=Memory` command line flag.
**NOTE:** We recently decided to deprecate the support for the old emulation based checks for CWEs 415, 416 and 787. In addition to trying out the new memory checks, users of these checks should also take a look at the [BAP toolkit](https://github.com/BinaryAnalysisPlatform/bap-toolkit), which provides better-maintained (and still emulation based) versions of these checks.
## Integration into other tools ##
*cwe_checker* comes with scripts for IDA Pro and Ghidra, which parse the output of *cwe_checker* and annotate the found CWEs in the disassembler for easier manual analysis. See the [online documentation](https://fkie-cad.github.io/cwe_checker/doc/html/cwe_checker/index.html#ToolIntegration) for their usage. The IDA Pro plugin also uses colors to represent the severity of the found issues (yellow, orange, or red). The following screenshot shows some results:
<p align="center">
<img src="doc/images/example_ida_anotation.png" alt="IDA Pro anotation" width="50%" height="50%"/>
</p>
## How to extend cwe_checker? ##
You can find some information about how to write your own check [here](https://fkie-cad.github.io/cwe_checker/doc/html/cwe_checker/index.html#HackingHowto).
...
[package]
name = "cwe_checker_rs"
version = "0.1.0"
authors = ["Nils-Edvin Enkelmann <nils-edvin.enkelmann@fkie.fraunhofer.de>"]
edition = "2018"
[dependencies]
apint = "0.2"
serde = {version = "1.0", features = ["derive", "rc"]}
serde_json = "1.0"
serde_yaml = "0.8"
ocaml = "0.9.2"
petgraph = { version = "0.5", features = ["default", "serde-1"] }
fnv = "1.0" # a faster hash function for small keys like integers
anyhow = "1.0" # for easy error types
crossbeam-channel = "0.4"
[lib]
name = "cwe_checker_rs"
crate-type = ["staticlib", "cdylib"]
use crate::bil::*;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
/// The main trait describing an abstract domain.
///
/// Each abstract domain is partially ordered and has a maximal element (which can be generated by `top()`).
/// Abstract domains of the same type can be merged.
///
/// TODO: Decide if and how to represent intersects and bottom values!
pub trait AbstractDomain: Sized + Eq + Clone {
/// The maximal value of a domain.
/// Usually it indicates a value for which nothing is known.
fn top(&self) -> Self;
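/// Merge two values of the domain.
/// The default implementation is a flat merge: if both values are equal, the value is kept,
/// otherwise all information is discarded and `top()` is returned.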
fn merge(&self, other: &Self) -> Self {
if self == other {
self.clone()
} else {
self.top()
}
}
/// Returns whether the element represents the top element or not.
fn is_top(&self) -> bool {
*self == self.top()
}
}
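// An illustration of the default `merge` and `is_top` behavior for the flat `BitvectorDomain`
// defined below: merging equal values keeps the value, while merging different values loses
// all information and yields the `Top` element.
//
//     let three = BitvectorDomain::Value(Bitvector::from_i64(3));
//     let four = BitvectorDomain::Value(Bitvector::from_i64(4));
//     assert_eq!(three.merge(&three), three);
//     assert!(three.merge(&four).is_top());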
/// A trait for abstract domains that represent values that can be loaded into register or written onto the stack.
/// Every value has a determined and immutable length (in bits).
pub trait ValueDomain: AbstractDomain {
/// Returns the size of the value in bits
fn bitsize(&self) -> BitSize;
/// Return a new top element with the given bitsize
fn new_top(bitsize: BitSize) -> Self;
/// Compute the (abstract) result of a binary operation
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self;
/// Compute the (abstract) result of a unary operation
fn un_op(&self, op: UnOpType) -> Self;
/// extract a sub-bitvector
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
Self::new_top(high_bit - low_bit) // TODO: This needs a unit test whether the result has the correct bitwidth!
}
/// Extend a bitvector using the given cast type
fn cast(&self, kind: CastType, width: BitSize) -> Self;
/// Concatenate two bitvectors
fn concat(&self, other: &Self) -> Self {
Self::new_top(self.bitsize() + other.bitsize())
}
}
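/// A simple abstract domain for bitvector values:
/// either a single known bitvector (`Value`) or a value of known bitsize about which nothing else is known (`Top`).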
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum BitvectorDomain {
Top(BitSize),
Value(Bitvector),
}
impl ValueDomain for BitvectorDomain {
fn bitsize(&self) -> BitSize {
use BitvectorDomain::*;
match self {
Top(bitsize) => *bitsize,
Value(bitvec) => bitvec.width().to_usize() as u16,
}
}
fn new_top(bitsize: BitSize) -> BitvectorDomain {
BitvectorDomain::Top(bitsize)
}
/// Evaluate the given binary operation.
/// Note that this function assumes that both values have the same bitsize.
/// If not, this function should panic.
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self {
use BinOpType::*;
match op {
LSHIFT | RSHIFT | ARSHIFT => (),
_=> assert_eq!(self.bitsize(), rhs.bitsize())
}
match (self, rhs) {
(BitvectorDomain::Value(lhs_bitvec), BitvectorDomain::Value(rhs_bitvec)) => match op {
PLUS => BitvectorDomain::Value(lhs_bitvec + rhs_bitvec),
MINUS => BitvectorDomain::Value(lhs_bitvec - rhs_bitvec),
TIMES => BitvectorDomain::Value(lhs_bitvec * rhs_bitvec),
DIVIDE => BitvectorDomain::Value(
lhs_bitvec.clone().into_checked_udiv(rhs_bitvec).unwrap(),
),
SDIVIDE => BitvectorDomain::Value(
lhs_bitvec.clone().into_checked_sdiv(rhs_bitvec).unwrap(),
),
MOD => BitvectorDomain::Value(
lhs_bitvec.clone().into_checked_urem(rhs_bitvec).unwrap(),
),
SMOD => BitvectorDomain::Value(
lhs_bitvec.clone().into_checked_srem(rhs_bitvec).unwrap(),
),
LSHIFT => {
let shift_amount = rhs_bitvec.try_to_u64().unwrap() as usize;
if shift_amount < lhs_bitvec.width().to_usize() {
BitvectorDomain::Value(
lhs_bitvec
.clone()
.into_checked_shl(shift_amount)
.unwrap(),
)
} else {
BitvectorDomain::Value(Bitvector::zero(lhs_bitvec.width()))
}
}
RSHIFT => {
let shift_amount = rhs_bitvec.try_to_u64().unwrap() as usize;
if shift_amount < lhs_bitvec.width().to_usize() {
BitvectorDomain::Value(
lhs_bitvec
.clone()
.into_checked_lshr(shift_amount)
.unwrap(),
)
} else {
BitvectorDomain::Value(Bitvector::zero(lhs_bitvec.width()))
}
}
ARSHIFT => BitvectorDomain::Value(
lhs_bitvec
.clone()
.into_checked_ashr(rhs_bitvec.try_to_u64().unwrap() as usize)
.unwrap(),
),
AND => BitvectorDomain::Value(lhs_bitvec & rhs_bitvec),
OR => BitvectorDomain::Value(lhs_bitvec | rhs_bitvec),
XOR => BitvectorDomain::Value(lhs_bitvec ^ rhs_bitvec),
EQ => {
assert_eq!(lhs_bitvec.width(), rhs_bitvec.width());
BitvectorDomain::Value(Bitvector::from(lhs_bitvec == rhs_bitvec))
}
NEQ => {
assert_eq!(lhs_bitvec.width(), rhs_bitvec.width());
BitvectorDomain::Value(Bitvector::from(lhs_bitvec != rhs_bitvec))
}
LT => BitvectorDomain::Value(Bitvector::from(
lhs_bitvec.checked_ult(rhs_bitvec).unwrap(),
)),
LE => BitvectorDomain::Value(Bitvector::from(
lhs_bitvec.checked_ule(rhs_bitvec).unwrap(),
)),
SLT => BitvectorDomain::Value(Bitvector::from(
lhs_bitvec.checked_slt(rhs_bitvec).unwrap(),
)),
SLE => BitvectorDomain::Value(Bitvector::from(
lhs_bitvec.checked_sle(rhs_bitvec).unwrap(),
)),
},
_ => match op {
PLUS | MINUS | TIMES | DIVIDE | SDIVIDE | MOD | SMOD | LSHIFT | RSHIFT
| ARSHIFT | AND | OR | XOR => BitvectorDomain::new_top(self.bitsize()),
EQ | NEQ | LT | LE | SLT | SLE => BitvectorDomain::new_top(1),
},
}
}
/// Evaluate the given unary operation.
fn un_op(&self, op: UnOpType) -> Self {
use UnOpType::*;
if let BitvectorDomain::Value(bitvec) = self {
match op {
NEG => BitvectorDomain::Value(-bitvec),
NOT => BitvectorDomain::Value(bitvec.clone().into_bitnot()),
}
} else {
BitvectorDomain::new_top(self.bitsize())
}
}
/// Extract a sub-bitvector out of a bitvector
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
if let BitvectorDomain::Value(bitvec) = self {
// TODO: Check whether this is correct on a real world example and then write a unit test for it
BitvectorDomain::Value(
bitvec
.clone()
.into_checked_lshr(low_bit as usize)
.unwrap()
.into_truncate((high_bit - low_bit + 1) as usize)
.unwrap(),
)
} else {
BitvectorDomain::new_top(high_bit - low_bit + 1)
}
}
fn cast(&self, kind: CastType, width: BitSize) -> Self {
if let BitvectorDomain::Value(bitvec) = self {
use CastType::*;
match kind {
UNSIGNED => {
BitvectorDomain::Value(bitvec.clone().into_zero_extend(width as usize).unwrap())
}
SIGNED => {
BitvectorDomain::Value(bitvec.clone().into_sign_extend(width as usize).unwrap())
}
HIGH => BitvectorDomain::Value(
bitvec
.clone()
.into_checked_lshr((self.bitsize() - width) as usize)
.unwrap()
.into_truncate(width as usize)
.unwrap(),
),
LOW => {
BitvectorDomain::Value(bitvec.clone().into_truncate(width as usize).unwrap())
}
}
} else {
BitvectorDomain::new_top(width)
}
}
fn concat(&self, other: &Self) -> Self {
match (self, other) {
(BitvectorDomain::Value(left_bitvec), BitvectorDomain::Value(right_bitvec)) => {
let new_bitwidth = (self.bitsize() + other.bitsize()) as usize;
let upper_bits = left_bitvec
.clone()
.into_zero_extend(new_bitwidth)
.unwrap()
.into_checked_shl(other.bitsize() as usize)
.unwrap();
let lower_bits = right_bitvec.clone().into_zero_extend(new_bitwidth).unwrap();
BitvectorDomain::Value(upper_bits | &lower_bits)
}
_ => BitvectorDomain::new_top(self.bitsize() + other.bitsize()),
}
}
}
impl AbstractDomain for BitvectorDomain {
fn top(&self) -> BitvectorDomain {
BitvectorDomain::Top(self.bitsize())
}
}
impl std::ops::Add for BitvectorDomain {
type Output = BitvectorDomain;
fn add(self, rhs: Self) -> Self {
assert_eq!(self.bitsize(), rhs.bitsize());
self.bin_op(crate::bil::BinOpType::PLUS, &rhs)
}
}
impl std::ops::Sub for BitvectorDomain {
type Output = BitvectorDomain;
fn sub(self, rhs: Self) -> Self {
assert_eq!(self.bitsize(), rhs.bitsize());
self.bin_op(crate::bil::BinOpType::MINUS, &rhs)
}
}
impl std::ops::Neg for BitvectorDomain {
type Output = BitvectorDomain;
fn neg(self) -> Self {
self.un_op(crate::bil::UnOpType::NEG)
}
}
impl std::convert::From<Bitvector> for BitvectorDomain {
fn from(bitvector: Bitvector) -> BitvectorDomain {
BitvectorDomain::Value(bitvector)
}
}
impl std::fmt::Display for BitvectorDomain {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Top(bitsize) => write!(formatter, "Top:i{}", bitsize),
Self::Value(bitvector) => write!(
formatter,
"0x{:016x}:i{:?}",
bitvector,
bitvector.width().to_usize()
),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
#[test]
fn bitvector_domain_as_value_domain() {
use crate::bil::BinOpType::*;
use crate::bil::CastType::*;
use crate::bil::UnOpType::*;
let eight = bv(8);
let sixteen = bv(16);
assert_eq!(sixteen.bin_op(PLUS, &eight), bv(24));
assert_eq!(sixteen.bin_op(MINUS, &eight), bv(8));
assert_eq!(sixteen.bin_op(TIMES, &eight), bv(16 * 8));
assert_eq!(sixteen.bin_op(DIVIDE, &eight), bv(2));
assert_eq!(sixteen.bin_op(SDIVIDE, &eight), bv(2));
assert_eq!(sixteen.bin_op(MOD, &eight), bv(0));
assert_eq!(sixteen.bin_op(SMOD, &eight), bv(0));
assert_eq!(sixteen.bin_op(LSHIFT, &bv(2)), bv(64));
assert_eq!(sixteen.bin_op(RSHIFT, &bv(2)), bv(4));
assert_eq!(sixteen.bin_op(ARSHIFT, &bv(2)), bv(4));
assert_eq!(sixteen.bin_op(AND, &eight), bv(0));
assert_eq!(sixteen.bin_op(OR, &eight), bv(24));
assert_eq!(sixteen.bin_op(XOR, &eight), bv(24));
assert_eq!(
sixteen.bin_op(EQ, &bv(16)),
BitvectorDomain::Value(Bitvector::from_bit(true))
);
assert_eq!(
sixteen.bin_op(NEQ, &bv(16)),
BitvectorDomain::Value(Bitvector::from_bit(false))
);
assert_eq!(sixteen.un_op(NEG), bv(-16));
assert_eq!(bv(0).un_op(NOT), bv(-1));
assert_eq!(
sixteen.extract(0, 31),
BitvectorDomain::Value(Bitvector::from_i32(16))
);
assert_eq!(
sixteen.extract(32, 63),
BitvectorDomain::Value(Bitvector::from_i32(0))
);
assert_eq!(
BitvectorDomain::Value(Bitvector::from_i32(2)),
BitvectorDomain::Value(Bitvector::from_i64(2 << 32)).cast(HIGH, 32)
);
assert_eq!(
BitvectorDomain::Value(Bitvector::from_i32(-1))
.concat(&BitvectorDomain::Value(Bitvector::from_i32(-1))),
bv(-1)
);
}
#[test]
fn bitvector_domain_as_abstract_domain() {
assert_eq!(bv(17).merge(&bv(17)), bv(17));
assert_eq!(bv(17).merge(&bv(16)), BitvectorDomain::new_top(64));
assert!(!bv(17).is_top());
assert!(BitvectorDomain::new_top(64).is_top());
}
}
/*!
This module implements a generic fixpoint algorithm for dataflow analysis.
A fixpoint problem is defined as a graph where:
- Each node `n` gets assigned a value `val(n)` where the set of all values forms a partially ordered set.
- Each edge `e` defines a rule `e:value -> value` how to compute the value at the end node given the value at the start node of the edge.
A fixpoint is an assignment of values to all nodes of the graph so that for all edges
`e(val(start_node)) <= val(end_node)` holds.
For general information on dataflow analysis using fixpoint algorithms see [Wikipedia](https://en.wikipedia.org/wiki/Data-flow_analysis).
Or open an issue on github that you want more documentation here. :-)
*/
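// A minimal usage sketch, mirroring the unit test at the end of this module.
// The `ShortestPath` context struct is hypothetical and only serves as an illustration:
// with `merge = min` and `update_edge = add the edge weight`, the fixpoint assigns to each
// reachable node the minimal edge-weight sum over all paths from the start node.
//
//     struct ShortestPath { graph: DiGraph<(), u64> }
//
//     impl Problem for ShortestPath {
//         type EdgeLabel = u64;
//         type NodeLabel = ();
//         type NodeValue = u64;
//         fn get_graph(&self) -> &DiGraph<(), u64> { &self.graph }
//         fn merge(&self, val1: &u64, val2: &u64) -> u64 { std::cmp::min(*val1, *val2) }
//         fn update_edge(&self, value: &u64, edge: EdgeIndex) -> Option<u64> {
//             Some(value + self.graph.edge_weight(edge).unwrap())
//         }
//     }
//
//     let mut computation = Computation::new(ShortestPath { graph }, None);
//     computation.set_node_value(NodeIndex::new(0), 0);
//     computation.compute_with_max_steps(100);
//     let distance_to_node_1 = computation.get_node_value(NodeIndex::new(1));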
use fnv::FnvHashMap;
use petgraph::graph::{DiGraph, EdgeIndex, NodeIndex};
use petgraph::visit::EdgeRef;
use std::collections::{BTreeMap, BinaryHeap};
/// A fixpoint problem defines the context for a fixpoint computation.
///
/// All trait methods have access to the FixpointProblem structure, so that context information is accessible through it.
pub trait Problem {
type EdgeLabel: Clone;
type NodeLabel;
type NodeValue: PartialEq + Eq;
fn get_graph(&self) -> &DiGraph<Self::NodeLabel, Self::EdgeLabel>;
/// This function describes how to merge two values
fn merge(&self, val1: &Self::NodeValue, val2: &Self::NodeValue) -> Self::NodeValue;
/// This function describes how the value at the end node of an edge is computed from the value at the start node of the edge.
/// The function can return None to indicate that no end value gets generated through this edge.
/// E.g. In a control flow graph, if the edge cannot be taken for the given start value, this function should return None.
fn update_edge(&self, value: &Self::NodeValue, edge: EdgeIndex) -> Option<Self::NodeValue>;
}
/// The computation struct contains an intermediate result of a fixpoint computation.
pub struct Computation<T: Problem> {
fp_problem: T,
node_priority_list: Vec<usize>, // maps a node index to its priority (higher priority nodes get stabilized first)
priority_to_node_list: Vec<NodeIndex>, // maps a priority to the corresponding node index
worklist: BinaryHeap<usize>,
default_value: Option<T::NodeValue>,
node_values: FnvHashMap<NodeIndex, T::NodeValue>,
}
impl<T: Problem> Computation<T> {
/// Create a new fixpoint computation from a fixpoint problem, the corresponding graph
/// and a default value for all nodes, if one exists.
pub fn new(fp_problem: T, default_value: Option<T::NodeValue>) -> Self {
let graph = fp_problem.get_graph();
// order the nodes in weak topological order
let sorted_nodes: Vec<NodeIndex> = petgraph::algo::kosaraju_scc(&graph)
.into_iter()
.flatten()
.rev()
.collect();
let mut node_to_index = BTreeMap::new();
for (i, node_index) in sorted_nodes.iter().enumerate() {
node_to_index.insert(node_index, i);
}
let node_priority_list: Vec<usize> = node_to_index.values().copied().collect();
let mut worklist = BinaryHeap::new();
// If a default value exists, all nodes are added to the worklist. If not, the worklist is empty
if default_value.is_some() {
for i in 0..sorted_nodes.len() {
worklist.push(i);
}
}
Computation {
fp_problem,
node_priority_list,
priority_to_node_list: sorted_nodes,
worklist,
default_value,
node_values: FnvHashMap::default(),
}
}
/// Get the value of a node.
pub fn get_node_value(&self, node: NodeIndex) -> Option<&T::NodeValue> {
if let Some(ref value) = self.node_values.get(&node) {
Some(value)
} else {
self.default_value.as_ref()
}
}
/// Set the value of a node and mark the node as not yet stabilized.
pub fn set_node_value(&mut self, node: NodeIndex, value: T::NodeValue) {
self.node_values.insert(node, value);
self.worklist.push(self.node_priority_list[node.index()]);
}
/// Merge the value at a node with some new value.
fn merge_node_value(&mut self, node: NodeIndex, value: T::NodeValue) {
if let Some(old_value) = self.node_values.get(&node) {
let merged_value = self.fp_problem.merge(&value, old_value);
if merged_value != *old_value {
self.set_node_value(node, merged_value);
}
} else {
self.set_node_value(node, value);
}
}
/// Compute and update the value at the end node of an edge.
fn update_edge(&mut self, edge: EdgeIndex) {
let (start_node, end_node) = self
.fp_problem
.get_graph()
.edge_endpoints(edge)
.expect("Edge not found");
if let Some(start_val) = self.node_values.get(&start_node) {
if let Some(new_end_val) = self.fp_problem.update_edge(start_val, edge) {
self.merge_node_value(end_node, new_end_val);
}
}
}
/// Update all outgoing edges of a node.
fn update_node(&mut self, node: NodeIndex) {
let edges: Vec<EdgeIndex> = self
.fp_problem
.get_graph()
.edges(node)
.map(|edge_ref| edge_ref.id())
.collect();
for edge in edges {
self.update_edge(edge);
}
}
/// Compute the fixpoint of the fixpoint problem.
/// Each node will be visited at most max_steps times.
/// If a node does not stabilize after max_steps visits, the end result will not be a fixpoint but only an intermediate result of a fixpoint computation.
pub fn compute_with_max_steps(&mut self, max_steps: u64) {
let mut steps = vec![0; self.fp_problem.get_graph().node_count()];
while let Some(priority) = self.worklist.pop() {
let node = self.priority_to_node_list[priority];
if steps[node.index()] < max_steps {
steps[node.index()] += 1;
self.update_node(node);
}
}
}
/// Compute the fixpoint of the fixpoint problem.
/// If the fixpoint algorithm does not converge to a fixpoint, this function will not terminate.
pub fn compute(&mut self) {
while let Some(priority) = self.worklist.pop() {
let node = self.priority_to_node_list[priority];
self.update_node(node);
}
}
/// Get a reference to the internal map where one can look up the current values of all nodes
pub fn node_values(&self) -> &FnvHashMap<NodeIndex, T::NodeValue> {
&self.node_values
}
/// Get a reference to the underlying graph
pub fn get_graph(&self) -> &DiGraph<T::NodeLabel, T::EdgeLabel> {
self.fp_problem.get_graph()
}
}
#[cfg(test)]
mod tests {
use super::*;
struct FPProblem {
graph: DiGraph<(), u64>,
}
impl Problem for FPProblem {
type EdgeLabel = u64;
type NodeLabel = ();
type NodeValue = u64;
fn get_graph(&self) -> &DiGraph<(), u64> {
&self.graph
}
fn merge(&self, val1: &Self::NodeValue, val2: &Self::NodeValue) -> Self::NodeValue {
std::cmp::min(*val1, *val2)
}
fn update_edge(&self, value: &Self::NodeValue, edge: EdgeIndex) -> Option<Self::NodeValue> {
Some(value + self.graph.edge_weight(edge).unwrap())
}
}
#[test]
fn fixpoint() {
let mut graph: DiGraph<(), u64> = DiGraph::new();
for _i in 0..101 {
graph.add_node(());
}
for i in 0..100 {
graph.add_edge(NodeIndex::new(i), NodeIndex::new(i + 1), i as u64 % 10 + 1);
}
for i in 0..10 {
graph.add_edge(NodeIndex::new(i * 10), NodeIndex::new(i * 10 + 5), 0);
}
graph.add_edge(NodeIndex::new(100), NodeIndex::new(0), 0);
let mut solution = Computation::new(FPProblem { graph }, None);
solution.set_node_value(NodeIndex::new(0), 0);
solution.compute_with_max_steps(20);
assert_eq!(30, *solution.get_node_value(NodeIndex::new(9)).unwrap());
assert_eq!(0, *solution.get_node_value(NodeIndex::new(5)).unwrap());
}
}
/*!
This module implements functions to generate (interprocedural) control flow graphs out of a program term.
*/
use crate::term::*;
use petgraph::graph::{DiGraph, NodeIndex};
use serde::Serialize;
use std::collections::{HashMap, HashSet};
/// The graph type of an interprocedural control flow graph
pub type Graph<'a> = DiGraph<Node<'a>, Edge<'a>>;
/// The node type of an interprocedural control flow graph
#[derive(Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Node<'a> {
BlkStart(&'a Term<Blk>),
BlkEnd(&'a Term<Blk>),
CallReturn(&'a Term<Blk>), // The block is the one from the call instruction
}
impl<'a> Node<'a> {
pub fn get_block(&self) -> &'a Term<Blk> {
use Node::*;
match self {
BlkStart(blk) | BlkEnd(blk) | CallReturn(blk) => blk,
}
}
}
impl<'a> std::fmt::Display for Node<'a> {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::BlkStart(block) => write!(formatter, "BlkStart @ {}", block.tid),
Self::BlkEnd(block) => write!(formatter, "BlkEnd @ {}", block.tid),
Self::CallReturn(block) => write!(formatter, "CallReturn (caller @ {})", block.tid),
}
}
}
// TODO: document that we assume that the graph only has blocks with either:
// - one unconditional call instruction
// - one return instruction
// - at most 2 intraprocedural jump instructions, i.e. at most one of them is a conditional jump
/// The edge type of an interprocedural control flow graph
#[derive(Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Edge<'a> {
Block,
Jump(&'a Term<Jmp>, Option<&'a Term<Jmp>>),
Call(&'a Term<Jmp>),
ExternCallStub(&'a Term<Jmp>),
CRCallStub,
CRReturnStub,
CRCombine(&'a Term<Jmp>),
}
/// A builder struct for building graphs
struct GraphBuilder<'a> {
program: &'a Term<Program>,
extern_subs: HashSet<Tid>,
graph: Graph<'a>,
jump_targets: HashMap<Tid, (NodeIndex, NodeIndex)>, // Denotes the NodeIndices of possible jump targets
return_addresses: HashMap<Tid, Vec<(NodeIndex, NodeIndex)>>, // for each function the list of return addresses of the corresponding call sites
}
impl<'a> GraphBuilder<'a> {
/// Create a new builder with an empty graph
pub fn new(program: &'a Term<Program>, extern_subs: HashSet<Tid>) -> GraphBuilder<'a> {
GraphBuilder {
program,
extern_subs,
graph: Graph::new(),
jump_targets: HashMap::new(),
return_addresses: HashMap::new(),
}
}
/// add start and end nodes of a block and the connecting edge
fn add_block(&mut self, block: &'a Term<Blk>) {
let start = self.graph.add_node(Node::BlkStart(block));
let end = self.graph.add_node(Node::BlkEnd(block));
self.jump_targets.insert(block.tid.clone(), (start, end));
self.graph.add_edge(start, end, Edge::Block);
}
/// add all blocks of the program to the graph
fn add_program_blocks(&mut self) {
let subs = self.program.term.subs.iter();
let blocks = subs.map(|sub| sub.term.blocks.iter()).flatten();
for block in blocks {
self.add_block(block);
}
}
/// add all subs to the jump targets so that call instructions can be linked to the starting block of the corresponding sub.
fn add_subs_to_jump_targets(&mut self) {
for sub in self.program.term.subs.iter() {
if sub.term.blocks.len() > 0 {
let start_block = &sub.term.blocks[0];
let target_index = self.jump_targets[&start_block.tid];
self.jump_targets.insert(sub.tid.clone(), target_index);
}
// TODO: Generate Log-Message for Subs without blocks.
}
}
/// add call edges and interprocedural jump edges for a specific jump term to the graph
fn add_jump_edge(
&mut self,
source: NodeIndex,
jump: &'a Term<Jmp>,
untaken_conditional: Option<&'a Term<Jmp>>,
) {
match &jump.term.kind {
JmpKind::Goto(Label::Direct(tid)) => {
self.graph.add_edge(
source,
self.jump_targets[&tid].0,
Edge::Jump(jump, untaken_conditional),
);
}
JmpKind::Goto(Label::Indirect(_)) => (), // TODO: add handling of indirect edges!
JmpKind::Call(ref call) => {
if let Label::Direct(ref target_tid) = call.target {
if self.extern_subs.contains(target_tid) {
if let Some(Label::Direct(ref return_tid)) = call.return_ {
self.graph.add_edge(
source,
self.jump_targets[&return_tid].0,
Edge::ExternCallStub(jump),
);
}
} else {
if let Some(target) = self.jump_targets.get(&target_tid) {
self.graph.add_edge(source, target.0, Edge::Call(jump));
}
if let Some(Label::Direct(ref return_tid)) = call.return_ {
let return_index = self.jump_targets[return_tid].0;
self.return_addresses
.entry(target_tid.clone())
.and_modify(|vec| vec.push((source, return_index)))
.or_insert(vec![(source, return_index)]);
}
// TODO: Non-returning calls and tail calls both have no return target in BAP.
// Thus we need to distinguish them somehow to correctly handle tail calls.
}
}
}
JmpKind::Interrupt {
value: _,
return_addr: _,
} => (), // TODO: Add some handling for interrupts
JmpKind::Return(_) => {} // return edges are handled in a different function
}
}
/// Add all outgoing edges generated by calls and interprocedural jumps for a specific block to the graph.
/// Return edges are *not* added by this function.
fn add_outgoing_edges(&mut self, node: NodeIndex) {
let block: &'a Term<Blk> = self.graph[node].get_block();
let jumps = block.term.jmps.as_slice();
match jumps {
[] => (), // TODO: Decide whether blocks without jumps should be considered hard errors or (silent) dead ends
[jump] => self.add_jump_edge(node, jump, None),
[if_jump, else_jump] => {
self.add_jump_edge(node, if_jump, None);
self.add_jump_edge(node, else_jump, Some(if_jump));
}
_ => panic!("Basic block with more than 2 jumps encountered"),
}
}
/// For each return instruction and each corresponding call, add the following to the graph:
/// - a CallReturn node.
/// - edges from the callsite and from the returning-from-site to the CallReturn node
/// - an edge from the CallReturn node to the return-to-site
fn add_call_return_node_and_edges(
&mut self,
return_from_sub: &Term<Sub>,
return_source: NodeIndex,
) {
for (call_node, return_to_node) in self.return_addresses[&return_from_sub.tid].iter() {
let call_block = self.graph[*call_node].get_block();
let call_term = call_block
.term
.jmps
.iter()
.filter(|jump| matches!(jump.term.kind, JmpKind::Call(_)))
.next()
.unwrap();
let cr_combine_node = self.graph.add_node(Node::CallReturn(call_block));
self.graph
.add_edge(*call_node, cr_combine_node, Edge::CRCallStub);
self.graph
.add_edge(return_source, cr_combine_node, Edge::CRReturnStub);
self.graph
.add_edge(cr_combine_node, *return_to_node, Edge::CRCombine(call_term));
}
}
/// Add all return instruction related edges and nodes to the graph (for all return instructions).
fn add_return_edges(&mut self) {
for sub in &self.program.term.subs {
for block in &sub.term.blocks {
if block
.term
.jmps
.iter()
.find(|jmp| matches!(jmp.term.kind, JmpKind::Return(_)))
.is_some()
{
let return_from_node = self.jump_targets[&block.tid].1;
self.add_call_return_node_and_edges(sub, return_from_node);
}
}
}
}
/// Add all non-return-instruction-related jump edges to the graph.
fn add_jump_and_call_edges(&mut self) {
for node in self.graph.node_indices() {
if let Node::BlkEnd(_) = self.graph[node] {
self.add_outgoing_edges(node);
}
}
}
/// Build the interprocedural control flow graph.
pub fn build(mut self) -> Graph<'a> {
self.add_program_blocks();
self.add_subs_to_jump_targets();
self.add_jump_and_call_edges();
self.add_return_edges();
self.graph
}
}
/// This function builds the interprocedural control flow graph for a program term.
pub fn get_program_cfg(program: &Term<Program>, extern_subs: HashSet<Tid>) -> Graph {
let builder = GraphBuilder::new(program, extern_subs);
return builder.build();
}
/// For a given set of block TIDs generate a map from the TIDs to the indices of the BlkStart and BlkEnd nodes
/// corresponding to the block.
pub fn get_indices_of_block_nodes<'a, I: Iterator<Item = &'a Tid>>(
graph: &'a Graph,
block_tids: I,
) -> HashMap<Tid, (NodeIndex, NodeIndex)> {
let tids: HashSet<Tid> = block_tids.cloned().collect();
let mut tid_to_indices_map = HashMap::new();
for node_index in graph.node_indices() {
if let Some(tid) = tids.get(&graph[node_index].get_block().tid) {
match graph[node_index] {
Node::BlkStart(_block_term) => {
let start_index = node_index;
let end_index = graph.neighbors(start_index).next().unwrap();
tid_to_indices_map.insert(tid.clone(), (start_index, end_index));
}
_ => (),
}
}
}
return tid_to_indices_map;
}
#[cfg(test)]
mod tests {
use super::*;
fn mock_program() -> Term<Program> {
use Label::*;
let call = Call {
target: Direct(Tid::new("sub2")),
return_: Some(Direct(Tid::new("sub1_blk2"))),
};
let call_term = Term {
tid: Tid::new("call".to_string()),
term: Jmp {
condition: None,
kind: JmpKind::Call(call),
},
};
let return_term = Term {
tid: Tid::new("return".to_string()),
term: Jmp {
condition: None,
kind: JmpKind::Return(Direct(Tid::new("sub1_blk2"))),
},
};
let jmp = Jmp {
condition: None,
kind: JmpKind::Goto(Direct(Tid::new("sub1_blk1"))),
};
let jmp_term = Term {
tid: Tid::new("jump"),
term: jmp,
};
let sub1_blk1 = Term {
tid: Tid::new("sub1_blk1"),
term: Blk {
defs: Vec::new(),
jmps: vec![call_term],
},
};
let sub1_blk2 = Term {
tid: Tid::new("sub1_blk2"),
term: Blk {
defs: Vec::new(),
jmps: vec![jmp_term],
},
};
let sub1 = Term {
tid: Tid::new("sub1"),
term: Sub {
name: "sub1".to_string(),
blocks: vec![sub1_blk1, sub1_blk2],
},
};
let sub2_blk1 = Term {
tid: Tid::new("sub2_blk1"),
term: Blk {
defs: Vec::new(),
jmps: vec![return_term],
},
};
let sub2 = Term {
tid: Tid::new("sub2"),
term: Sub {
name: "sub2".to_string(),
blocks: vec![sub2_blk1],
},
};
let program = Term {
tid: Tid::new("program"),
term: Program {
subs: vec![sub1, sub2],
extern_symbols: Vec::new(),
entry_points: Vec::new(),
},
};
program
}
#[test]
fn create_program_cfg() {
let program = mock_program();
let graph = get_program_cfg(&program, HashSet::new());
println!("{}", serde_json::to_string_pretty(&graph).unwrap());
assert_eq!(graph.node_count(), 7);
assert_eq!(graph.edge_count(), 8);
}
}
/*!
This module defines a trait for interprocedural fixpoint problems.
## Basic usage
Define a *Context* struct containing all information that does not change during the fixpoint computation.
In particular, this includes the graph on which the fixpoint computation is run.
Then implement the *Problem* trait for the *Context* struct.
The fixpoint computation can now be run as follows:
```
let context = MyContext::new(); // MyContext needs to implement Problem
let mut computation = Computation::new(context, None);
// add starting node values here with computation.set_node_value(..)
computation.compute();
// computation is done, get solution node values here
```
*/
// TODO: When indirect jumps are sufficiently supported, the update_jump methods need access to
// target (and maybe source) nodes/TIDs, to determine which target the current edge points to.
// Alternatively, this could be achieved through usage of the specialize_conditional function.
// Currently unclear, which way is better.
use super::fixpoint::Problem as GeneralFPProblem;
use super::graph::*;
use crate::bil::Expression;
use crate::prelude::*;
use crate::term::*;
use fnv::FnvHashMap;
use petgraph::graph::{EdgeIndex, NodeIndex};
use std::marker::PhantomData;
#[derive(PartialEq, Eq, Serialize, Deserialize)]
pub enum NodeValue<T: PartialEq + Eq> {
Value(T),
CallReturnCombinator { call: Option<T>, return_: Option<T> },
}
impl<T: PartialEq + Eq> NodeValue<T> {
pub fn unwrap_value(&self) -> &T {
match self {
NodeValue::Value(value) => value,
_ => panic!("Unexpected node value type"),
}
}
}
/// An interprocedural fixpoint problem defines the context for a fixpoint computation.
///
/// All trait methods have access to the FixpointProblem structure, so that context information is accessible through it.
pub trait Problem<'a> {
type Value: PartialEq + Eq + Clone;
fn get_graph(&self) -> &Graph<'a>;
fn merge(&self, value1: &Self::Value, value2: &Self::Value) -> Self::Value;
fn update_def(&self, value: &Self::Value, def: &Term<Def>) -> Self::Value;
fn update_jump(
&self,
value: &Self::Value,
jump: &Term<Jmp>,
untaken_conditional: Option<&Term<Jmp>>,
) -> Option<Self::Value>;
fn update_call(&self, value: &Self::Value, call: &Term<Jmp>, target: &Node) -> Self::Value;
fn update_return(
&self,
value: &Self::Value,
value_before_call: Option<&Self::Value>,
call_term: &Term<Jmp>,
) -> Option<Self::Value>;
fn update_call_stub(&self, value: &Self::Value, call: &Term<Jmp>) -> Option<Self::Value>;
fn specialize_conditional(
&self,
value: &Self::Value,
condition: &Expression,
is_true: bool,
) -> Option<Self::Value>;
}
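// The trait methods above correspond to the edge types of the interprocedural CFG,
// as used by `GeneralizedProblem::update_edge` below:
// - `update_def` is folded over the defs of a block for `Edge::Block` edges,
// - `update_jump` handles `Edge::Jump` edges (intraprocedural jumps),
// - `update_call` handles `Edge::Call` edges into the start block of the called subroutine,
// - `update_call_stub` handles `Edge::ExternCallStub` edges for calls to external symbols,
// - `update_return` combines the value returned by the callee with the value saved at the
//   call site on `Edge::CRCombine` edges.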
/// This struct is a wrapper to create a general fixpoint problem out of an interprocedural fixpoint problem.
struct GeneralizedProblem<'a, T: Problem<'a>> {
problem: T,
_phantom_graph_reference: PhantomData<Graph<'a>>,
}
impl<'a, T: Problem<'a>> GeneralizedProblem<'a, T> {
pub fn new(problem: T) -> Self {
GeneralizedProblem {
problem,
_phantom_graph_reference: PhantomData,
}
}
}
impl<'a, T: Problem<'a>> GeneralFPProblem for GeneralizedProblem<'a, T> {
type EdgeLabel = Edge<'a>;
type NodeLabel = Node<'a>;
type NodeValue = NodeValue<T::Value>;
fn get_graph(&self) -> &Graph<'a> {
self.problem.get_graph()
}
fn merge(&self, val1: &Self::NodeValue, val2: &Self::NodeValue) -> Self::NodeValue {
use NodeValue::*;
match (val1, val2) {
(Value(value1), Value(value2)) => Value(self.problem.merge(value1, value2)),
(
CallReturnCombinator {
call: call1,
return_: return1,
},
CallReturnCombinator {
call: call2,
return_: return2,
},
) => CallReturnCombinator {
call: merge_option(call1, call2, |v1, v2| self.problem.merge(v1, v2)),
return_: merge_option(return1, return2, |v1, v2| self.problem.merge(v1, v2)),
},
_ => panic!("Malformed CFG in fixpoint computation"),
}
}
fn update_edge(
&self,
node_value: &Self::NodeValue,
edge: EdgeIndex,
) -> Option<Self::NodeValue> {
let graph = self.problem.get_graph();
let (start_node, end_node) = graph.edge_endpoints(edge).unwrap();
let block_term = graph.node_weight(start_node).unwrap().get_block();
match graph.edge_weight(edge).unwrap() {
Edge::Block => {
let value = node_value.unwrap_value();
let defs = &block_term.term.defs;
let end_val = defs.iter().fold(value.clone(), |accum, def| {
self.problem.update_def(&accum, def)
});
Some(NodeValue::Value(end_val))
}
Edge::Call(call) => Some(NodeValue::Value(self.problem.update_call(
node_value.unwrap_value(),
call,
&graph[end_node],
))),
Edge::CRCallStub => Some(NodeValue::CallReturnCombinator {
call: Some(node_value.unwrap_value().clone()),
return_: None,
}),
Edge::CRReturnStub => Some(NodeValue::CallReturnCombinator {
call: None,
return_: Some(node_value.unwrap_value().clone()),
}),
Edge::CRCombine(call_term) => match node_value {
NodeValue::Value(_) => panic!("Unexpected interprocedural fixpoint graph state"),
NodeValue::CallReturnCombinator { call, return_ } => {
if let Some(return_value) = return_ {
match self
.problem
.update_return(return_value, call.as_ref(), call_term)
{
Some(val) => Some(NodeValue::Value(val)),
None => None,
}
} else {
None
}
}
},
Edge::ExternCallStub(call) => self
.problem
.update_call_stub(node_value.unwrap_value(), call)
.map(|val| NodeValue::Value(val)),
Edge::Jump(jump, untaken_conditional) => self
.problem
.update_jump(node_value.unwrap_value(), jump, *untaken_conditional)
.map(|val| NodeValue::Value(val)),
}
}
}
/// This struct contains an intermediate result of an interprocedural fixpoint computation.
pub struct Computation<'a, T: Problem<'a>> {
generalized_computation: super::fixpoint::Computation<GeneralizedProblem<'a, T>>,
}
impl<'a, T: Problem<'a>> Computation<'a, T> {
/// Generate a new computation from the corresponding problem and a default value for nodes.
pub fn new(problem: T, default_value: Option<T::Value>) -> Self {
let generalized_problem = GeneralizedProblem::new(problem);
let computation = super::fixpoint::Computation::new(
generalized_problem,
default_value.map(|val| NodeValue::Value(val)),
);
Computation {
generalized_computation: computation,
}
}
/// Compute the fixpoint.
/// Note that this function does not terminate if the fixpoint algorithm does not stabilize
pub fn compute(&mut self) {
self.generalized_computation.compute()
}
/// Compute the fixpoint while updating each node at most max_steps times.
/// Note that the result may not be a stabilized fixpoint, but only an intermediate result of a fixpoint computation.
pub fn compute_with_max_steps(&mut self, max_steps: u64) {
self.generalized_computation
.compute_with_max_steps(max_steps)
}
/// Get the value of a node.
pub fn get_node_value(&self, node: NodeIndex) -> Option<&NodeValue<T::Value>> {
self.generalized_computation.get_node_value(node)
}
/// Set the value of a node and mark the node as not yet stabilized
pub fn set_node_value(&mut self, node: NodeIndex, value: NodeValue<T::Value>) {
self.generalized_computation.set_node_value(node, value)
}
/// Get a reference to the internal map where one can look up the current values of all nodes
pub fn node_values(&self) -> &FnvHashMap<NodeIndex, NodeValue<T::Value>> {
self.generalized_computation.node_values()
}
/// Get a reference to the underlying graph
pub fn get_graph(&self) -> &Graph {
self.generalized_computation.get_graph()
}
}
fn merge_option<T: Clone, F>(opt1: &Option<T>, opt2: &Option<T>, merge: F) -> Option<T>
where
F: Fn(&T, &T) -> T,
{
match (opt1, opt2) {
(Some(value1), Some(value2)) => Some(merge(value1, value2)),
(Some(value), None) | (None, Some(value)) => Some(value.clone()),
(None, None) => None,
}
}
/*!
A memory region is an abstract domain representing a continuous region of memory.
For example, a stack domain containing values written to the stack can be represented with a memory region.
Design notes:
- The values do not need a fixed size.
Instead you need to provide the size of an element when adding it to the memory region.
- Whenever you try to read from an address that is not assigned to a value, the `Value::top()` element gets returned.
The reason behind this is that the value could be anything.
- Whenever adding an element intersects existing elements, the existing ones get removed from the memory region.
The reason is that reading the old positions afterwards could yield anything.
- Whenever a read from a correct position but with an incorrect size occurs, `Value::top()` gets returned.
That is because the value could be anything if the size read is too big and reading of partial values is not implemented for this type.
- An empty memory region could yield anything (in the sense of `Value::top`) at a read at any position.
In that regard, an empty memory region is actually the `top()` element of the domain.
- TODO: Implement the abstract domain trait for MemRegion.
- TODO: Remove the implicit saving of element sizes, as ValueDomains have now an intrinsic size.
Implementation needs is_top() to be a member function of the ValueDomain trait.
*/
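// A small illustration of the semantics described above, using the `BitvectorDomain`
// value domain (the concrete values are arbitrary): an 8-byte value written at offset 0
// can be read back with the matching size, while a read with a different size yields `Top`.
//
//     let mut region: MemRegion<BitvectorDomain> = MemRegion::new(64);
//     region.add(Bitvector::from_i64(42).into(), Bitvector::from_i64(0));
//     assert_eq!(region.get(Bitvector::from_i64(0), 8), Bitvector::from_i64(42).into());
//     assert!(region.get(Bitvector::from_i64(0), 4).is_top());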
use super::abstract_domain::*;
use crate::bil::{BitSize, Bitvector};
use apint::{Int, Width};
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::sync::Arc;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
struct Element<T> {
size: i64,
value: T,
}
#[derive(Serialize, Deserialize, Debug, Hash, Clone)]
pub struct MemRegion<T: AbstractDomain + ValueDomain + std::fmt::Debug>(Arc<MemRegionData<T>>);
impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> PartialEq for MemRegion<T> {
fn eq(&self, other: &Self) -> bool {
if Arc::ptr_eq(&self.0, &other.0) {
true
} else {
self.0 == other.0
}
}
}
impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> Eq for MemRegion<T> {}
impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> MemRegion<T> {
pub fn new(address_bitsize: BitSize) -> Self {
MemRegion(Arc::new(MemRegionData::new(address_bitsize)))
}
pub fn get_address_bitsize(&self) -> BitSize {
self.0.get_address_bitsize()
}
pub fn merge(&self, other: &Self) -> Self {
if self == other {
self.clone()
} else {
MemRegion(Arc::new(self.0.merge(&other.0)))
}
}
pub fn add(&mut self, value: T, position: Bitvector) {
Arc::make_mut(&mut self.0).add(value, position)
}
pub fn get(&self, position: Bitvector, size_in_bytes: u64) -> T {
self.0.get(position, size_in_bytes)
}
pub fn remove(&mut self, position: Bitvector, size_in_bytes: Bitvector) {
Arc::make_mut(&mut self.0).remove(position, size_in_bytes)
}
pub fn iter_values(&self) -> std::collections::btree_map::Values<'_, i64, T> {
self.0.values.values()
}
pub fn iter_values_mut(&mut self) -> std::collections::btree_map::ValuesMut<'_, i64, T> {
Arc::make_mut(&mut self.0).values.values_mut()
}
pub fn iter(&self) -> std::collections::btree_map::Iter<i64, T> {
self.0.values.iter()
}
}
/// An abstract domain representing a continuous region of memory. See the module level description for more.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
struct MemRegionData<T: AbstractDomain + ValueDomain + std::fmt::Debug> {
address_bitsize: BitSize,
values: BTreeMap<i64, T>,
}
impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> MemRegionData<T> {
/// create a new, empty MemRegion
pub fn new(address_bitsize: BitSize) -> MemRegionData<T> {
MemRegionData {
address_bitsize,
values: BTreeMap::new(),
}
}
pub fn get_address_bitsize(&self) -> BitSize {
self.address_bitsize
}
/// Remove all elements intersecting the provided interval.
/// This function does not sanitize its inputs.
fn clear_interval(&mut self, position: i64, size: i64) {
// If the previous element intersects the range, remove it
if let Some((prev_pos, prev_size)) = self
.values
.range(..position)
.map(|(pos, elem)| (*pos, elem.bitsize() as i64 / 8))
.last()
{
if prev_pos + prev_size > position {
self.values.remove(&prev_pos);
}
}
// remove all other intersecting elements
let intersecting_elements: Vec<i64> = self
.values
.range(position..(position + size))
.map(|(pos, _elem)| *pos)
.collect();
for index in intersecting_elements {
self.values.remove(&index);
}
}
/// Add a value to the memory region.
pub fn add(&mut self, value: T, position: Bitvector) {
assert_eq!(position.width().to_usize(), self.address_bitsize as usize);
let position = Int::from(position).try_to_i64().unwrap();
assert!(value.bitsize() % 8 == 0);
let size_in_bytes = value.bitsize() as i64 / 8;
assert!(size_in_bytes > 0);
self.clear_interval(position, size_in_bytes);
if !value.is_top() {
// top()-values do not need to be explicitly saved, as they don't contain any information anyway.
self.values.insert(position, value);
}
}
/// Get the value at the given position.
/// If there is no value at the position or the size of the element is not the same as the provided size, return `T::top()`.
pub fn get(&self, position: Bitvector, size_in_bytes: u64) -> T {
assert_eq!(position.width().to_usize(), self.address_bitsize as usize);
let position = Int::from(position).try_to_i64().unwrap();
let size = size_in_bytes as i64;
assert!(size > 0);
if let Some(elem) = self.values.get(&position) {
if (elem.bitsize() as i64) == (size * 8) {
return elem.clone();
}
}
let bitsize = 8 * size as u16;
T::new_top(bitsize)
}
/// Remove all elements intersecting the provided interval.
pub fn remove(&mut self, position: Bitvector, size_in_bytes: Bitvector) {
assert_eq!(position.width().to_usize(), self.address_bitsize as usize);
let position = Int::from(position).try_to_i64().unwrap();
let size = Int::from(size_in_bytes).try_to_i64().unwrap();
assert!(size > 0);
self.clear_interval(position, size);
}
/// Merge two memory regions.
///
/// Values at the same position and with the same size get merged via their merge function.
/// Other values are *not* added to the merged region, because they could be anything in at least one of the two regions.
pub fn merge(&self, other: &MemRegionData<T>) -> MemRegionData<T> {
assert_eq!(self.address_bitsize, other.address_bitsize);
let mut merged_values: BTreeMap<i64, T> = BTreeMap::new();
// add all elements contained in both memory regions
for (pos_left, elem_left) in self.values.iter() {
if let Some((_pos_right, elem_right)) = other.values.get_key_value(pos_left) {
if elem_left.bitsize() == elem_right.bitsize() {
let merged_val = elem_left.merge(&elem_right);
if !merged_val.is_top() {
// we discard top()-values, as they don't contain information
merged_values.insert(*pos_left, merged_val);
}
}
}
}
MemRegionData {
address_bitsize: self.address_bitsize,
values: merged_values,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash, PartialOrd, Ord)]
struct MockDomain(i64, BitSize);
impl AbstractDomain for MockDomain {
fn top(&self) -> MockDomain {
MockDomain::new_top(self.1)
}
}
impl ValueDomain for MockDomain {
fn bitsize(&self) -> BitSize {
self.1
}
fn new_top(bitsize: BitSize) -> MockDomain {
MockDomain(0, bitsize)
}
fn bin_op(&self, _op: crate::bil::BinOpType, _rhs: &Self) -> Self {
Self::new_top(self.1)
}
fn un_op(&self, _op: crate::bil::UnOpType) -> Self {
Self::new_top(self.1)
}
fn cast(&self, _kind: crate::bil::CastType, width: BitSize) -> Self {
Self::new_top(width)
}
}
fn mock(val: i64, bitsize: BitSize) -> MockDomain {
MockDomain(val, bitsize)
}
fn bv(val: i64) -> Bitvector {
Bitvector::from_i64(val)
}
#[test]
fn mem_region() {
let mut region: MemRegion<MockDomain> = MemRegion::new(64);
region.add(mock(5, 3 * 8), bv(5));
assert_eq!(region.get(bv(5), 3), mock(5, 3 * 8));
region.add(mock(7, 2 * 8), bv(8));
assert_eq!(region.get(bv(8), 2), mock(7, 2 * 8));
assert_eq!(region.get(bv(5), 3), mock(5, 3 * 8));
assert_eq!(region.get(bv(5), 2), MockDomain::new_top(2 * 8));
region.add(mock(9, 2 * 8), bv(6));
assert_eq!(region.get(bv(6), 2), mock(9, 2 * 8));
assert_eq!(region.get(bv(5), 3), MockDomain::new_top(3 * 8));
assert_eq!(region.get(bv(8), 2), mock(7, 2 * 8));
region.add(mock(9, 11 * 8), bv(-3));
assert_eq!(region.get(bv(-3), 11), mock(9, 11 * 8));
assert_eq!(region.get(bv(6), 2), MockDomain::new_top(2 * 8));
assert_eq!(region.get(bv(8), 2), mock(7, 2 * 8));
let mut other_region = MemRegion::new(64);
other_region.add(mock(7, 2 * 8), bv(8));
assert!(region != other_region);
let merged_region = region.merge(&other_region);
assert_eq!(merged_region.get(bv(8), 2), mock(7, 2 * 8));
assert_eq!(merged_region.get(bv(-3), 11), MockDomain::new_top(11 * 8));
other_region.add(mock(9, 11 * 8), bv(-3));
assert_eq!(region, other_region);
}
#[test]
fn do_not_save_top_elements() {
let mut region: MemRegionData<MockDomain> = MemRegionData::new(64);
region.add(MockDomain::new_top(4 * 8), bv(5));
assert_eq!(region.values.len(), 0);
let mut other_region: MemRegionData<MockDomain> = MemRegionData::new(64);
region.add(mock(5, 4 * 8), bv(5));
other_region.add(mock(7, 4 * 8), bv(5));
let merged_region = region.merge(&other_region);
assert_eq!(region.values.len(), 1);
assert_eq!(other_region.values.len(), 1);
assert_eq!(merged_region.values.len(), 0);
}
}
pub mod abstract_domain;
pub mod fixpoint;
pub mod graph;
pub mod interprocedural_fixpoint;
pub mod mem_region;
pub mod pointer_inference;
use crate::analysis::abstract_domain::*;
use crate::analysis::graph::Graph;
use crate::bil::Expression;
use crate::prelude::*;
use crate::term::symbol::ExternSymbol;
use crate::term::*;
use crate::utils::log::*;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use super::data::Data;
use super::identifier::*;
use super::state::State;
pub struct Context<'a> {
pub graph: Graph<'a>,
pub project: &'a Project,
pub extern_symbol_map: BTreeMap<Tid, &'a ExternSymbol>,
pub cwe_collector: crossbeam_channel::Sender<CweWarning>,
pub log_collector: crossbeam_channel::Sender<LogMessage>,
}
impl<'a> Context<'a> {
pub fn new(
project: &Project,
cwe_collector: crossbeam_channel::Sender<CweWarning>,
log_collector: crossbeam_channel::Sender<LogMessage>,
) -> Context {
let mut extern_symbol_map = BTreeMap::new();
for symbol in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(symbol.tid.clone(), symbol);
}
let extern_symbol_tid_set: HashSet<Tid> = project
.program
.term
.extern_symbols
.iter()
.map(|symb| symb.tid.clone())
.collect();
let graph =
crate::analysis::graph::get_program_cfg(&project.program, extern_symbol_tid_set);
Context {
graph,
project,
extern_symbol_map,
cwe_collector,
log_collector,
}
}
pub fn log_debug<'_lt>(&self, result: Result<(), Error>, location: Option<&'_lt Tid>) {
if let Err(err) = result {
let log_message = LogMessage {
text: format!("Pointer Inference: {}", err),
level: LogLevel::Debug,
location: location.cloned(),
};
self.log_collector.send(log_message).unwrap();
}
}
}
impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> {
type Value = State;
fn get_graph(&self) -> &Graph<'a> {
&self.graph
}
fn merge(&self, value1: &State, value2: &State) -> State {
value1.merge(value2)
}
fn update_def(&self, state: &Self::Value, def: &Term<Def>) -> Self::Value {
// first check for use-after-frees
if state.contains_access_of_dangling_memory(&def.term.rhs) {
let warning = CweWarning {
name: "CWE416".to_string(),
version: "0.1".to_string(),
addresses: vec![def.tid.address.clone()],
tids: vec![format!("{}", def.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!(
"(Use After Free) Access through a dangling pointer at {}",
def.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
}
// TODO: handle loads in the right hand side expression for their side effects!
match &def.term.rhs {
Expression::Store { .. } => {
let mut state = state.clone();
self.log_debug(state.handle_store_exp(&def.term.rhs), Some(&def.tid));
state
}
Expression::IfThenElse {
condition,
true_exp,
false_exp,
} => {
// IfThenElse needs special handling, because it may encode conditional store instructions.
let mut true_state = state.clone();
if let Expression::Store { .. } = **true_exp {
self.log_debug(true_state.handle_store_exp(true_exp), Some(&def.tid));
} else {
self.log_debug(
true_state.handle_register_assign(&def.term.lhs, true_exp),
Some(&def.tid),
);
};
let mut false_state = state.clone();
if let Expression::Store { .. } = **false_exp {
self.log_debug(false_state.handle_store_exp(false_exp), Some(&def.tid));
} else {
self.log_debug(
false_state.handle_register_assign(&def.term.lhs, false_exp),
Some(&def.tid),
);
};
match state.eval(condition) {
Ok(Data::Value(cond)) if !cond.is_top() => {
if cond == Bitvector::from_bit(true).into() {
true_state
} else if cond == Bitvector::from_bit(false).into() {
false_state
} else {
panic!("IfThenElse with wrong condition bitsize encountered")
}
}
Ok(_) => true_state.merge(&false_state),
Err(err) => panic!("IfThenElse-Condition evaluation failed: {}", err),
}
}
expression => {
let mut new_state = state.clone();
self.log_debug(
new_state.handle_register_assign(&def.term.lhs, expression),
Some(&def.tid),
);
new_state
}
}
}
fn update_jump(
&self,
value: &State,
_jump: &Term<Jmp>,
_untaken_conditional: Option<&Term<Jmp>>,
) -> Option<State> {
// TODO: Implement some real specialization of conditionals!
let mut new_value = value.clone();
new_value.remove_virtual_register();
Some(new_value)
}
fn update_call(
&self,
state: &State,
call_term: &Term<Jmp>,
_target_node: &crate::analysis::graph::Node,
) -> State {
let call = if let JmpKind::Call(ref call) = call_term.term.kind {
call
} else {
panic!("Malformed control flow graph: Encountered call edge with a non-call jump term.")
};
let stack_offset_domain = self.get_current_stack_offset(state);
if let Label::Direct(ref callee_tid) = call.target {
let callee_stack_id = AbstractIdentifier::new(
callee_tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let new_caller_stack_id = AbstractIdentifier::new(
call_term.tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let stack_offset_adjustment = stack_offset_domain.clone();
let address_bitsize = self.project.stack_pointer_register.bitsize().unwrap();
let mut callee_state = state.clone();
callee_state.remove_virtual_register();
// Replace the caller stack id with one determined by the call instruction.
// This has to be done *before* adding the new callee stack id to avoid confusing caller and callee stack ids in case of recursive calls.
callee_state.replace_abstract_id(
&state.stack_id,
&new_caller_stack_id,
&stack_offset_adjustment,
);
// add a new memory object for the callee stack frame
callee_state.memory.add_abstract_object(
callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap()).into(),
super::object::ObjectType::Stack,
address_bitsize,
);
// set the new stack_id
callee_state.stack_id = callee_stack_id.clone();
// Set the stack pointer register to the callee stack id.
// At the beginning of a function this is the only known pointer to the new stack frame.
self.log_debug(
callee_state.set_register(
&self.project.stack_pointer_register,
super::data::PointerDomain::new(
callee_stack_id,
Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap())
.into(),
)
.into(),
),
Some(&call_term.tid),
);
// set the list of caller stack ids to only this caller id
callee_state.caller_ids = BTreeSet::new();
callee_state.caller_ids.insert(new_caller_stack_id.clone());
// remove non-referenced objects from the state
callee_state.remove_unreferenced_objects();
return callee_state;
} else {
panic!("Indirect call edges not yet supported.")
// TODO: Support indirect call edges!
}
}
fn update_return(
&self,
state_before_return: &State,
state_before_call: Option<&State>,
call_term: &Term<Jmp>,
) -> Option<State> {
// We only return to call sites that have a value before the call, to prevent returning into dead code.
let state_before_call = match state_before_call {
Some(value) => value,
None => return None,
};
let original_caller_stack_id = &state_before_call.stack_id;
let caller_stack_id = AbstractIdentifier::new(
call_term.tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let callee_stack_id = &state_before_return.stack_id;
let stack_offset_on_call = self.get_current_stack_offset(state_before_call);
let mut state_after_return = state_before_return.clone();
state_after_return.remove_virtual_register();
state_after_return.replace_abstract_id(
&caller_stack_id,
original_caller_stack_id,
&(-stack_offset_on_call.clone()),
);
state_after_return.merge_callee_stack_to_caller_stack(
callee_stack_id,
original_caller_stack_id,
&(-stack_offset_on_call.clone()),
);
state_after_return.stack_id = original_caller_stack_id.clone();
state_after_return.caller_ids = state_before_call.caller_ids.clone();
// remove non-referenced objects from the state
state_after_return.remove_unreferenced_objects();
// TODO: I need to detect and report cases where pointers to objects on the callee stack get returned, as this has its own CWE number!
// Detect and report cases, where knowledge about the offset of the stack pointer gets lost on return!
// Maybe add a fallback repair mechanism in these cases.
Some(state_after_return)
}
fn update_call_stub(&self, state: &State, call: &Term<Jmp>) -> Option<State> {
let mut new_state = state.clone();
let call_target = match &call.term.kind {
JmpKind::Call(call_inner) => &call_inner.target,
_ => panic!("Malformed control flow graph encountered."),
};
// Clear non-callee-saved registers from the state.
new_state.clear_non_callee_saved_register(&self.project.callee_saved_registers[..]);
// Set the stack register value.
// TODO: This is wrong if the extern call clears more from the stack than just the return address.
// TODO: a check on validity of the return address could also be useful here.
let stack_register = &self.project.stack_pointer_register;
{
let stack_pointer = state.get_register(stack_register).unwrap();
let offset = Bitvector::from_u16(stack_register.bitsize().unwrap() / 8)
.into_zero_extend(stack_register.bitsize().unwrap() as usize)
.unwrap();
self.log_debug(
new_state.set_register(
stack_register,
stack_pointer.bin_op(crate::bil::BinOpType::PLUS, &Data::bitvector(offset)),
),
Some(&call.tid),
);
}
match call_target {
Label::Direct(tid) => {
if let Some(extern_symbol) = self.extern_symbol_map.get(tid) {
// TODO: Replace the hardcoded symbol matching by something configurable in config.json!
// TODO: This implementation ignores that allocation functions may return Null,
// since this is not yet representable in the state object.
// Check all parameter register for dangling pointers and report possible use-after-free if one is found.
for argument in extern_symbol
.arguments
.iter()
.filter(|arg| arg.intent.is_input())
{
match state.eval(&argument.location) {
Ok(value) => {
if state.memory.is_dangling_pointer(&value) {
let warning = CweWarning {
name: "CWE416".to_string(),
version: "0.1".to_string(),
addresses: vec![call.tid.address.clone()],
tids: vec![format!("{}", call.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!("(Use After Free) Call to {} may access freed memory at {}", extern_symbol.name, call.tid.address),
};
self.cwe_collector.send(warning).unwrap();
}
}
Err(err) => self.log_debug(
Err(err.context(format!(
"Function argument expression {:?} could not be evaluated",
argument.location
))),
Some(&call.tid),
),
}
}
match extern_symbol.name.as_str() {
"malloc" | "calloc" | "realloc" | "xmalloc" => {
if let Ok(return_register) = extern_symbol.get_unique_return_register()
{
let object_id = AbstractIdentifier::new(
call.tid.clone(),
AbstractLocation::from_var(return_register).unwrap(),
);
let address_bitsize =
self.project.stack_pointer_register.bitsize().unwrap();
new_state.memory.add_abstract_object(
object_id.clone(),
Bitvector::zero((address_bitsize as usize).into()).into(),
super::object::ObjectType::Heap,
address_bitsize,
);
let pointer = super::data::PointerDomain::new(
object_id,
Bitvector::zero((address_bitsize as usize).into()).into(),
);
self.log_debug(
new_state.set_register(return_register, pointer.into()),
Some(&call.tid),
);
return Some(new_state);
} else {
// We cannot track the new object, since we do not know where to store the pointer to it.
// TODO: Return a diagnostics message to the user here.
return Some(new_state);
}
}
"free" => {
match extern_symbol.get_unique_parameter() {
Ok(parameter_expression) => {
if let Ok(memory_object_pointer) =
state.eval(parameter_expression)
{
if let Data::Pointer(pointer) = memory_object_pointer {
if let Err(possible_double_free_object_ids) =
new_state.mark_mem_object_as_freed(&pointer)
{
let warning = CweWarning {
name: "CWE415".to_string(),
version: "0.1".to_string(),
addresses: vec![call.tid.address.clone()],
tids: vec![format!("{}", call.tid)],
symbols: Vec::new(),
other: vec![possible_double_free_object_ids.into_iter().map(|id| {format!("{}", id)}).collect()],
description: format!("(Double Free) Object may have been freed before at {}", call.tid.address),
};
self.cwe_collector.send(warning).unwrap();
}
} // TODO: add diagnostics for else case
new_state.remove_unreferenced_objects();
return Some(new_state);
} else {
// TODO: add diagnostics message for the user here
return Some(new_state);
}
}
Err(err) => {
// We do not know which memory object to free
self.log_debug(Err(err), Some(&call.tid));
return Some(new_state);
}
}
}
_ => {
self.log_debug(
new_state.clear_stack_parameter(extern_symbol),
Some(&call.tid),
);
let mut possible_referenced_ids = BTreeSet::new();
if extern_symbol.arguments.is_empty() {
// TODO: We assume here that we do not know the parameters and approximate them by all parameter registers.
// This approximation is wrong if the function is known but has neither parameters nor return values.
// We need to somehow distinguish these two cases.
// TODO: We need to clean up stack memory below the current position of the stack pointer.
for parameter_register_name in
self.project.parameter_registers.iter()
{
if let Some(register_value) =
state.get_register_by_name(parameter_register_name)
{
possible_referenced_ids
.append(&mut register_value.referenced_ids());
}
}
} else {
for parameter in extern_symbol
.arguments
.iter()
.filter(|arg| arg.intent.is_input())
{
if let Ok(data) = state.eval(&parameter.location) {
possible_referenced_ids.append(&mut data.referenced_ids());
}
}
}
possible_referenced_ids = state
.add_recursively_referenced_ids_to_id_set(possible_referenced_ids);
// Delete content of all referenced objects, as the function may write to them.
for id in possible_referenced_ids.iter() {
new_state
.memory
.mark_mem_object_as_untracked(id, &possible_referenced_ids);
}
return Some(new_state);
}
}
} else {
panic!("Extern symbol not found.");
}
}
Label::Indirect(_) => unimplemented!("Handling of indirect edges not yet implemented"), // Right now this case should not exist. Decide how to handle only after it can actually occur.
}
}
fn specialize_conditional(
&self,
value: &State,
_condition: &Expression,
_is_true: bool,
) -> Option<State> {
// TODO: implement some real specialization of conditionals!
Some(value.clone())
}
}
impl<'a> Context<'a> {
fn get_current_stack_offset(&self, state: &State) -> BitvectorDomain {
if let Ok(Data::Pointer(ref stack_pointer)) =
state.get_register(&self.project.stack_pointer_register)
{
if stack_pointer.iter_targets().len() == 1 {
// TODO: add sanity check that the stack id is the expected id
let (_stack_id, stack_offset_domain) = stack_pointer.iter_targets().next().unwrap();
stack_offset_domain.clone()
} else {
BitvectorDomain::new_top(self.project.stack_pointer_register.bitsize().unwrap())
}
} else {
BitvectorDomain::new_top(self.project.stack_pointer_register.bitsize().unwrap())
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::bil::variable::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
fn new_id(time: &str, reg_name: &str) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new(time),
AbstractLocation::Register(reg_name.to_string(), 64),
)
}
fn mock_extern_symbol(name: &str) -> ExternSymbol {
use crate::bil;
let arg = Arg {
var: register("RAX"),
location: bil::Expression::Var(register("RAX")),
intent: ArgIntent::Both,
};
ExternSymbol {
tid: Tid::new("extern_".to_string() + name),
address: "somewhere".into(),
name: name.into(),
calling_convention: None,
arguments: vec![arg],
}
}
fn register(name: &str) -> Variable {
Variable {
name: name.into(),
type_: crate::bil::variable::Type::Immediate(64),
is_temp: false,
}
}
fn call_term(target_name: &str) -> Term<Jmp> {
let call = Call {
target: Label::Direct(Tid::new(target_name)),
return_: None,
};
Term {
tid: Tid::new(format!("call_{}", target_name)),
term: Jmp {
condition: None,
kind: JmpKind::Call(call),
},
}
}
fn mock_project() -> Project {
let program = Program {
subs: Vec::new(),
extern_symbols: vec![
mock_extern_symbol("malloc"),
mock_extern_symbol("free"),
mock_extern_symbol("other"),
],
entry_points: Vec::new(),
};
let program_term = Term {
tid: Tid::new("program"),
term: program,
};
Project {
program: program_term,
cpu_architecture: "mock_arch".to_string(),
stack_pointer_register: register("RSP"),
callee_saved_registers: vec!["callee_saved_reg".to_string()],
parameter_registers: vec!["RAX".to_string()],
}
}
#[test]
fn context_problem_implementation() {
use crate::analysis::interprocedural_fixpoint::Problem;
use crate::analysis::pointer_inference::data::*;
use crate::bil::*;
use Expression::*;
let project = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, cwe_sender, log_sender);
let mut state = State::new(&register("RSP"), Tid::new("main"));
let def = Term {
tid: Tid::new("def"),
term: Def {
lhs: register("RSP"),
rhs: BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Var(register("RSP"))),
rhs: Box::new(Const(Bitvector::from_i64(-16))),
},
},
};
let store_term = Term {
tid: Tid::new("store"),
term: Def {
lhs: register("memory"), // technically false, but not checked at the moment
rhs: Store {
address: Box::new(Var(register("RSP"))),
endian: Endianness::LittleEndian,
memory: Box::new(Var(register("memory"))), // This is technically false, but the field is ignored at the moment
value: Box::new(Const(Bitvector::from_i64(42))),
size: 64,
},
},
};
// test update_def
state = context.update_def(&state, &def);
let stack_pointer = Data::Pointer(PointerDomain::new(new_id("main", "RSP"), bv(-16)));
assert_eq!(state.eval(&Var(register("RSP"))).unwrap(), stack_pointer);
state = context.update_def(&state, &store_term);
// Test update_call
let target_block = Term {
tid: Tid::new("func_start"),
term: Blk {
defs: Vec::new(),
jmps: Vec::new(),
},
};
let target_node = crate::analysis::graph::Node::BlkStart(&target_block);
let call = call_term("func");
let mut callee_state = context.update_call(&state, &call, &target_node);
assert_eq!(callee_state.stack_id, new_id("func", "RSP"));
assert_eq!(callee_state.caller_ids.len(), 1);
assert_eq!(
callee_state.caller_ids.iter().next().unwrap(),
&new_id("call_func", "RSP")
);
callee_state
.memory
.set_value(
PointerDomain::new(new_id("func", "RSP"), bv(-30)),
Data::Value(bv(33).into()),
)
.unwrap();
let return_state = context
.update_return(&callee_state, Some(&state), &call)
.unwrap();
assert_eq!(return_state.stack_id, new_id("main", "RSP"));
assert_eq!(return_state.caller_ids, BTreeSet::new());
assert_eq!(
return_state.memory.get_internal_id_map(),
state.memory.get_internal_id_map()
);
assert_eq!(
return_state.get_register(&register("RSP")).unwrap(),
state.get_register(&register("RSP")).unwrap()
);
state
.set_register(&register("callee_saved_reg"), Data::Value(bv(13)))
.unwrap();
state
.set_register(&register("other_reg"), Data::Value(bv(14)))
.unwrap();
let malloc = call_term("extern_malloc");
let mut state_after_malloc = context.update_call_stub(&state, &malloc).unwrap();
assert_eq!(
state_after_malloc.get_register(&register("RAX")).unwrap(),
Data::Pointer(PointerDomain::new(
new_id("call_extern_malloc", "RAX"),
bv(0)
))
);
assert_eq!(state_after_malloc.memory.get_num_objects(), 2);
assert_eq!(
state_after_malloc.get_register(&register("RSP")).unwrap(),
state
.get_register(&register("RSP"))
.unwrap()
.bin_op(BinOpType::PLUS, &Data::Value(bv(8)))
);
assert_eq!(
state_after_malloc
.get_register(&register("callee_saved_reg"))
.unwrap(),
Data::Value(bv(13))
);
assert!(state_after_malloc
.get_register(&register("other_reg"))
.unwrap()
.is_top());
state_after_malloc
.set_register(
&register("callee_saved_reg"),
Data::Pointer(PointerDomain::new(
new_id("call_extern_malloc", "RAX"),
bv(0),
)),
)
.unwrap();
let free = call_term("extern_free");
let state_after_free = context
.update_call_stub(&state_after_malloc, &free)
.unwrap();
assert!(state_after_free
.get_register(&register("RAX"))
.unwrap()
.is_top());
assert_eq!(state_after_free.memory.get_num_objects(), 2);
assert_eq!(
state_after_free
.get_register(&register("callee_saved_reg"))
.unwrap(),
Data::Pointer(PointerDomain::new(
new_id("call_extern_malloc", "RAX"),
bv(0)
))
);
let other_extern_fn = call_term("extern_other");
let state_after_other_fn = context.update_call_stub(&state, &other_extern_fn).unwrap();
assert_eq!(
state_after_other_fn.get_register(&register("RSP")).unwrap(),
state
.get_register(&register("RSP"))
.unwrap()
.bin_op(BinOpType::PLUS, &Data::Value(bv(8)))
);
assert_eq!(
state_after_other_fn
.get_register(&register("callee_saved_reg"))
.unwrap(),
Data::Value(bv(13))
);
assert!(state_after_other_fn
.get_register(&register("other_reg"))
.unwrap()
.is_top());
}
}
use super::identifier::*;
use crate::analysis::abstract_domain::*;
use crate::bil::*;
use crate::prelude::*;
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryFrom;
/// An abstract value representing either a pointer or a constant value.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub enum Data {
Top(BitSize),
Pointer(PointerDomain),
Value(BitvectorDomain),
}
impl Data {
pub fn bitvector(bitv: Bitvector) -> Data {
Data::Value(BitvectorDomain::Value(bitv))
}
/// For pointer values, replace an abstract identifier with another one and add the offset_adjustment to the pointer offset.
/// This is needed to adjust the stack pointer on call and return instructions.
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
if let Self::Pointer(pointer) = self {
pointer.replace_abstract_id(old_id, new_id, offset_adjustment);
}
}
pub fn referenced_ids(&self) -> BTreeSet<AbstractIdentifier> {
if let Self::Pointer(pointer) = self {
pointer.0.keys().cloned().collect()
} else {
BTreeSet::new()
}
}
}
impl Data {
pub fn to_json_compact(&self) -> serde_json::Value {
match self {
Self::Top(bitsize) => serde_json::Value::String(format!("Top:{}", bitsize)),
Self::Pointer(pointer) => {
let target_iter = pointer.iter_targets().map(|(id, offset)| {
(
format!("{}", id),
serde_json::Value::String(format!("{}", offset)),
)
});
let targets = serde_json::Value::Object(target_iter.collect());
let mut obj_map = serde_json::Map::new();
obj_map.insert("Pointer".to_string(), targets);
serde_json::Value::Object(obj_map)
}
Self::Value(bitvector) => serde_json::Value::String(format!("Value: {}", bitvector)),
}
}
}
impl<'a> TryFrom<&'a Data> for &'a Bitvector {
type Error = ();
fn try_from(value: &'a Data) -> Result<&'a Bitvector, Self::Error> {
if let Data::Value(BitvectorDomain::Value(bitvector)) = value {
Ok(bitvector)
} else {
Err(())
}
}
}
impl From<BitvectorDomain> for Data {
fn from(value: BitvectorDomain) -> Data {
Data::Value(value)
}
}
/// An abstract value representing a pointer given as a map from an abstract identifier
/// to the offset in the pointed to object.
///
/// The map should never be empty. If the map contains more than one key,
/// it indicates that the pointer may point to any of the contained objects.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct PointerDomain(BTreeMap<AbstractIdentifier, BitvectorDomain>);
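// Illustrative sketch (assumed helper names, in the style of the tests below): a pointer
// with two targets expresses that it may point into either object. `stack_id` and `heap_id`
// are assumed `AbstractIdentifier`s and `bv` an assumed `BitvectorDomain` constructor.
//
//     let mut ptr = PointerDomain::new(stack_id.clone(), bv(-8));
//     ptr.add_target(heap_id.clone(), bv(0));
//     // ptr now represents "stack frame at offset -8 OR heap object at offset 0".
//     // Merging with another pointer into the same stack frame merges the offsets:
//     let merged = ptr.merge(&PointerDomain::new(stack_id.clone(), bv(-16)));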
impl PointerDomain {
pub fn new(target: AbstractIdentifier, offset: BitvectorDomain) -> PointerDomain {
let mut map = BTreeMap::new();
map.insert(target, offset);
PointerDomain(map)
}
pub fn with_targets(targets: BTreeMap<AbstractIdentifier, BitvectorDomain>) -> PointerDomain {
PointerDomain(targets)
}
/// get the bitsize of the pointer
pub fn bitsize(&self) -> BitSize {
let some_elem = self.0.values().next().unwrap();
some_elem.bitsize()
}
pub fn merge(&self, other: &PointerDomain) -> PointerDomain {
let mut merged_map = self.0.clone();
for (location, offset) in other.0.iter() {
if merged_map.contains_key(location) {
merged_map.insert(location.clone(), merged_map[location].merge(offset));
} else {
merged_map.insert(location.clone(), offset.clone());
}
}
PointerDomain(merged_map)
}
/// Add a new target to the pointer.
/// If the pointer already contains a target with the same abstract identifier, the offsets of both targets get merged.
pub fn add_target(&mut self, target: AbstractIdentifier, offset: BitvectorDomain) {
if let Some(old_offset) = self.0.get(&target) {
let merged_offset = old_offset.merge(&offset);
self.0.insert(target, merged_offset);
} else {
self.0.insert(target, offset);
}
}
/// Replace an abstract identifier with another one and add the offset_adjustment to the pointer offset.
/// This is needed to adjust the stack pointer on call and return instructions.
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
if let Some(old_offset) = self.0.get(&old_id) {
let new_offset = old_offset.clone() + offset_adjustment.clone();
self.0.remove(old_id);
self.0.insert(new_id.clone(), new_offset);
}
}
/// add a value to the offset
pub fn add_to_offset(&self, value: &BitvectorDomain) -> PointerDomain {
let mut result = self.clone();
for offset in result.0.values_mut() {
*offset = offset.bin_op(BinOpType::PLUS, value);
}
result
}
/// subtract a value from the offset
pub fn sub_from_offset(&self, value: &BitvectorDomain) -> PointerDomain {
let mut result = self.clone();
for offset in result.0.values_mut() {
*offset = offset.bin_op(BinOpType::MINUS, value);
}
result
}
/// Get an iterator over all possible abstract targets (together with the offset in the target) the pointer may point to.
pub fn iter_targets(
&self,
) -> std::collections::btree_map::Iter<AbstractIdentifier, BitvectorDomain> {
self.0.iter()
}
pub fn get_target_ids(&self) -> BTreeSet<AbstractIdentifier> {
self.0.keys().cloned().collect()
}
}
impl PointerDomain {
pub fn to_json_compact(&self) -> serde_json::Value {
serde_json::Value::Object(
self.0
.iter()
.map(|(id, offset)| {
(
format!("{}", id),
serde_json::Value::String(format!("{}", offset)),
)
})
.collect(),
)
}
}
impl ValueDomain for Data {
fn bitsize(&self) -> BitSize {
use Data::*;
match self {
Top(size) => *size,
Pointer(pointer) => pointer.bitsize(),
Value(bitvec) => bitvec.bitsize(),
}
}
fn new_top(bitsize: BitSize) -> Data {
Data::Top(bitsize)
}
/// Compute the (abstract) result of a binary operation
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self {
use BinOpType::*;
use Data::*;
match (self, op, rhs) {
(Value(left), _, Value(right)) => Value(left.bin_op(op, right)),
(Pointer(pointer), PLUS, Value(value)) | (Value(value), PLUS, Pointer(pointer)) => {
Pointer(pointer.add_to_offset(value))
}
(Pointer(pointer), MINUS, Value(value)) => Pointer(pointer.sub_from_offset(value)),
// TODO: AND and OR binops may be used to compute pointers when alignment information about the pointer is known.
(_, EQ, _) | (_, NEQ, _) | (_, LT, _) | (_, LE, _) | (_, SLT, _) | (_, SLE, _) => {
BitvectorDomain::new_top(1).into()
}
(_, PLUS, _)
| (_, MINUS, _)
| (_, TIMES, _)
| (_, DIVIDE, _)
| (_, SDIVIDE, _)
| (_, MOD, _)
| (_, SMOD, _)
| (_, LSHIFT, _)
| (_, RSHIFT, _)
| (_, ARSHIFT, _)
| (_, AND, _)
| (_, OR, _)
| (_, XOR, _) => Data::new_top(self.bitsize()),
}
}
/// Compute the (abstract) result of a unary operation
fn un_op(&self, op: UnOpType) -> Self {
if let Data::Value(value) = self {
Data::Value(value.un_op(op))
} else {
Data::new_top(self.bitsize())
}
}
/// extract a sub-bitvector
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
if let Data::Value(value) = self {
Data::Value(value.extract(low_bit, high_bit))
} else {
Data::new_top(high_bit - low_bit + 1)
}
}
/// Extend a bitvector using the given cast type
fn cast(&self, kind: CastType, width: BitSize) -> Self {
if self.bitsize() == width {
// The cast is a no-op.
return self.clone();
}
if let Data::Value(value) = self {
Data::Value(value.cast(kind, width))
} else {
Data::new_top(width)
}
}
/// Concatenate two bitvectors
fn concat(&self, other: &Self) -> Self {
if let (Data::Value(upper_bits), Data::Value(lower_bits)) = (self, other) {
Data::Value(upper_bits.concat(lower_bits))
} else {
Data::new_top(self.bitsize() + other.bitsize())
}
}
}
impl AbstractDomain for Data {
fn top(&self) -> Self {
Data::Top(self.bitsize())
}
fn merge(&self, other: &Self) -> Self {
use Data::*;
match (self, other) {
(Top(bitsize), _) | (_, Top(bitsize)) => Top(*bitsize),
(Pointer(pointer1), Pointer(pointer2)) => Pointer(pointer1.merge(pointer2)),
(Value(val1), Value(val2)) => Value(val1.merge(val2)),
(Pointer(_), Value(_)) | (Value(_), Pointer(_)) => Top(self.bitsize()),
}
}
/// Return whether the element represents a top element or not.
fn is_top(&self) -> bool {
matches!(self, Self::Top(_))
}
}
impl From<PointerDomain> for Data {
fn from(val: PointerDomain) -> Data {
Data::Pointer(val)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
fn new_id(name: String) -> AbstractIdentifier {
AbstractIdentifier::new(Tid::new("time0"), AbstractLocation::Register(name, 64))
}
fn new_pointer_domain(location: String, offset: i64) -> PointerDomain {
let id = new_id(location);
PointerDomain::new(id, bv(offset))
}
fn new_pointer(location: String, offset: i64) -> Data {
Data::Pointer(new_pointer_domain(location, offset))
}
fn new_value(value: i64) -> Data {
Data::Value(bv(value))
}
#[test]
fn data_abstract_domain() {
let pointer = new_pointer("Rax".into(), 0);
let data = new_value(42);
assert_eq!(pointer.merge(&pointer), pointer);
assert_eq!(pointer.merge(&data), Data::new_top(64));
assert_eq!(
data.merge(&new_value(41)),
Data::Value(BitvectorDomain::new_top(64))
);
let other_pointer = new_pointer("Rbx".into(), 0);
match pointer.merge(&other_pointer) {
Data::Pointer(_) => (),
_ => panic!(),
}
}
#[test]
fn data_value_domain() {
use crate::bil::BinOpType::*;
let data = new_value(42);
assert_eq!(data.bitsize(), 64);
let three = new_value(3);
let pointer = new_pointer("Rax".into(), 0);
assert_eq!(data.bin_op(PLUS, &three), new_value(45));
assert_eq!(pointer.bin_op(PLUS, &three), new_pointer("Rax".into(), 3));
assert_eq!(three.un_op(crate::bil::UnOpType::NEG), new_value(-3));
assert_eq!(
three.extract(0, 31),
Data::Value(BitvectorDomain::Value(Bitvector::from_i32(3)))
);
assert_eq!(data.cast(crate::bil::CastType::SIGNED, 128).bitsize(), 128);
let one = Data::Value(BitvectorDomain::Value(Bitvector::from_i32(1)));
let two = Data::Value(BitvectorDomain::Value(Bitvector::from_i32(2)));
let concat = new_value((1 << 32) + 2);
assert_eq!(one.concat(&two), concat);
}
#[test]
fn pointer_domain() {
let pointer = new_pointer_domain("Rax".into(), 0);
let offset = bv(3);
let pointer_plus = new_pointer_domain("Rax".into(), 3);
let pointer_minus = new_pointer_domain("Rax".into(), -3);
assert_eq!(pointer.add_to_offset(&offset), pointer_plus);
assert_eq!(pointer.sub_from_offset(&offset), pointer_minus);
let other_pointer = new_pointer_domain("Rbx".into(), 5);
let merged = pointer.merge(&other_pointer);
assert_eq!(merged.0.len(), 2);
assert_eq!(merged.0.get(&new_id("Rax".into())), Some(&bv(0)));
assert_eq!(merged.0.get(&new_id("Rbx".into())), Some(&bv(5)));
}
}
use crate::bil::variable::*;
use crate::prelude::*;
use crate::utils::fast_cmp_arc::FastCmpArc;
use std::sync::Arc;
// TODO: Right now abstract locations are used to describe where a pointer to an object is stored.
// But they could also be used to point into the object itself (at offset 0).
// Can this ambivalence in the intended usage be resolved in a way that prevents accidental misuse?
// If not, the intended usage has to be documented with a big warning sign.
/// An abstract identifier is given by a time identifier and a location identifier.
///
/// For the location identifier see `AbstractLocation`.
/// The time identifier is given by a `Tid`.
/// If it is the Tid of a basic block, then it describes the point in time *before* execution of the first instruction in the block.
/// If it is the Tid of a Def or Jmp, then it describes the point in time *after* the execution of the Def or Jmp.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct AbstractIdentifier(FastCmpArc<AbstractIdentifierData>);
/// The data contained in an abstract identifier
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct AbstractIdentifierData {
time: Tid,
location: AbstractLocation,
}
impl AbstractIdentifier {
/// create a new abstract identifier
pub fn new(time: Tid, location: AbstractLocation) -> AbstractIdentifier {
AbstractIdentifier(FastCmpArc(Arc::new(AbstractIdentifierData {
time,
location,
})))
}
}
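// Illustrative sketch (the Tid name is made up): the abstract identifier for "value of
// register RSP at the start of the basic block with Tid blk_001" would be constructed as
//
//     AbstractIdentifier::new(
//         Tid::new("blk_001"),
//         AbstractLocation::Register("RSP".to_string(), 64),
//     )
//
// and rendered roughly as "blk_001 @ RSP" by the Display implementation below.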
impl std::fmt::Display for AbstractIdentifier {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(formatter, "{} @ {}", self.0.time, self.0.location)
}
}
/// An abstract location describes how to find the value of a variable in memory at a given time.
///
/// It is defined recursively, where the root is always a register.
/// This way only locations that the local state knows about are representable.
/// It is also impossible to accidentally describe circular references.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractLocation {
Register(String, BitSize),
Pointer(String, AbstractMemoryLocation),
}
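// Illustrative sketch (the offsets are made up): the location "the memory cell at offset 8
// of the object pointed to by register RSP" would be written as the recursive value
//
//     AbstractLocation::Pointer(
//         "RSP".to_string(),
//         AbstractMemoryLocation::Location { offset: 8, size: 8 },
//     )
//
// which the formatting implementations below render as "RSP->(8)".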
impl std::fmt::Display for AbstractLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Register(name, _size) => write!(formatter, "{}", name),
Self::Pointer(reg_name, location) => write!(formatter, "{}->{}", reg_name, location),
}
}
}
impl AbstractLocation {
/// Create an abstract location from a variable corresponding to a register.
/// This function returns an error if the variable is not a physical register.
pub fn from_var(variable: &Variable) -> Result<AbstractLocation, Error> {
if variable.is_temp {
return Err(anyhow!(
"Cannot create abstract location from temporary variables."
));
}
Ok(AbstractLocation::Register(
variable.name.clone(),
variable.bitsize()?,
))
}
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation {
Location {
offset: isize,
size: usize,
},
Pointer {
offset: isize,
size: usize,
target: Box<AbstractMemoryLocation>,
},
}
impl std::fmt::Display for AbstractMemoryLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Location { offset, .. } => write!(formatter, "({})", offset),
Self::Pointer {
offset,
size: _,
target,
} => write!(formatter, "({})->{}", offset, target),
}
}
}
use super::interprocedural_fixpoint::{Computation, NodeValue};
use crate::analysis::graph::{Graph, Node};
use crate::term::*;
use crate::utils::log::*;
use petgraph::graph::NodeIndex;
use petgraph::visit::IntoNodeReferences;
use petgraph::Direction;
use std::collections::HashMap;
mod context;
mod data;
mod identifier;
mod object;
mod object_list;
mod state;
use context::Context;
use state::State;
pub struct PointerInference<'a> {
computation: Computation<'a, Context<'a>>,
log_collector: crossbeam_channel::Sender<LogMessage>,
}
impl<'a> PointerInference<'a> {
pub fn new(
project: &'a Project,
cwe_sender: crossbeam_channel::Sender<CweWarning>,
log_sender: crossbeam_channel::Sender<LogMessage>,
) -> PointerInference<'a> {
let context = Context::new(project, cwe_sender, log_sender.clone());
let mut entry_sub_to_entry_blocks_map = HashMap::new();
let subs: HashMap<Tid, &Term<Sub>> = project
.program
.term
.subs
.iter()
.map(|sub| (sub.tid.clone(), sub))
.collect();
for sub_tid in project.program.term.entry_points.iter() {
if let Some(sub) = subs.get(sub_tid) {
if let Some(entry_block) = sub.term.blocks.iter().next() {
entry_sub_to_entry_blocks_map.insert(sub_tid, entry_block.tid.clone());
}
}
}
let tid_to_graph_indices_map = super::graph::get_indices_of_block_nodes(
&context.graph,
entry_sub_to_entry_blocks_map.values(),
);
let entry_sub_to_entry_node_map: HashMap<Tid, NodeIndex> = entry_sub_to_entry_blocks_map
.into_iter()
.filter_map(|(sub_tid, block_tid)| {
if let Some((start_node_index, _end_node_index)) =
tid_to_graph_indices_map.get(&block_tid)
{
Some((sub_tid.clone(), start_node_index.clone()))
} else {
None
}
})
.collect();
let mut fixpoint_computation =
super::interprocedural_fixpoint::Computation::new(context, None);
log_sender
.send(LogMessage {
text: format!(
"Pointer Inference: Adding {} entry points",
entry_sub_to_entry_node_map.len()
),
level: LogLevel::Debug,
location: None,
})
.unwrap();
for (sub_tid, start_node_index) in entry_sub_to_entry_node_map.into_iter() {
fixpoint_computation.set_node_value(
start_node_index,
super::interprocedural_fixpoint::NodeValue::Value(State::new(
&project.stack_pointer_register,
sub_tid,
)),
);
}
PointerInference {
computation: fixpoint_computation,
log_collector: log_sender,
}
}
pub fn compute(&mut self) {
self.computation.compute_with_max_steps(100); // TODO: make max_steps configurable!
}
pub fn print_yaml(&self) {
// Print results serialized as YAML to stdout
let graph = self.computation.get_graph();
for (node_index, value) in self.computation.node_values().iter() {
let node = graph.node_weight(*node_index).unwrap();
if let Ok(string) = serde_yaml::to_string(&(node, value)) {
println!("{}", string);
} else {
println!(
"Serializing failed at {:?} with {:?}",
node_index,
serde_yaml::to_string(value)
);
}
}
}
pub fn generate_compact_json(&self) -> serde_json::Value {
let graph = self.computation.get_graph();
let mut json_nodes = serde_json::Map::new();
for (node_index, node_value) in self.computation.node_values().iter() {
let node = graph.node_weight(*node_index).unwrap();
if let NodeValue::Value(value) = node_value {
json_nodes.insert(format!("{}", node), value.to_json_compact());
}
}
serde_json::Value::Object(json_nodes)
}
pub fn print_compact_json(&self) {
println!("{:#}", self.generate_compact_json());
}
pub fn get_graph(&self) -> &Graph {
self.computation.get_graph()
}
/// Add speculative entry points to the fixpoint algorithm state.
///
/// Since indirect jumps and calls are not handled yet (TODO: change that),
/// the analysis may miss a *lot* of code in some cases.
/// To remedy this somewhat,
/// we mark all function starts that are roots in the control flow graph
/// and do not have a state assigned to them yet as additional entry points.
///
/// If `only_cfg_roots` is set to `false`, then all function starts without a state are marked as additional entry points.
fn add_speculative_entry_points(&mut self, project: &Project, only_cfg_roots: bool) {
// TODO: Refactor the fixpoint computation structs, so that the project reference can be extracted from them.
let mut start_block_to_sub_map: HashMap<&Tid, &Term<Sub>> = HashMap::new();
for sub in project.program.term.subs.iter() {
if project
.program
.term
.extern_symbols
.iter()
.find(|symbol| symbol.tid == sub.tid)
.is_some()
{
continue; // We ignore functions marked as extern symbols.
}
if let Some(start_block) = sub.term.blocks.first() {
start_block_to_sub_map.insert(&start_block.tid, sub);
}
}
let graph = self.computation.get_graph();
let mut new_entry_points = Vec::new();
for (node_id, node) in graph.node_references() {
if let Node::BlkStart(block) = node {
if start_block_to_sub_map.get(&block.tid).is_some()
&& self.computation.get_node_value(node_id).is_none()
{
if only_cfg_roots
&& graph
.neighbors_directed(node_id, Direction::Incoming)
.next()
.is_none()
{
new_entry_points.push(node_id);
} else if !only_cfg_roots {
new_entry_points.push(node_id);
}
}
}
}
self.log_debug(format!(
"Pointer Inference: Adding {} speculative entry points",
new_entry_points.len()
));
for entry in new_entry_points {
let sub_tid = start_block_to_sub_map
[&self.computation.get_graph()[entry].get_block().tid]
.tid
.clone();
self.computation.set_node_value(
entry,
super::interprocedural_fixpoint::NodeValue::Value(State::new(
&project.stack_pointer_register,
sub_tid,
)),
);
}
}
fn count_blocks_with_state(&self) {
let graph = self.computation.get_graph();
let mut stateful_blocks: i64 = 0;
let mut all_blocks: i64 = 0;
for (node_id, node) in graph.node_references() {
if let Node::BlkStart(_block) = node {
all_blocks += 1;
if self.computation.get_node_value(node_id).is_some() {
stateful_blocks += 1;
}
}
}
self.log_debug(format!(
"Pointer Inference: Blocks with state: {} / {}",
stateful_blocks, all_blocks
));
}
fn log_debug(&self, msg: impl Into<String>) {
let log_msg = LogMessage {
text: msg.into(),
level: LogLevel::Debug,
location: None,
};
self.log_collector.send(log_msg).unwrap();
}
}
pub fn run(project: &Project, print_debug: bool) -> (Vec<CweWarning>, Vec<String>) {
let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, log_receiver) = crossbeam_channel::unbounded();
let warning_collector_thread = std::thread::spawn(move || collect_cwe_warnings(cwe_receiver));
let log_collector_thread = std::thread::spawn(move || collect_logs(log_receiver));
{
// Scope the computation object so that it is dropped before the warning collector thread is joined.
// Otherwise the warning collector thread would not terminate (the cwe_sender needs to be dropped for it to terminate).
let mut computation = PointerInference::new(project, cwe_sender, log_sender);
computation.compute();
computation.count_blocks_with_state();
// Now compute again with speculative entry points added
computation.add_speculative_entry_points(project, true);
computation.compute();
computation.count_blocks_with_state();
// Now compute again with all missed functions as additional entry points
computation.add_speculative_entry_points(project, false);
computation.compute();
computation.count_blocks_with_state();
if print_debug {
computation.print_compact_json();
}
}
// Return the CWE warnings
(
warning_collector_thread.join().unwrap(),
log_collector_thread.join().unwrap(),
)
}
fn collect_cwe_warnings(receiver: crossbeam_channel::Receiver<CweWarning>) -> Vec<CweWarning> {
let mut collected_warnings = HashMap::new();
while let Ok(warning) = receiver.recv() {
match &warning.addresses[..] {
[] => unimplemented!(),
[address, ..] => {
collected_warnings.insert(address.clone(), warning);
}
}
}
collected_warnings
.drain()
.map(|(_key, value)| value)
.collect()
}
fn collect_logs(receiver: crossbeam_channel::Receiver<LogMessage>) -> Vec<String> {
let mut logs_with_address = HashMap::new();
let mut general_logs = Vec::new();
while let Ok(log_message) = receiver.recv() {
if let Some(ref tid) = log_message.location {
logs_with_address.insert(tid.address.clone(), log_message);
} else {
general_logs.push(log_message);
}
}
logs_with_address
.values()
.cloned()
.chain(general_logs.into_iter())
.map(|msg| msg.to_string())
.collect()
}
use super::data::*;
use super::identifier::AbstractIdentifier;
use crate::analysis::abstract_domain::*;
use crate::analysis::mem_region::MemRegion;
use crate::bil::Bitvector;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::BTreeSet;
use std::iter::FromIterator;
/// An abstract object is either a tracked or an untracked memory object.
/// In the untracked case we still track whether the object may contain pointers to other objects.
/// This way we do not necessarily need to invalidate all abstract objects
/// if a pointer contained in an untracked object is used for a memory write.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub enum AbstractObject {
Untracked(BTreeSet<AbstractIdentifier>),
Memory(AbstractObjectInfo),
}
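// Illustrative sketch: creating a tracked heap object and merging it with an untracked one
// degrades the result to an untracked object that only remembers the possible pointer
// targets of both operands (see merge below).
//
//     let tracked = AbstractObject::new(ObjectType::Heap, 64);
//     let untracked = AbstractObject::Untracked(BTreeSet::new());
//     let merged = tracked.merge(&untracked);
//     // merged is AbstractObject::Untracked(..), keeping only the union of the pointer targets.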
impl AbstractObject {
pub fn new(type_: ObjectType, address_bitsize: BitSize) -> AbstractObject {
Self::Memory(AbstractObjectInfo {
pointer_targets: BTreeSet::new(),
is_unique: true,
state: Some(ObjectState::Alive),
type_: Some(type_),
memory: MemRegion::new(address_bitsize),
})
}
pub fn get_value(&self, offset: Bitvector, bitsize: BitSize) -> Data {
if let Self::Memory(object_info) = self {
object_info.get_value(offset, bitsize)
} else {
Data::new_top(bitsize)
}
}
pub fn merge(&self, other: &Self) -> Self {
match (self, other) {
(Self::Untracked(set1), Self::Untracked(set2)) => {
Self::Untracked(set1.union(set2).cloned().collect())
}
(Self::Untracked(untracked), Self::Memory(memory))
| (Self::Memory(memory), Self::Untracked(untracked)) => {
Self::Untracked(untracked.union(&memory.pointer_targets).cloned().collect())
}
(Self::Memory(left), Self::Memory(right)) => Self::Memory(left.merge(right)),
}
}
pub fn set_value(&mut self, value: Data, offset: BitvectorDomain) -> Result<(), Error> {
match self {
Self::Untracked(target_list) => {
if let Data::Pointer(ref pointer) = value {
target_list.extend(
pointer
.iter_targets()
.map(|(abstract_id, _offset)| abstract_id.clone()),
)
};
}
Self::Memory(memory_object) => {
memory_object.set_value(value, offset)?;
}
};
Ok(())
}
pub fn get_all_possible_pointer_targets(&self) -> BTreeSet<AbstractIdentifier> {
match self {
Self::Untracked(targets) => targets.clone(),
Self::Memory(memory) => memory.get_all_possible_pointer_targets(),
}
}
/// For pointer values replace an abstract identifier with another one and add the offset_adjustment to the pointer offset.
/// This is needed to adjust stack pointer on call and return instructions.
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
match self {
Self::Untracked(id_set) => {
if id_set.get(old_id).is_some() {
id_set.remove(old_id);
id_set.insert(new_id.clone());
}
}
Self::Memory(mem_object) => {
mem_object.replace_abstract_id(old_id, new_id, offset_adjustment);
}
}
}
pub fn get_referenced_ids(&self) -> BTreeSet<AbstractIdentifier> {
match self {
Self::Untracked(ids) => ids.clone(),
Self::Memory(object_info) => object_info.pointer_targets.clone(),
}
}
pub fn set_state(&mut self, new_state: Option<ObjectState>) {
if let Self::Memory(object_info) = self {
object_info.set_state(new_state)
}
}
#[cfg(test)]
pub fn get_state(&self) -> Option<ObjectState> {
match self {
Self::Untracked(_) => None,
Self::Memory(mem) => mem.state,
}
}
}
impl AbstractObject {
pub fn to_json_compact(&self) -> serde_json::Value {
match self {
Self::Untracked(_) => serde_json::Value::String("Untracked".into()),
Self::Memory(object_info) => {
let mut elements = Vec::new();
elements.push((
"is_unique".to_string(),
serde_json::Value::String(format!("{}", object_info.is_unique)),
));
elements.push((
"state".to_string(),
serde_json::Value::String(format!("{:?}", object_info.state)),
));
elements.push((
"type".to_string(),
serde_json::Value::String(format!("{:?}", object_info.type_)),
));
let memory = object_info
.memory
.iter()
.map(|(index, value)| (format!("{}", index), value.to_json_compact()));
elements.push((
"memory".to_string(),
serde_json::Value::Object(serde_json::Map::from_iter(memory)),
));
serde_json::Value::Object(serde_json::Map::from_iter(elements.into_iter()))
}
}
}
}
/// The abstract object info contains all information that we track for an abstract object.
///
/// Some noteworthy properties:
/// - The field *is_unique* indicates whether the abstract object represents exactly one concrete memory object or whether it may be the union of several memory objects.
/// - The *state* indicates whether the object is still alive or not.
/// This can be used to detect "use after free" bugs.
/// - Many fields are wrapped in Option<_> to indicate whether the property is known or not.
/// - The field pointer_targets is a (coarse) upper approximation of all possible targets
/// for which pointers may exist inside the memory region.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct AbstractObjectInfo {
pointer_targets: BTreeSet<AbstractIdentifier>,
pub is_unique: bool,
pub state: Option<ObjectState>,
type_: Option<ObjectType>,
memory: MemRegion<Data>,
}
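// Illustrative sketch of the effect of is_unique on set_value below (the helpers
// `fresh_object`, `data` and `bv` are assumed, not part of this module):
//
//     let mut unique_obj = AbstractObjectInfo { is_unique: true, ..fresh_object() };
//     unique_obj.set_value(data(1), bv(0)).unwrap();
//     unique_obj.set_value(data(2), bv(0)).unwrap();   // strong update: the old value is overwritten
//
//     let mut merged_obj = AbstractObjectInfo { is_unique: false, ..fresh_object() };
//     merged_obj.set_value(data(1), bv(0)).unwrap();
//     merged_obj.set_value(data(2), bv(0)).unwrap();   // weak update: the new value is only merged into the old contents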
impl AbstractObjectInfo {
fn get_value(&self, offset: Bitvector, bitsize: BitSize) -> Data {
// TODO: This function does not check whether a data read is "sound", e.g. that the offset is inside the object.
// Make sure that this is checked somewhere!
assert_eq!(bitsize % 8, 0);
self.memory.get(offset, (bitsize / 8) as u64)
}
fn set_value(&mut self, value: Data, offset: BitvectorDomain) -> Result<(), Error> {
if let Data::Pointer(ref pointer) = value {
self.pointer_targets.extend(
pointer
.iter_targets()
.map(|(abstract_id, _offset)| abstract_id.clone()),
)
};
if let BitvectorDomain::Value(ref concrete_offset) = offset {
if self.is_unique {
self.memory.add(value, concrete_offset.clone());
} else {
let merged_value = self
.memory
.get(concrete_offset.clone(), (value.bitsize() / 8) as u64)
.merge(&value);
self.memory.add(merged_value, concrete_offset.clone());
};
} else {
self.memory = MemRegion::new(self.memory.get_address_bitsize());
}
Ok(())
}
fn get_all_possible_pointer_targets(&self) -> BTreeSet<AbstractIdentifier> {
let mut targets = self.pointer_targets.clone();
for elem in self.memory.iter_values() {
if let Data::Pointer(pointer) = elem {
for (id, _) in pointer.iter_targets() {
targets.insert(id.clone());
}
};
}
targets
}
/// For pointer values replace an abstract identifier with another one and add the offset_adjustment to the pointer offsets.
/// This is needed to adjust stack pointers on call and return instructions.
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
for elem in self.memory.iter_values_mut() {
elem.replace_abstract_id(old_id, new_id, offset_adjustment);
}
if self.pointer_targets.get(&old_id).is_some() {
self.pointer_targets.remove(&old_id);
self.pointer_targets.insert(new_id.clone());
}
}
pub fn set_state(&mut self, new_state: Option<ObjectState>) {
if self.is_unique {
self.state = new_state;
} else if self.state != new_state {
self.state = None;
} // else don't change the state
}
}
impl AbstractDomain for AbstractObjectInfo {
fn top(&self) -> Self {
AbstractObjectInfo {
pointer_targets: BTreeSet::new(),
is_unique: false,
state: None,
type_: None,
memory: MemRegion::new(self.memory.get_address_bitsize()),
}
}
fn merge(&self, other: &Self) -> Self {
AbstractObjectInfo {
pointer_targets: self
.pointer_targets
.union(&other.pointer_targets)
.cloned()
.collect(),
is_unique: self.is_unique && other.is_unique,
state: same_or_none(&self.state, &other.state),
type_: same_or_none(&self.type_, &other.type_),
memory: self.memory.merge(&other.memory),
}
}
}
fn same_or_none<T: Eq + Clone>(left: &Option<T>, right: &Option<T>) -> Option<T> {
if left.as_ref()? == right.as_ref()? {
Some(left.as_ref().unwrap().clone())
} else {
None
}
}
/// An object is either a stack or a heap object.
/// TODO: add a type for tracking global variables!
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord)]
pub enum ObjectType {
Stack,
Heap,
}
/// An object is either alive or dangling (because the memory was freed or a function return invalidated the stack frame).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord)]
pub enum ObjectState {
Alive,
Dangling,
}
#[cfg(test)]
mod tests {
use super::*;
fn new_abstract_object() -> AbstractObject {
let obj_info = AbstractObjectInfo {
pointer_targets: BTreeSet::new(),
is_unique: true,
state: Some(ObjectState::Alive),
type_: Some(ObjectType::Heap),
memory: MemRegion::new(64),
};
AbstractObject::Memory(obj_info)
}
fn new_data(number: i64) -> Data {
Data::Value(bv(number))
}
fn bv(number: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(number))
}
#[test]
fn abstract_object() {
let mut object = new_abstract_object();
let three = new_data(3);
let offset = bv(-15);
object.set_value(three, offset).unwrap();
assert_eq!(
object.get_value(Bitvector::from_i64(-16), 64),
Data::Top(64)
);
assert_eq!(object.get_value(Bitvector::from_i64(-15), 64), new_data(3));
object.set_value(new_data(4), bv(-12)).unwrap();
assert_eq!(
object.get_value(Bitvector::from_i64(-15), 64),
Data::Top(64)
);
let mut other_object = new_abstract_object();
object.set_value(new_data(0), bv(0)).unwrap();
other_object.set_value(new_data(0), bv(0)).unwrap();
let merged_object = object.merge(&other_object);
assert_eq!(
merged_object.get_value(Bitvector::from_i64(-12), 64),
Data::Top(64)
);
assert_eq!(
merged_object.get_value(Bitvector::from_i64(0), 64),
new_data(0)
);
}
}
use super::data::*;
use super::identifier::AbstractIdentifier;
use super::object::*;
use crate::analysis::abstract_domain::*;
use crate::bil::Bitvector;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
use std::ops::Deref;
use std::sync::Arc;
/// The list of all known abstract objects.
///
/// Each abstract object is unique in the sense that each pointer can only point to one abstract object.
/// If a pointer may point to two different abstract objects,
/// these two objects will be merged into one object.
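///
/// A minimal usage sketch (mirroring the unit tests below; `bv` abbreviates a concrete `BitvectorDomain::Value`):
/// ```ignore
/// let stack_id = AbstractIdentifier::new(
///     Tid::new("time0"),
///     AbstractLocation::Register("RSP".into(), 64),
/// );
/// let mut obj_list = AbstractObjectList::from_stack_id(stack_id.clone(), 64);
/// let pointer = PointerDomain::new(stack_id, bv(8));
/// obj_list.set_value(pointer.clone(), Data::Value(bv(42))).unwrap();
/// assert_eq!(obj_list.get_value(&Data::Pointer(pointer), 64).unwrap(), Data::Value(bv(42)));
/// ```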
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct AbstractObjectList {
objects: Vec<Arc<AbstractObject>>,
ids: BTreeMap<AbstractIdentifier, (usize, BitvectorDomain)>,
}
impl AbstractObjectList {
/// Create a new abstract object list with just one abstract object corresponding to the stack.
/// The offset into the stack object will be set to zero.
pub fn from_stack_id(
stack_id: AbstractIdentifier,
address_bitsize: BitSize,
) -> AbstractObjectList {
let mut objects = Vec::new();
let stack_object = AbstractObject::new(ObjectType::Stack, address_bitsize);
objects.push(Arc::new(stack_object));
let mut ids = BTreeMap::new();
ids.insert(
stack_id,
(0, Bitvector::zero((address_bitsize as usize).into()).into()),
);
AbstractObjectList { objects, ids }
}
/// Check the state of a memory object at a given address.
/// Returns `true` if at least one of the targets of the pointer is dangling.
/// May lead to false negatives, as objects with unknown object states are treated the same as alive objects.
pub fn is_dangling_pointer(&self, address: &Data) -> bool {
match address {
Data::Value(_) | Data::Top(_) => (),
Data::Pointer(pointer) => {
for (id, _offset) in pointer.iter_targets() {
let (object_index, _offset_id) = self.ids.get(id).unwrap();
if let AbstractObject::Memory(ref object) = *self.objects[*object_index] {
if object.state == Some(ObjectState::Dangling) {
return true;
}
}
}
}
}
return false;
}
/// Get the value at a given address.
/// If the address is not unique, merge the value of all possible addresses.
///
/// TODO: document when this function should return errors
pub fn get_value(&self, address: &Data, size: BitSize) -> Result<Data, Error> {
match address {
Data::Value(value) => Err(anyhow!("Load from non-pointer value:\n{:?}", value)),
Data::Top(_) => Ok(Data::new_top(size)),
Data::Pointer(pointer) => {
// TODO: Document the design decisions behind the implementation!
let mut merged_value: Option<Data> = None;
for (id, offset_pointer_domain) in pointer.iter_targets() {
let (abstract_object_index, offset_identifier) = self.ids.get(id).unwrap();
let offset = offset_pointer_domain.clone() + offset_identifier.clone();
if let BitvectorDomain::Value(concrete_offset) = offset {
let value = self
.objects
.get(*abstract_object_index)
.unwrap()
.get_value(concrete_offset, size);
merged_value = match merged_value {
Some(accum) => Some(accum.merge(&value)),
None => Some(value),
};
} else {
merged_value = Some(Data::new_top(size));
break;
}
}
merged_value.ok_or(anyhow!("Pointer without targets encountered."))
}
}
}
/// Set the value at a given address.
///
/// Returns an error if the given address has no targets.
/// If the address has more than one target, all targets are merged into one untracked object.
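///
/// A sketch of the merging behaviour (mirroring the unit tests below):
/// ```ignore
/// // `pointer` targets the stack object, `heap_pointer` a heap object:
/// obj_list.set_value(pointer.merge(&heap_pointer), Data::Value(bv(3))).unwrap();
/// // Afterwards both former targets are represented by a single untracked object.
/// ```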
// TODO: Implement write-merging to still tracked objects!
pub fn set_value(&mut self, pointer: PointerDomain, value: Data) -> Result<(), Error> {
let mut target_object_set: BTreeSet<usize> = BTreeSet::new();
for (id, _offset) in pointer.iter_targets() {
target_object_set.insert(self.ids.get(id).unwrap().0);
}
if target_object_set.len() == 0 {
return Err(anyhow!("Pointer without targets encountered"));
}
if target_object_set.len() == 1 {
let mut target_offset: Option<BitvectorDomain> = None;
for (id, pointer_offset) in pointer.iter_targets() {
let adjusted_offset = pointer_offset.clone() + self.ids.get(id).unwrap().1.clone();
target_offset = match target_offset {
Some(offset) => Some(offset.merge(&adjusted_offset)),
None => Some(adjusted_offset),
}
}
let object = self
.objects
.get_mut(*target_object_set.iter().next().unwrap())
.unwrap();
Arc::make_mut(object).set_value(value, target_offset.unwrap())?; // TODO: Write unit test whether this is correctly written to the self.objects vector!
} else {
// There is more than one object that the pointer may write to.
// We merge all targets into one untracked object.
// TODO: Implement merging to a still tracked object!
// Get all pointer targets the object may point to
let mut inner_targets: BTreeSet<AbstractIdentifier> = BTreeSet::new();
for object in target_object_set.iter() {
inner_targets.append(
&mut self
.objects
.get(*object)
.unwrap()
.get_all_possible_pointer_targets(),
);
}
// Generate the new (untracked) object that all other objects are merged to
let new_object = AbstractObject::Untracked(inner_targets);
// Generate the new map from abstract identifiers to the indices of the corresponding memory objects
let mut index_map = BTreeMap::new();
let mut new_object_vec: Vec<Arc<AbstractObject>> = Vec::new();
for old_index in 0..self.objects.len() {
if target_object_set.get(&old_index).is_none() {
index_map.insert(old_index, new_object_vec.len());
new_object_vec.push(self.objects.get(old_index).unwrap().clone());
}
}
new_object_vec.push(Arc::new(new_object));
let merged_object_index = new_object_vec.len() - 1;
for old_index in target_object_set {
index_map.insert(old_index, merged_object_index);
}
let mut new_id_map: BTreeMap<AbstractIdentifier, (usize, BitvectorDomain)> =
BTreeMap::new();
for (id, (old_index, offset)) in self.ids.iter() {
new_id_map.insert(id.clone(), (index_map[old_index], offset.clone()));
}
self.objects = new_object_vec;
self.ids = new_id_map;
// now we can do the actual write operation on the newly merged object
// the offset does not matter since the merged object is untracked anyway
Arc::make_mut(self.objects.get_mut(merged_object_index).unwrap())
.set_value(value, BitvectorDomain::new_top(pointer.bitsize()))?;
}
Ok(())
}
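/// Merge two abstract object lists.
/// Objects that are referenced by an identifier known to both lists get merged with each other;
/// objects known only to `other` are appended to the list.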
pub fn merge(&self, other: &Self) -> Self {
let mut merged_objects = self.objects.clone();
let mut merged_ids = self.ids.clone();
for (other_id, (other_index, other_offset)) in other.ids.iter() {
if let Some((index, offset)) = merged_ids.get(&other_id).clone() {
let (index, offset) = (*index, offset.clone());
merged_ids.insert(other_id.clone(), (index, offset.merge(&other_offset)));
if index < self.objects.len() {
// The object already existed in self, so we have to merge it with the object in other
merged_objects[index] =
Arc::new(merged_objects[index].merge(&other.objects[*other_index]));
// TODO: This is still inefficient, since we may end up merging the same objects more than once (if several ids point to it)
}
} else {
merged_objects.push(other.objects.get(*other_index).unwrap().clone());
merged_ids.insert(
other_id.clone(),
(merged_objects.len() - 1, other_offset.clone()),
);
}
}
// merge the underlying abstract objects.
AbstractObjectList {
objects: merged_objects,
ids: merged_ids,
}
}
/// Replace one abstract identifier with another one. Adjust offsets of all pointers accordingly.
///
/// **Example:**
/// Assume the old_id points to offset 0 in the corresponding memory object and the new_id points to offset -32.
/// Then the offset_adjustment is -32.
/// The offset_adjustment gets *added* to the base offset in self.memory.ids (so that it points to offset -32 in the memory object),
/// while it gets *subtracted* from all pointer values (so that they still point to the same spot in the corresponding memory object).
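///
/// In code (a sketch; `bv(-32)` stands for a concrete `BitvectorDomain` value as in the unit tests):
/// ```ignore
/// // new_id sits 32 bytes below old_id inside the memory object, so the adjustment is -32:
/// object_list.replace_abstract_id(&old_id, &new_id, &bv(-32));
/// ```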
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
for object in self.objects.iter_mut() {
Arc::make_mut(object).replace_abstract_id(
old_id,
new_id,
&(-offset_adjustment.clone()),
);
}
if let Some((index, offset)) = self.ids.get(old_id) {
let index = *index;
// Note that we have to *add* the offset_adjustment to get the new base offset for the id,
// since the offset_adjustment gets subtracted from all pointer values.
// This way all pointers will still point to the same place in memory.
let new_offset = offset.clone() + offset_adjustment.clone();
self.ids.remove(old_id);
self.ids.insert(new_id.clone(), (index, new_offset));
}
}
/// Remove the pointer from the object_id to the corresponding memory object.
pub fn remove_object_pointer(&mut self, object_id: &AbstractIdentifier) {
self.ids.remove(object_id);
}
/// Add a new abstract object to the object list
pub fn add_abstract_object(
&mut self,
object_id: AbstractIdentifier,
initial_offset: BitvectorDomain,
type_: ObjectType,
address_bitsize: BitSize,
) {
let new_object = AbstractObject::new(type_, address_bitsize);
if let Some((index, offset)) = self.ids.get(&object_id) {
// If the identifier already exists, we have to assume that more than one object may be referred to by this identifier.
let object = Arc::make_mut(&mut self.objects[*index]);
if let AbstractObject::Memory(object_info) = object {
object_info.is_unique = false;
}
*object = object.merge(&new_object);
let index = *index;
let merged_offset = offset.merge(&initial_offset);
self.ids.insert(object_id, (index, merged_offset));
} else {
let index = self.objects.len();
self.objects.push(Arc::new(new_object));
self.ids.insert(object_id, (index, initial_offset));
}
}
/// Return all ids that get referenced by the memory object pointed to by the given id.
pub fn get_referenced_ids(&self, id: &AbstractIdentifier) -> BTreeSet<AbstractIdentifier> {
if let Some((index, _offset)) = self.ids.get(id) {
self.objects[*index].get_referenced_ids()
} else {
BTreeSet::new()
}
}
/// Remove all abstract identifiers not contained in the provided set of identifiers.
/// Then remove all objects no longer referenced by any identifier.
pub fn remove_unused_ids(&mut self, ids_to_keep: &BTreeSet<AbstractIdentifier>) {
let all_ids: BTreeSet<AbstractIdentifier> = self.ids.keys().cloned().collect();
let ids_to_remove = all_ids.difference(ids_to_keep);
for id in ids_to_remove {
self.ids.remove(id);
}
let referenced_objects: BTreeSet<usize> =
self.ids.values().map(|(index, _offset)| *index).collect();
if referenced_objects.len() != self.objects.len() {
// We have to remove some objects and map the object indices to new values
let mut new_object_list = Vec::new();
let mut index_map = BTreeMap::new();
for i in 0..self.objects.len() {
if referenced_objects.get(&i).is_some() {
index_map.insert(i, new_object_list.len());
new_object_list.push(self.objects[i].clone());
}
}
self.objects = new_object_list;
// map the object indices to their new values
for (index, _offset) in self.ids.values_mut() {
*index = *index_map.get(index).unwrap();
}
}
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
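///
/// A sketch of how a caller might consume the result (the reporting is illustrative):
/// ```ignore
/// if let Err(possibly_freed_ids) = object_list.mark_mem_object_as_freed(&object_pointer) {
///     for id in possibly_freed_ids {
///         // report a possible double free (CWE 415) involving `id`
///     }
/// }
/// ```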
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &PointerDomain,
) -> Result<(), Vec<AbstractIdentifier>> {
let ids = object_pointer.get_target_ids();
let mut possible_double_free_ids = Vec::new();
if ids.len() > 1 {
for id in ids {
let object = &mut self.objects[self.ids[&id].0];
if let AbstractObject::Memory(tracked_mem) = Arc::deref(object) {
if (tracked_mem.state != Some(ObjectState::Alive) && tracked_mem.is_unique)
|| tracked_mem.state == Some(ObjectState::Dangling)
{
// Possible double free detected
// TODO: Check rate of false positives.
// If too high, only mark those with explicit dangling state.
possible_double_free_ids.push(id.clone());
}
}
Arc::make_mut(object).set_state(None);
}
} else {
if let Some(id) = ids.iter().next() {
let object = &mut self.objects[self.ids[&id].0];
if let AbstractObject::Memory(tracked_mem) = Arc::deref(object) {
if tracked_mem.state != Some(ObjectState::Alive) {
// Possible double free detected
// TODO: Check rate of false positives.
// If too high, only mark those with explicit dangling state.
possible_double_free_ids.push(id.clone());
}
}
Arc::make_mut(object).set_state(Some(ObjectState::Dangling));
}
}
if possible_double_free_ids.is_empty() {
return Ok(());
} else {
return Err(possible_double_free_ids);
}
}
/// Mark the memory object behind an abstract identifier as untracked.
/// Also add new possible reference targets to the object.
///
/// This is used as a very coarse approximation for function calls whose effect is unknown.
/// Since the function may spawn a new thread that keeps writing to this memory object,
/// the content of the memory object cannot be assumed to be known at any later point.
/// The new reference targets are added because we also do not know whether the function adds pointers to the memory object.
pub fn mark_mem_object_as_untracked(
&mut self,
object_id: &AbstractIdentifier,
new_possible_reference_targets: &BTreeSet<AbstractIdentifier>,
) {
let object_index = self.ids[object_id].0;
let reference_targets = self.objects[object_index]
.get_all_possible_pointer_targets()
.union(new_possible_reference_targets)
.cloned()
.collect();
self.objects[object_index] = Arc::new(AbstractObject::Untracked(reference_targets));
}
/// Get the number of objects that are currently tracked.
#[cfg(test)]
pub fn get_num_objects(&self) -> usize {
self.objects.len()
}
}
impl AbstractObjectList {
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
let mut object_list = Vec::new();
for (index, object) in self.objects.iter().enumerate() {
let id_list: Vec<Value> = self
.ids
.iter()
.filter_map(|(id, (obj_index, offset))| {
if *obj_index == index {
Some(Value::String(format!("{}:{}", id, offset)))
} else {
None
}
})
.collect();
let id_list = Value::Array(id_list);
let mut obj_map = Map::new();
obj_map.insert("ids".into(), id_list);
obj_map.insert("object".into(), object.to_json_compact());
object_list.push(Value::Object(obj_map));
}
Value::Array(object_list)
}
}
#[cfg(test)]
impl AbstractObjectList {
/// Get access to the internal id map for unit tests
pub fn get_internal_id_map(&self) -> &BTreeMap<AbstractIdentifier, (usize, BitvectorDomain)> {
&self.ids
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::analysis::pointer_inference::identifier::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
fn new_id(name: String) -> AbstractIdentifier {
AbstractIdentifier::new(Tid::new("time0"), AbstractLocation::Register(name, 64))
}
#[test]
fn abstract_object_list() {
let mut obj_list = AbstractObjectList::from_stack_id(new_id("RSP".into()), 64);
assert_eq!(obj_list.objects.len(), 1);
assert_eq!(obj_list.ids.len(), 1);
assert_eq!(*obj_list.ids.values().next().unwrap(), (0, bv(0)));
let pointer = PointerDomain::new(new_id("RSP".into()), bv(8));
obj_list
.set_value(pointer.clone(), Data::Value(bv(42)))
.unwrap();
assert_eq!(
obj_list
.get_value(&Data::Pointer(pointer.clone()), 64)
.unwrap(),
Data::Value(bv(42))
);
let mut other_obj_list = AbstractObjectList::from_stack_id(new_id("RSP".into()), 64);
let second_pointer = PointerDomain::new(new_id("RSP".into()), bv(-8));
other_obj_list
.set_value(pointer.clone(), Data::Value(bv(42)))
.unwrap();
other_obj_list
.set_value(second_pointer.clone(), Data::Value(bv(35)))
.unwrap();
assert_eq!(
other_obj_list
.get_value(&Data::Pointer(second_pointer.clone()), 64)
.unwrap(),
Data::Value(bv(35))
);
other_obj_list.add_abstract_object(new_id("RAX".into()), bv(0), ObjectType::Heap, 64);
let heap_pointer = PointerDomain::new(new_id("RAX".into()), bv(8));
other_obj_list
.set_value(heap_pointer.clone(), Data::Value(bv(3)))
.unwrap();
let mut merged = obj_list.merge(&other_obj_list);
assert_eq!(
merged
.get_value(&Data::Pointer(pointer.clone()), 64)
.unwrap(),
Data::Value(bv(42))
);
assert_eq!(
merged
.get_value(&Data::Pointer(second_pointer.clone()), 64)
.unwrap(),
Data::new_top(64)
);
assert_eq!(
merged
.get_value(&Data::Pointer(heap_pointer.clone()), 64)
.unwrap(),
Data::Value(bv(3))
);
assert_eq!(merged.objects.len(), 2);
assert_eq!(merged.ids.len(), 2);
merged
.set_value(pointer.merge(&heap_pointer), Data::Value(bv(3)))
.unwrap();
assert_eq!(
merged
.get_value(&Data::Pointer(pointer.clone()), 64)
.unwrap(),
Data::new_top(64)
);
// assert_eq!(merged.get_value(&Data::Pointer(heap_pointer.clone()), 64).unwrap(), Data::Value(bv(3)));
assert_eq!(merged.objects.len(), 1); // This will fail in the future when the set_value function does no automatic merging to untracked objects anymore.
other_obj_list
.set_value(pointer.clone(), Data::Pointer(heap_pointer.clone()))
.unwrap();
assert_eq!(
other_obj_list
.get_referenced_ids(&new_id("RSP".into()))
.len(),
1
);
assert_eq!(
*other_obj_list
.get_referenced_ids(&new_id("RSP".into()))
.iter()
.next()
.unwrap(),
new_id("RAX".into())
);
let modified_heap_pointer = PointerDomain::new(new_id("ID2".into()), bv(8));
other_obj_list.replace_abstract_id(&new_id("RAX".into()), &new_id("ID2".into()), &bv(0));
assert_eq!(
other_obj_list
.get_value(&Data::Pointer(pointer.clone()), 64)
.unwrap(),
Data::Pointer(modified_heap_pointer.clone())
);
assert_eq!(other_obj_list.ids.get(&new_id("RAX".into())), None);
assert!(matches!(
other_obj_list.ids.get(&new_id("ID2".into())),
Some(_)
));
let mut ids_to_keep = BTreeSet::new();
ids_to_keep.insert(new_id("ID2".into()));
other_obj_list.remove_unused_ids(&ids_to_keep);
assert_eq!(other_obj_list.objects.len(), 1);
assert_eq!(other_obj_list.ids.len(), 1);
assert_eq!(
other_obj_list.ids.iter().next().unwrap(),
(&new_id("ID2".into()), &(0, bv(0)))
);
assert_eq!(
other_obj_list.objects[0].get_state(),
Some(crate::analysis::pointer_inference::object::ObjectState::Alive)
);
other_obj_list
.mark_mem_object_as_freed(&modified_heap_pointer)
.unwrap();
assert_eq!(
other_obj_list.objects[0].get_state(),
Some(crate::analysis::pointer_inference::object::ObjectState::Dangling)
);
}
}
use super::data::*;
use super::identifier::{AbstractIdentifier, AbstractLocation};
use super::object_list::AbstractObjectList;
use crate::analysis::abstract_domain::*;
use crate::bil::*;
use crate::prelude::*;
use crate::term::symbol::ExternSymbol;
use std::collections::{BTreeMap, BTreeSet};
/// This struct contains all information known about the state at a specific point of time.
///
/// Notes:
/// - The *stack_id* is the identifier of the current stack frame.
/// Only reads and writes with offset less than 0 are permitted for it
/// - The *caller_ids* contain all known identifiers of caller stack frames.
/// If a read to an offset >= 0 corresponding to the current stack frame happens, it is considered
/// a merge read to all caller stack frames.
/// A write to an offset >= 0 corresponding to the current stack frame writes to all caller stack frames.
/// - The caller_ids are given by the stack pointer at the time of the call.
/// This way we can distinguish caller stack frames even if one function calls another several times.
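///
/// A minimal construction sketch (mirroring the unit tests below; `register("RSP")` is a test helper building the stack pointer variable):
/// ```ignore
/// let state = State::new(&register("RSP"), Tid::new("time0"));
/// // The stack pointer register now holds a pointer to offset 0 of the new stack object.
/// ```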
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct State {
register: BTreeMap<Variable, Data>,
pub memory: AbstractObjectList,
pub stack_id: AbstractIdentifier,
pub caller_ids: BTreeSet<AbstractIdentifier>,
}
impl State {
/// Create a new state that contains only one memory object corresponding to the stack.
/// The stack offset will be set to zero.
pub fn new(stack_register: &Variable, function_tid: Tid) -> State {
let stack_id = AbstractIdentifier::new(
function_tid,
AbstractLocation::from_var(stack_register).unwrap(),
);
let mut register: BTreeMap<Variable, Data> = BTreeMap::new();
register.insert(
stack_register.clone(),
PointerDomain::new(
stack_id.clone(),
Bitvector::zero((stack_register.bitsize().unwrap() as usize).into()).into(),
)
.into(),
);
State {
register,
memory: AbstractObjectList::from_stack_id(
stack_id.clone(),
stack_register.bitsize().unwrap(),
),
stack_id,
caller_ids: BTreeSet::new(),
}
}
/// Get the value of a register or Top() if no value is known.
///
/// Returns an error if the variable is not a register.
pub fn get_register(&self, variable: &Variable) -> Result<Data, Error> {
if let Some(data) = self.register.get(variable) {
Ok(data.clone())
} else {
Ok(Data::new_top(variable.bitsize()?))
}
}
/// Get the value of a register by its name.
///
/// Returns None if no value is set for the register.
pub fn get_register_by_name(&self, reg_name: &str) -> Option<Data> {
self.register.iter().find_map(|(key, value)| {
if key.name == reg_name {
Some(value.clone())
} else {
None
}
})
}
/// Set the value of a register.
///
/// Returns an error if the variable is not a register.
pub fn set_register(&mut self, variable: &Variable, value: Data) -> Result<(), Error> {
if let variable::Type::Immediate(_bitsize) = variable.type_ {
if !value.is_top() {
self.register.insert(variable.clone(), value);
} else {
self.register.remove(variable);
}
Ok(())
} else {
return Err(anyhow!("Variable is not a register type"));
}
}
/// Evaluate expression on the given state and write the result to the target register.
pub fn handle_register_assign(
&mut self,
target: &Variable,
expression: &Expression,
) -> Result<(), Error> {
if let Expression::Var(variable) = expression {
if target == variable {
// The assign does nothing. Occurs as "do nothing"-path in conditional stores.
// Needs special handling, since it is the only case where the target is allowed
// to denote memory instead of a register.
return Ok(());
}
}
match self.eval(expression) {
Ok(new_value) => {
self.set_register(target, new_value)?;
Ok(())
}
Err(err) => {
self.set_register(target, Data::new_top(target.bitsize()?))?;
Err(err)
}
}
}
/// Clear all non-callee-saved registers from the state.
/// This automatically also removes all virtual registers.
/// The parameter is a list of callee-saved register names.
pub fn clear_non_callee_saved_register(&mut self, callee_saved_register_names: &[String]) {
let register = self
.register
.iter()
.filter_map(|(register, value)| {
if callee_saved_register_names
.iter()
.find(|reg_name| **reg_name == register.name)
.is_some()
{
Some((register.clone(), value.clone()))
} else {
None
}
})
.collect();
self.register = register;
}
/// Evaluate the value of an expression in the current state.
pub fn eval(&self, expression: &Expression) -> Result<Data, Error> {
use Expression::*;
match expression {
Var(variable) => self.get_register(&variable),
Const(bitvector) => Ok(Data::bitvector(bitvector.clone())),
// TODO: implement handling of endianness for loads and writes!
Load {
memory: _,
address,
endian: _,
size,
} => Ok(self
.memory
.get_value(&self.adjust_pointer_for_read(&self.eval(address)?), *size)?),
Store { .. } => {
// This does not return an error, but panics outright.
// If this returned an error, it would hide a side effect, which is not allowed to happen.
panic!("Store expression cannot be evaluated!")
}
BinOp { op, lhs, rhs } => {
if *op == crate::bil::BinOpType::XOR && lhs == rhs {
// TODO: implement bitsize() for expressions to remove the state.eval(lhs) hack
return Ok(Data::Value(BitvectorDomain::Value(Bitvector::zero(
apint::BitWidth::new(self.eval(lhs)?.bitsize() as usize)?,
))));
}
let (left, right) = (self.eval(lhs)?, self.eval(rhs)?);
Ok(left.bin_op(*op, &right))
}
UnOp { op, arg } => Ok(self.eval(arg)?.un_op(*op)),
Cast { kind, width, arg } => Ok(self.eval(arg)?.cast(*kind, *width)),
Let {
var: _,
bound_exp: _,
body_exp: _,
} => Err(anyhow!("Let binding expression handling not implemented")),
Unknown { description, type_ } => {
if let crate::bil::variable::Type::Immediate(bitsize) = type_ {
Ok(Data::new_top(*bitsize))
} else {
Err(anyhow!("Unknown Memory operation: {}", description))
}
}
IfThenElse {
condition: _,
true_exp,
false_exp,
} => Ok(self.eval(true_exp)?.merge(&self.eval(false_exp)?)),
Extract {
low_bit,
high_bit,
arg,
} => Ok(self.eval(arg)?.extract(*low_bit, *high_bit)),
Concat { left, right } => Ok(self.eval(left)?.concat(&self.eval(right)?)),
}
}
/// Check if an expression contains a use-after-free
pub fn contains_access_of_dangling_memory(&self, expression: &Expression) -> bool {
use Expression::*;
match expression {
Var(_) | Const(_) | Unknown { .. } => false,
Load {
address: address_exp,
..
} => {
if let Ok(pointer) = self.eval(address_exp) {
self.memory.is_dangling_pointer(&pointer)
|| self.contains_access_of_dangling_memory(address_exp)
} else {
false
}
}
Store {
memory: _,
address: address_exp,
value: value_exp,
..
} => {
let address_check = if let Ok(pointer) = self.eval(address_exp) {
self.memory.is_dangling_pointer(&pointer)
} else {
false
};
address_check
|| self.contains_access_of_dangling_memory(address_exp)
|| self.contains_access_of_dangling_memory(value_exp)
}
BinOp { op: _, lhs, rhs } => {
self.contains_access_of_dangling_memory(lhs)
|| self.contains_access_of_dangling_memory(rhs)
}
UnOp { op: _, arg } => self.contains_access_of_dangling_memory(arg),
Cast {
kind: _,
width: _,
arg,
} => self.contains_access_of_dangling_memory(arg),
Let {
var: _,
bound_exp,
body_exp,
} => {
self.contains_access_of_dangling_memory(bound_exp)
|| self.contains_access_of_dangling_memory(body_exp)
}
IfThenElse {
condition,
true_exp,
false_exp,
} => {
self.contains_access_of_dangling_memory(condition)
|| self.contains_access_of_dangling_memory(true_exp)
|| self.contains_access_of_dangling_memory(false_exp)
}
Extract {
low_bit: _,
high_bit: _,
arg,
} => self.contains_access_of_dangling_memory(arg),
Concat { left, right } => {
self.contains_access_of_dangling_memory(left)
|| self.contains_access_of_dangling_memory(right)
}
}
}
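/// Store a value at the address given by the `address` data.
/// The address is first adjusted with `adjust_pointer_for_read`,
/// so that writes to non-negative stack offsets reach the caller stack frames.
/// Returns an error if the address is not a pointer,
/// since writes to global memory are not implemented yet.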
pub fn store_value(&mut self, address: &Data, value: &Data) -> Result<(), Error> {
if let Data::Pointer(pointer) = self.adjust_pointer_for_read(address) {
// TODO: This is a very inexact shortcut, as this write will unnecessarily merge caller memory regions.
// A more precise solution would write to every caller memory region separately,
// but would also need to check first whether the target memory region is unique or not.
self.memory.set_value(pointer, value.clone())?;
return Ok(());
} else {
// TODO: Implement recognition of stores to global memory.
// Needs implementation of reads from global data first.
return Err(anyhow!("Memory write to non-pointer data"));
}
}
/// Write a value to the address one gets when evaluating the address expression.
pub fn write_to_address(&mut self, address: &Expression, value: &Data) -> Result<(), Error> {
// TODO: Depending on the separation logic, some memory may need to be invalidated in the error case.
match self.eval(address) {
Ok(address_data) => self.store_value(&address_data, value),
Err(err) => Err(err),
}
}
/// Evaluate the given store expression on the given state and return the resulting state.
///
/// The function panics if given anything else than a store expression.
pub fn handle_store_exp(&mut self, store_exp: &Expression) -> Result<(), Error> {
if let Expression::Store {
memory: _,
address,
value,
endian: _,
size,
} = store_exp
{
let data = self.eval(value).unwrap_or(Data::new_top(*size));
assert_eq!(data.bitsize(), *size);
// TODO: At the moment, both memory and endianness are ignored. Change that!
return self.write_to_address(address, &data);
} else {
panic!("Expected store expression")
}
}
/// Mark those parameter values of an extern function call that are passed on the stack
/// as unknown data (since the function may modify them).
pub fn clear_stack_parameter(&mut self, extern_call: &ExternSymbol) -> Result<(), Error> {
// TODO: This needs a unit test to check whether stack parameters are cleared as expected!
let mut result_log = Ok(());
for arg in &extern_call.arguments {
match &arg.location {
Expression::Var(_) => {}
location_expression => {
let arg_size = arg
.var
.bitsize()
.expect("Encountered argument with unknown size");
let data_top = Data::new_top(arg_size);
if let Err(err) = self.write_to_address(location_expression, &data_top) {
result_log = Err(err);
}
}
}
}
// We only return the last error encountered.
return result_log;
}
/// Merge two states.
pub fn merge(&self, other: &Self) -> Self {
assert_eq!(self.stack_id, other.stack_id);
let mut merged_register = BTreeMap::new();
for (register, other_value) in other.register.iter() {
if let Some(value) = self.register.get(register) {
let merged_value = value.merge(other_value);
if !merged_value.is_top() {
// We only have to keep non-top elements.
merged_register.insert(register.clone(), merged_value);
}
}
}
let merged_memory_objects = self.memory.merge(&other.memory);
State {
register: merged_register,
memory: merged_memory_objects,
stack_id: self.stack_id.clone(),
caller_ids: self.caller_ids.union(&other.caller_ids).cloned().collect(),
}
}
/// If the pointer contains a reference to the stack with offset >= 0, replace it with a pointer
/// pointing to all possible caller ids.
fn adjust_pointer_for_read(&self, address: &Data) -> Data {
// TODO: There is a rare special case that is not handled correctly
// and might need a change in the way caller_ids get tracked to fix:
// If no caller_id is present, one can read from (and write to) addresses on the stack with positive offsets.
// But if such a state gets merged with a state that has caller_ids,
// then the values at positive offsets get overshadowed by the new callers
// and do not get properly merged with the values from the other callers!
if let Data::Pointer(pointer) = address {
let mut new_targets = PointerDomain::with_targets(BTreeMap::new());
for (id, offset) in pointer.iter_targets() {
if *id == self.stack_id {
match offset {
BitvectorDomain::Value(offset_val) => {
if offset_val.try_to_i64().unwrap() >= 0 && self.caller_ids.len() > 0 {
for caller_id in self.caller_ids.iter() {
new_targets.add_target(caller_id.clone(), offset.clone());
}
// Note that the id of the current stack frame was *not* added.
} else {
new_targets.add_target(id.clone(), offset.clone());
}
}
BitvectorDomain::Top(_bitsize) => {
for caller_id in self.caller_ids.iter() {
new_targets.add_target(caller_id.clone(), offset.clone());
}
// Note that we also add the id of the current stack frame
new_targets.add_target(id.clone(), offset.clone());
}
}
} else {
new_targets.add_target(id.clone(), offset.clone());
}
}
return Data::Pointer(new_targets);
} else {
return address.clone();
}
}
/// Replace all occurrences of old_id with new_id and adjust offsets accordingly.
/// This is needed to replace stack/caller ids on call and return instructions.
///
/// **Example:**
/// Assume the old_id points to offset 0 in the corresponding memory object and the new_id points to offset -32.
/// Then the offset_adjustment is -32.
/// The offset_adjustment gets *added* to the base offset in self.memory.ids (so that it points to offset -32 in the memory object),
/// while it gets *subtracted* from all pointer values (so that they still point to the same spot in the corresponding memory object).
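///
/// In code (mirroring the unit test below, where `new_id` and `bv` are test helpers):
/// after storing a value at stack offset -16, replacing the stack id by a callee id
/// with an offset_adjustment of -8 makes the same value addressable at offset -8 relative to the new id.
/// ```ignore
/// state.replace_abstract_id(&stack_id, &new_id("callee".into()), &bv(-8));
/// ```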
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
// TODO: This function does not adjust stack frame/caller stack frame relations!
// Refactor so that the corresponding logic is contained in State.
// Else this function can be used to generate invalid state on improper use!
for register_data in self.register.values_mut() {
register_data.replace_abstract_id(old_id, new_id, &(-offset_adjustment.clone()));
}
self.memory
.replace_abstract_id(old_id, new_id, offset_adjustment);
if &self.stack_id == old_id {
self.stack_id = new_id.clone();
}
if self.caller_ids.get(old_id).is_some() {
self.caller_ids.remove(old_id);
self.caller_ids.insert(new_id.clone());
}
}
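/// Remove all memory objects that are no longer reachable (directly or indirectly)
/// from the registers, the stack id or the caller ids of this state.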
pub fn remove_unreferenced_objects(&mut self) {
// get all referenced ids
let mut referenced_ids = BTreeSet::new();
for (_reg_name, data) in self.register.iter() {
referenced_ids.append(&mut data.referenced_ids());
}
referenced_ids.insert(self.stack_id.clone());
referenced_ids.append(&mut self.caller_ids.clone());
referenced_ids = self.add_recursively_referenced_ids_to_id_set(referenced_ids);
// remove unreferenced ids
self.memory.remove_unused_ids(&referenced_ids);
}
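/// Add all abstract identifiers that are transitively reachable
/// through the memory objects of the given identifiers to the identifier set.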
pub fn add_recursively_referenced_ids_to_id_set(
&self,
mut ids: BTreeSet<AbstractIdentifier>,
) -> BTreeSet<AbstractIdentifier> {
let mut unsearched_ids = ids.clone();
while let Some(id) = unsearched_ids.iter().next() {
let id = id.clone();
unsearched_ids.remove(&id);
let memory_ids = self.memory.get_referenced_ids(&id);
for mem_id in memory_ids {
if ids.get(&mem_id).is_none() {
ids.insert(mem_id.clone());
unsearched_ids.insert(mem_id);
}
}
}
return ids;
}
/// Merge the callee stack with the caller stack.
///
/// This deletes the pointer from the callee_id to the corresponding memory object
/// and updates all other references pointing to the callee_id to point to the caller_id.
/// The offset adjustment is handled as in `replace_abstract_id`.
///
/// Note that right now the content of the callee memory object is not merged into the caller memory object.
/// In general this is the correct behaviour as the content below the stack pointer should be considered uninitialized memory after returning to the caller.
/// TODO: Check whether compilers may deviate from this convention when optimizing aggressively.
/// TODO: Also merge the memory objects!
// TODO: write unit tests
pub fn merge_callee_stack_to_caller_stack(
&mut self,
callee_id: &AbstractIdentifier,
caller_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
self.memory.remove_object_pointer(callee_id);
self.replace_abstract_id(callee_id, caller_id, offset_adjustment);
// TODO: Add a check that makes sure no other ids point to the now obsolete callee stack object!
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
///
/// If this may cause double frees (i.e. the object in question may have been freed already),
/// an error with the list of possibly already freed objects is returned.
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &PointerDomain,
) -> Result<(), Vec<AbstractIdentifier>> {
self.memory.mark_mem_object_as_freed(object_pointer)
}
/// Remove all virtual registers from the state.
/// This should only be done in cases where it is known that no virtual registers can be alive.
/// Example: At the start of a basic block no virtual registers should be alive.
pub fn remove_virtual_register(&mut self) {
self.register = self
.register
.clone()
.into_iter()
.filter(|(register, _value)| !register.is_temp)
.collect();
}
}
impl State {
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
let mut state_map = Map::new();
let register = self
.register
.iter()
.map(|(var, data)| (var.name.clone(), data.to_json_compact()))
.collect();
let register = Value::Object(register);
state_map.insert("register".into(), register);
state_map.insert("memory".into(), self.memory.to_json_compact());
state_map.insert(
"stack_id".into(),
Value::String(format!("{}", self.stack_id)),
);
state_map.insert(
"caller_ids".into(),
Value::Array(
self.caller_ids
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
Value::Object(state_map)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
fn new_id(name: String) -> AbstractIdentifier {
AbstractIdentifier::new(Tid::new("time0"), AbstractLocation::Register(name, 64))
}
fn register(name: &str) -> Variable {
Variable {
name: name.into(),
type_: crate::bil::variable::Type::Immediate(64),
is_temp: false,
}
}
fn reg_add(name: &str, value: i64) -> Expression {
Expression::BinOp{
op: BinOpType::PLUS,
lhs: Box::new(Expression::Var(register(name))),
rhs: Box::new(Expression::Const(Bitvector::from_i64(value))),
}
}
fn reg_sub(name: &str, value: i64) -> Expression {
Expression::BinOp{
op: BinOpType::MINUS,
lhs: Box::new(Expression::Var(register(name))),
rhs: Box::new(Expression::Const(Bitvector::from_i64(value))),
}
}
fn store_exp(address: Expression, value: Expression) -> Expression {
let mem_var = Variable {
name: "mem".into(),
type_: crate::bil::variable::Type::Memory { addr_size: 64, elem_size: 64 },
is_temp: false,
};
Expression::Store{
memory : Box::new(Expression::Var(mem_var)),
address: Box::new(address),
value: Box::new(value),
endian: Endianness::LittleEndian,
size: 64,
}
}
fn load_exp(address: Expression) -> Expression {
let mem_var = Variable {
name: "mem".into(),
type_: crate::bil::variable::Type::Memory { addr_size: 64, elem_size: 64 },
is_temp: false,
};
Expression::Load{
memory : Box::new(Expression::Var(mem_var)),
address: Box::new(address),
endian: Endianness::LittleEndian,
size: 64,
}
}
#[test]
fn state() {
use crate::analysis::pointer_inference::object::*;
use crate::bil::Expression::*;
let mut state = State::new(&register("RSP"), Tid::new("time0"));
let stack_id = new_id("RSP".into());
let stack_addr = Data::Pointer(PointerDomain::new(stack_id.clone(), bv(8)));
state
.store_value(&stack_addr, &Data::Value(bv(42)))
.unwrap();
state.register.insert(register("RSP"), stack_addr.clone());
let load_expr = Load {
memory: Box::new(Var(register("RSP"))), // This is wrong, but the memory var is not checked at the moment (since we have only the one for RAM)
address: Box::new(Var(register("RSP"))),
endian: Endianness::LittleEndian,
size: 64 as BitSize,
};
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(42)));
let mut other_state = State::new(&register("RSP"), Tid::new("time0"));
state.register.insert(register("RAX"), Data::Value(bv(42)));
other_state
.register
.insert(register("RSP"), stack_addr.clone());
other_state
.register
.insert(register("RAX"), Data::Value(bv(42)));
other_state
.register
.insert(register("RBX"), Data::Value(bv(35)));
let merged_state = state.merge(&other_state);
assert_eq!(merged_state.register[&register("RAX")], Data::Value(bv(42)));
assert_eq!(merged_state.register.get(&register("RBX")), None);
assert_eq!(merged_state.eval(&load_expr).unwrap(), Data::new_top(64));
// Test pointer adjustment on reads
state
.memory
.add_abstract_object(new_id("caller".into()), bv(0), ObjectType::Stack, 64);
state.caller_ids.insert(new_id("caller".into()));
state
.store_value(&stack_addr, &Data::Value(bv(15)))
.unwrap();
assert_eq!(
state
.memory
.get_value(
&Data::Pointer(PointerDomain::new(new_id("caller".into()), bv(8))),
64
)
.unwrap(),
Data::Value(bv(15))
);
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(15)));
// Test replace_abstract_id
let pointer = Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-16)));
state.register.insert(register("RSP"), pointer.clone());
state.store_value(&pointer, &Data::Value(bv(7))).unwrap();
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(7)));
state.replace_abstract_id(&stack_id, &new_id("callee".into()), &bv(-8));
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(7)));
assert_eq!(
state
.memory
.get_value(
&Data::Pointer(PointerDomain::new(new_id("callee".into()), bv(-8))),
64
)
.unwrap(),
Data::Value(bv(7))
);
assert_eq!(
state
.memory
.get_value(
&Data::Pointer(PointerDomain::new(new_id("callee".into()), bv(-16))),
64
)
.unwrap(),
Data::new_top(64)
);
state
.memory
.add_abstract_object(new_id("heap_obj".into()), bv(0), ObjectType::Heap, 64);
assert_eq!(state.memory.get_num_objects(), 3);
state.remove_unreferenced_objects();
assert_eq!(state.memory.get_num_objects(), 2);
}
#[test]
fn handle_store() {
use crate::bil::Expression::*;
let mut state = State::new(&register("RSP"), Tid::new("time0"));
let stack_id = new_id("RSP".into());
assert_eq!(state.eval(&Var(register("RSP"))).unwrap(), Data::Pointer(PointerDomain::new(stack_id.clone(), bv(0))));
state.handle_register_assign(&register("RSP"), &reg_sub("RSP", 32)).unwrap();
assert_eq!(state.eval(&Var(register("RSP"))).unwrap(), Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-32))));
state.handle_register_assign(&register("RSP"), &reg_add("RSP", -8)).unwrap();
assert_eq!(state.eval(&Var(register("RSP"))).unwrap(), Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-40))));
state.handle_store_exp(&store_exp(reg_add("RSP", 8), Const(Bitvector::from_i64(1)))).unwrap();
state.handle_store_exp(&store_exp(reg_sub("RSP", 8), Const(Bitvector::from_i64(2)))).unwrap();
state.handle_store_exp(&store_exp(reg_add("RSP", -16), Const(Bitvector::from_i64(3)))).unwrap();
state.handle_register_assign(&register("RSP"), &reg_sub("RSP", 4)).unwrap();
assert_eq!(state.eval(&load_exp(reg_add("RSP", 12))).unwrap(), bv(1).into());
assert_eq!(state.eval(&load_exp(reg_sub("RSP", 4))).unwrap(), bv(2).into());
assert_eq!(state.eval(&load_exp(reg_add("RSP", -12))).unwrap(), bv(3).into());
}
}
use serde::{Deserialize, Serialize};
pub mod variable;
pub use variable::*;
pub type Bitvector = apint::ApInt;
pub type BitSize = u16;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Expression {
Var(Variable),
Const(Bitvector),
Load {
memory: Box<Expression>,
address: Box<Expression>,
endian: Endianness,
size: BitSize,
},
Store {
memory: Box<Expression>,
address: Box<Expression>,
value: Box<Expression>,
endian: Endianness,
size: BitSize,
},
BinOp {
op: BinOpType,
lhs: Box<Expression>,
rhs: Box<Expression>,
},
UnOp {
op: UnOpType,
arg: Box<Expression>,
},
Cast {
kind: CastType,
width: BitSize,
arg: Box<Expression>,
},
Let {
var: Variable,
bound_exp: Box<Expression>,
body_exp: Box<Expression>,
},
Unknown {
description: String,
type_: Type,
},
IfThenElse {
condition: Box<Expression>,
true_exp: Box<Expression>,
false_exp: Box<Expression>,
},
Extract {
low_bit: BitSize,
high_bit: BitSize,
arg: Box<Expression>,
},
Concat {
left: Box<Expression>,
right: Box<Expression>,
},
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum CastType {
UNSIGNED,
SIGNED,
HIGH,
LOW,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum BinOpType {
PLUS,
MINUS,
TIMES,
DIVIDE,
SDIVIDE,
MOD,
SMOD,
LSHIFT,
RSHIFT,
ARSHIFT,
AND,
OR,
XOR,
EQ,
NEQ,
LT,
LE,
SLT,
SLE,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum UnOpType {
NEG,
NOT,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Endianness {
LittleEndian,
BigEndian,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn variant_deserialization() {
let string = "\"UNSIGNED\"";
assert_eq!(CastType::UNSIGNED, serde_json::from_str(string).unwrap());
let string = "\"NEG\"";
assert_eq!(UnOpType::NEG, serde_json::from_str(string).unwrap());
}
#[test]
fn bitvector_deserialization() {
let bitv = Bitvector::from_u64(234);
let string = serde_json::to_string(&bitv).unwrap();
println!("{}", string);
println!("{:?}", bitv);
let string = "{\"digits\":[234],\"width\":[64]}";
assert_eq!(bitv, serde_json::from_str(string).unwrap());
}
#[test]
fn expression_deserialization() {
let string = "{\"BinOp\":{\"lhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}},\"op\":\"PLUS\",\"rhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}}}}";
let bitv = Bitvector::from_u8(234);
let exp = Expression::BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Expression::Const(bitv.clone())),
rhs: Box::new(Expression::Const(bitv)),
};
println!("{}", serde_json::to_string(&exp).unwrap());
assert_eq!(exp, serde_json::from_str(string).unwrap())
}
}
use super::BitSize;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
pub struct Variable {
pub name: String,
pub type_: Type,
pub is_temp: bool,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub enum Type {
Immediate(BitSize),
Memory {
addr_size: BitSize,
elem_size: BitSize,
},
Unknown,
}
impl Variable {
pub fn bitsize(&self) -> Result<BitSize, Error> {
if let Type::Immediate(bitsize) = self.type_ {
Ok(bitsize)
} else {
Err(anyhow!("Not a register variable"))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn type_deserialization() {
let typ = Type::Immediate(64);
let string = serde_json::to_string_pretty(&typ).expect("Serialization failed");
println!("{}", &string);
let _: Type = serde_json::from_str(&string).expect("Deserialization failed");
let typ = Type::Memory {
addr_size: 64,
elem_size: 8,
};
let string = serde_json::to_string_pretty(&typ).expect("Serialization failed");
println!("{}", &string);
let _: Type = serde_json::from_str(&string).expect("Deserialization failed");
let typ = Type::Unknown;
let string = serde_json::to_string_pretty(&typ).expect("Serialization failed");
println!("{}", &string);
let _: Type = serde_json::from_str(&string).expect("Deserialization failed");
}
#[test]
fn var_type_from_ocaml() {
let json_string = "{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}";
let typ = Type::Memory {
addr_size: 64,
elem_size: 8,
};
assert_eq!(typ, serde_json::from_str(json_string).unwrap())
}
#[test]
fn var_from_ocaml() {
let json_string = "{\"is_temp\":false,\"name\":\"RAX\",\"type_\":{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}}";
let var = Variable {
name: "RAX".to_string(),
type_: Type::Memory {
addr_size: 64,
elem_size: 8,
},
is_temp: false,
};
assert_eq!(var, serde_json::from_str(json_string).unwrap())
}
}
use super::serde::JsonBuilder;
use super::OcamlSendable;
use crate::term::*;
use crate::utils::log::CweWarning;
use super::failwith_on_panic;
fn run_pointer_inference(program_jsonbuilder_val: ocaml::Value) -> (Vec<CweWarning>, Vec<String>) {
let json_builder = unsafe { JsonBuilder::from_ocaml(&program_jsonbuilder_val) };
let program_json = serde_json::Value::from(json_builder);
let project: Project =
serde_json::from_value(program_json).expect("Project deserialization failed");
crate::analysis::pointer_inference::run(&project, false)
}
caml!(rs_run_pointer_inference(program_jsonbuilder_val) {
return failwith_on_panic( || {
let cwe_warnings_and_log = run_pointer_inference(program_jsonbuilder_val);
let cwe_warnings_and_log_json = serde_json::to_string(&cwe_warnings_and_log).unwrap();
let ocaml_string = ocaml::Str::from(&cwe_warnings_and_log_json as &str);
ocaml::Value::from(ocaml_string)
});
});
fn run_pointer_inference_and_print_debug(program_jsonbuilder_val: ocaml::Value) {
let json_builder = unsafe { JsonBuilder::from_ocaml(&program_jsonbuilder_val) };
let program_json = serde_json::Value::from(json_builder);
let project: Project =
serde_json::from_value(program_json).expect("Project deserialization failed");
crate::analysis::pointer_inference::run(&project, true); // TODO: This discards all CweWarnings and log messages. Change that?
}
caml!(rs_run_pointer_inference_and_print_debug(program_jsonbuilder_val) {
return failwith_on_panic( || {
run_pointer_inference_and_print_debug(program_jsonbuilder_val);
ocaml::Value::unit()
});
});
/*!
# Foreign Function Interface
This module contains all functions that interact with Ocaml via the foreign function interface.
*/
use std::rc::Rc;
pub mod analysis;
pub mod serde;
/// Helper function for catching panics at the ffi-border.
/// If a panic occurs while executing F and that panic unwinds the stack,
/// the panic is caught and an Ocaml failwith exception is thrown instead.
///
/// Stack unwinding through a panic across an ffi-boundary is undefined behaviour.
/// As of Rust 1.41 catching panics at ffi-borders is still not the default behaviour,
/// since it would break backwards compatibility with some crates depending on this undefined behaviour.
/// Throwing an Ocaml failwith exception instead allows stack unwinding and better error messages.
/// Note that the Ocaml exception should *not* be caught,
/// since recovering from it may lead to undefined behavior on the Rust side.
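///
/// Typical usage at an FFI entry point (the function name is illustrative; see the `caml!` wrappers in this crate):
/// ```ignore
/// caml!(rs_some_function(arg) {
///     return failwith_on_panic( || {
///         // ... work that may panic ...
///         ocaml::Value::unit()
///     });
/// });
/// ```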
fn failwith_on_panic<F, T>(closure: F) -> T
where
F: FnOnce() -> T,
{
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(closure)) {
Ok(value) => value,
Err(_) => {
// Throw an Ocaml failwith-exception.
// This may not be safe if the exception is caught and recovered from on the Ocaml side!
// We assume that these errors are only caught for error printing but not for recovering from it.
ocaml::runtime::failwith("Rust-Panic catched at FFI-boundary");
std::process::abort();
}
}
}
/// This is a convenience trait for objects that may be sent as opaque objects across the ffi-boundary to Ocaml.
/// For that they are wrapped as Rc<T>.
/// Note that this trait does not prevent memory leaks in itself!
/// Whenever such an object is created and sent across the ffi-boundary,
/// the finalizer must be attached to it on the Ocaml side!
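///
/// A sketch of the intended round trip (using `JsonBuilder`, which implements this trait below):
/// ```ignore
/// let ocaml_val = JsonBuilder::Null.to_ocaml();
/// let builder: &JsonBuilder = unsafe { JsonBuilder::from_ocaml(&ocaml_val) };
/// // On the Ocaml side, `JsonBuilder::ocaml_finalize` must be registered as the finalizer for `ocaml_val`.
/// ```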
trait OcamlSendable: std::marker::Sized {
/// Pack the object into an Ocaml value
fn to_ocaml(self) -> ocaml::Value {
let boxed_val = Rc::new(self);
ocaml::Value::nativeint(Rc::into_raw(boxed_val) as isize)
}
/// Unpack an object that is stored as a `Rc<T>` wrapped in an Ocaml value.
///
/// Note that the caller has to ensure that the wrapped object has the correct type.
unsafe fn from_ocaml(ocaml_val: &ocaml::Value) -> &Self {
let ptr: *const Self = ocaml_val.nativeint_val() as *const Self;
ptr.as_ref().unwrap()
}
/// Unpack a `Rc<T>` object wrapped in an Ocaml value and return a clone of it.
///
/// Note that the caller has to ensure that the wrapped object has the correct type.
unsafe fn from_ocaml_rc(ocaml_val: &ocaml::Value) -> Rc<Self> {
let ptr: *const Self = ocaml_val.nativeint_val() as *const Self;
let rc_box = Rc::from_raw(ptr);
let rc_clone = rc_box.clone(); // Increasing the reference count by 1
let _ = Rc::into_raw(rc_box); // Do not decrease the reference count when rc_box goes out of scope!
rc_clone
}
fn ocaml_finalize(ocaml_val: ocaml::Value) {
let ptr: *const Self = ocaml_val.nativeint_val() as *const Self;
let _ = unsafe { Rc::from_raw(ptr) };
}
}
/*!
# FFI-functions for generating serde_json objects
This module defines functions for generating opaque serde_json::Value objects in Ocaml
which can then be deserialized with Serde on the Rust side. Signatures of the provided functions:
```Ocaml
type serde_json = nativeint (* This stores pointers, so treat this as an opaque type! *)
external build_null: unit -> serde_json = "rs_build_serde_null"
external build_bool: bool -> serde_json = "rs_build_serde_bool"
external build_number: int -> serde_json = "rs_build_serde_number"
external build_string: string -> serde_json = "rs_build_serde_string"
external build_array: serde_json list -> serde_json = "rs_build_serde_array_from_list"
external build_object: (string * serde_json) list -> serde_json = "rs_build_serde_object"
external build_bitvector: string -> serde_json = "rs_build_serde_bitvector"
(* Convert a serde_json object to a json string (used for unit tests). *)
external to_string: serde_json -> string = "rs_convert_json_to_string"
```
*/
use super::OcamlSendable;
use ocaml::{FromValue, ToValue};
use std::iter::FromIterator;
use std::rc::Rc;
use std::str::FromStr;
use super::failwith_on_panic;
/// A builder type for serde_json::Value objects.
///
/// Hiding the recursive nature of the data type behind reference counts prevents unnecessary
/// deep copies when creating json objects from Ocaml, which would otherwise lead to a runtime quadratic in the size of the json object.
/// However, when converting to serde_json::Value, one deep copy is still necessary.
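///
/// A small conversion sketch (the deep copy happens only in this final `from` call):
/// ```ignore
/// let builder = JsonBuilder::Array(vec![Rc::new(JsonBuilder::Number(42))]);
/// let json: serde_json::Value = serde_json::Value::from(&builder); // yields the json array [42]
/// ```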
#[derive(Clone, Debug)]
pub enum JsonBuilder {
Null,
Bool(bool),
Number(isize),
PositiveNumber(u64), // currently used only for deserialization of bitvector
String(String),
Array(Vec<Rc<JsonBuilder>>),
Object(Vec<(String, Rc<JsonBuilder>)>),
}
impl OcamlSendable for JsonBuilder {}
/// Creating a serde_json::Value performs a deep copy.
impl From<&JsonBuilder> for serde_json::Value {
fn from(builder: &JsonBuilder) -> serde_json::Value {
match builder {
JsonBuilder::Null => serde_json::Value::Null,
JsonBuilder::Bool(val) => serde_json::Value::Bool(*val),
JsonBuilder::Number(val) => serde_json::Value::Number(serde_json::Number::from(*val)),
JsonBuilder::PositiveNumber(val) => {
serde_json::Value::Number(serde_json::Number::from(*val))
}
JsonBuilder::String(val) => serde_json::Value::String(val.to_string()),
JsonBuilder::Array(elem_vec) => elem_vec
.iter()
.map(|rc_elem| serde_json::Value::from(&**rc_elem))
.collect(),
JsonBuilder::Object(tuple_vec) => serde_json::Value::Object(
serde_json::Map::from_iter(tuple_vec.iter().map(|(string_ref, json_builder)| {
(
string_ref.to_string(),
serde_json::Value::from(&**json_builder),
)
})),
),
}
}
}
caml!(rs_finalize_json_builder(builder_val) {
return failwith_on_panic( || {
JsonBuilder::ocaml_finalize(builder_val);
ocaml::Value::unit()
});
});
/// Build JsonBuilder::Null as Ocaml value
fn build_serde_null() -> ocaml::Value {
JsonBuilder::Null.to_ocaml()
}
caml!(rs_build_serde_null(_unit) {
return failwith_on_panic( || {
build_serde_null()
});
});
/// Build JsonBuilder::Bool as Ocaml value
fn build_serde_bool(bool_val: ocaml::Value) -> ocaml::Value {
let boolean: bool = bool::from_value(bool_val);
JsonBuilder::Bool(boolean).to_ocaml()
}
caml!(rs_build_serde_bool(bool_val) {
return failwith_on_panic( || {
build_serde_bool(bool_val)
});
});
/// Build JsonBuilder::Number as Ocaml value
fn build_serde_number(num: ocaml::Value) -> ocaml::Value {
let num: isize = ocaml::Value::isize_val(&num);
JsonBuilder::Number(num).to_ocaml()
}
caml!(rs_build_serde_number(number) {
return failwith_on_panic( || {
build_serde_number(number)
});
});
/// Build JsonBuilder::Object representing a bitvector from a string generated by `Bitvector.to_string` in Ocaml
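/// The input is assumed to look like `"0x2a:64u"`: an optionally `0x`-prefixed hexadecimal value,
/// a colon, and the bit width followed by a single suffix character that gets stripped.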
fn build_serde_bitvector(bitvector_string_val: ocaml::Value) -> ocaml::Value {
let string = <&str>::from_value(bitvector_string_val);
let elements: Vec<&str> = string.split(':').collect();
let width = usize::from_str(&elements[1][0..(elements[1].len() - 1)])
.expect("Bitvector width parsing failed");
assert!(width > 0);
let mut num_list = Vec::new();
let mut number_slice: &str = elements[0];
if number_slice.starts_with("0x") {
number_slice = &number_slice[2..];
}
while number_slice.len() > 0 {
if number_slice.len() > 16 {
let digit = u64::from_str_radix(&number_slice[(number_slice.len() - 16)..], 16)
.expect("Bitvector value parsing failed");
num_list.push(Rc::new(JsonBuilder::PositiveNumber(digit)));
number_slice = &number_slice[..(number_slice.len() - 16)];
} else {
let digit =
u64::from_str_radix(&number_slice, 16).expect("Bitvector value parsing failed");
num_list.push(Rc::new(JsonBuilder::PositiveNumber(digit)));
number_slice = "";
};
}
while num_list.len() <= (width - 1) / 64 {
num_list.push(Rc::new(JsonBuilder::PositiveNumber(0)));
}
num_list.reverse(); // since the digits were parsed in reverse order
let mut width_list = Vec::new();
width_list.push(Rc::new(JsonBuilder::Number(width as isize)));
let result = JsonBuilder::Object(vec![
("digits".to_string(), Rc::new(JsonBuilder::Array(num_list))),
("width".to_string(), Rc::new(JsonBuilder::Array(width_list))),
]);
// TODO: remove deserialization check
let check_serde = serde_json::to_string(&serde_json::Value::from(&result)).unwrap();
let _bitv: apint::ApInt = serde_json::from_str(&check_serde)
.expect(&format!("Invalid value generated: {}", check_serde));
result.to_ocaml()
}
caml!(rs_build_serde_bitvector(bitvector_string) {
return failwith_on_panic( || {
build_serde_bitvector(bitvector_string)
});
});
/// Build JsonBuilder::String as Ocaml value
fn build_serde_string(string_val: ocaml::Value) -> ocaml::Value {
let string = String::from_value(string_val);
JsonBuilder::String(string).to_ocaml()
}
caml!(rs_build_serde_string(string_val) {
return failwith_on_panic( || {
build_serde_string(string_val)
});
});
/// Build JsonBuilder::Array as Ocaml value from an Ocaml list
fn build_serde_array_from_list(list_val: ocaml::Value) -> ocaml::Value {
let ocaml_list = ocaml::List::from(list_val);
let value_vec = ocaml_list.to_vec();
let vec = value_vec
.into_iter()
.map(|ocaml_val| unsafe { JsonBuilder::from_ocaml_rc(&ocaml_val) })
.collect();
JsonBuilder::Array(vec).to_ocaml()
}
caml!(rs_build_serde_array_from_list(list_val) {
return failwith_on_panic( || {
build_serde_array_from_list(list_val)
});
});
/// Build JsonBuilder::Object as Ocaml value from an Ocaml list of tuples
fn build_serde_object(tuple_list_val: ocaml::Value) -> ocaml::Value {
let ocaml_list = ocaml::List::from(tuple_list_val);
let pairs_vec = ocaml_list.to_vec();
let pairs = pairs_vec
.into_iter()
.map(|ocaml_tuple| {
let tuple = ocaml::Tuple::from(ocaml_tuple);
let key_ocaml = tuple
.get(0)
.expect("Error: Ocaml tuple contains no element");
let key = String::from_value(key_ocaml);
let value_ocaml: ocaml::Value = tuple
.get(1)
.expect("Error: Ocaml tuple contains not enough elements");
let data = unsafe { JsonBuilder::from_ocaml_rc(&value_ocaml) };
(key, data)
})
.collect();
JsonBuilder::Object(pairs).to_ocaml()
}
caml!(rs_build_serde_object(tuple_list_val) {
return failwith_on_panic( || {
build_serde_object(tuple_list_val)
});
});
/// Get the Json string corresponding to a JsonBuilder object and return it as an Ocaml value.
fn get_json_string(builder_val: ocaml::Value) -> ocaml::Value {
let builder = unsafe { JsonBuilder::from_ocaml(&builder_val) };
let json_string = serde_json::Value::from(builder).to_string();
ocaml::Str::from(&json_string as &str).to_value()
}
caml!(rs_convert_json_to_string(builder_val) {
return failwith_on_panic( || {
get_json_string(builder_val)
});
});
/*!
# cwe_checker_rs
Parts of the cwe_checker that are written in Rust.
*/
#[macro_use]
extern crate ocaml;
pub mod analysis;
pub mod bil;
pub mod ffi;
pub mod term;
pub mod utils;
mod prelude {
pub use apint::Width;
pub use serde::{Deserialize, Serialize};
pub use crate::bil::{BitSize, Bitvector};
pub use crate::term::Tid;
pub use anyhow::{anyhow, Error};
}
#[cfg(test)]
mod tests {
#[test]
fn it_works() {
assert_eq!(2 + 2, 4);
}
}
use crate::bil::*;
use serde::{Deserialize, Serialize};
pub mod symbol;
use symbol::ExternSymbol;
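/// A term identifier. The `id` uniquely identifies a term,
/// while `address` holds the address of the corresponding assembly instruction (or "UNKNOWN").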
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct Tid {
id: String,
pub address: String,
}
impl Tid {
pub fn new<T: ToString>(val: T) -> Tid {
Tid {
id: val.to_string(),
address: "UNKNOWN".to_string(),
}
}
}
impl std::fmt::Display for Tid {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(formatter, "{}", self.id)
}
}
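/// A term is a piece of the intermediate representation (e.g. a `Def`, `Jmp`, `Blk` or `Sub`)
/// together with its term identifier.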
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Term<T> {
pub tid: Tid,
pub term: T,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Def {
pub lhs: Variable,
pub rhs: Expression,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Jmp {
pub condition: Option<Expression>,
pub kind: JmpKind,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum JmpKind {
Call(Call),
Goto(Label),
Return(Label),
Interrupt { value: isize, return_addr: Tid },
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Call {
pub target: Label,
pub return_: Option<Label>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Label {
Direct(Tid),
Indirect(Expression),
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Blk {
pub defs: Vec<Term<Def>>,
pub jmps: Vec<Term<Jmp>>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Sub {
pub name: String,
pub blocks: Vec<Term<Blk>>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
pub subs: Vec<Term<Sub>>,
pub extern_symbols: Vec<ExternSymbol>,
pub entry_points: Vec<Tid>,
}
// TODO: Add deserialization from Ocaml to the FFI module for project!
// TODO: Add other CPU-architecture specific data to this struct!
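/// The `Project` struct is the top-level container for the intermediate representation of a binary
/// plus the CPU-architecture specific information needed by the analysis.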
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
pub program: Term<Program>,
pub cpu_architecture: String,
pub stack_pointer_register: Variable,
pub callee_saved_registers: Vec<String>,
pub parameter_registers: Vec<String>,
}
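/// A function argument, consisting of the variable it is bound to,
/// the location (an expression, e.g. a register or a stack offset) where it is passed,
/// and its intent (input and/or output).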
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Arg {
pub var: Variable,
pub location: Expression,
pub intent: ArgIntent,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum ArgIntent {
Input,
Output,
Both,
Unknown,
}
impl ArgIntent {
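/// Returns whether the argument is (possibly) an input argument.
/// Arguments with unknown intent are conservatively treated as both input and output.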
pub fn is_input(&self) -> bool {
match self {
Self::Input | Self::Both | Self::Unknown => true,
Self::Output => false,
}
}
pub fn is_output(&self) -> bool {
match self {
Self::Output | Self::Both | Self::Unknown => true,
Self::Input => false,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn term_deserialization() {
let string = "{\"term\":{\"defs\":[],\"jmps\":[]},\"tid\":{\"id\":\"@block\",\"address\":\"UNKNOWN\"}}";
let tid = Tid::new("@block".to_string());
let block_term = Term {
tid,
term: Blk {
defs: Vec::new(),
jmps: Vec::new(),
},
};
println!("{}", serde_json::to_string(&block_term).unwrap());
assert_eq!(block_term, serde_json::from_str(&string).unwrap());
}
}
use super::Arg;
use crate::bil::*;
use crate::prelude::*;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
pub tid: Tid,
pub address: String,
pub name: String,
pub calling_convention: Option<String>,
pub arguments: Vec<Arg>,
}
impl ExternSymbol {
/// Returns the return register of an extern symbol.
/// Returns an error if the function does not have exactly one return argument
/// or if the return argument is not a register.
pub fn get_unique_return_register(&self) -> Result<&crate::bil::variable::Variable, Error> {
let return_args: Vec<_> = self
.arguments
.iter()
.filter(|arg| arg.intent.is_output())
.collect();
if return_args.len() != 1 {
return Err(anyhow!(
"Wrong number of return register: Got {}, expected 1",
return_args.len()
));
}
match &return_args[0].location {
Expression::Var(var) => Ok(var),
_ => Err(anyhow!("Return location is not a register"))?,
}
}
/// Returns the parameter expression of an extern symbol.
/// Returns an error if the function does not have exactly one parameter argument.
pub fn get_unique_parameter(&self) -> Result<&crate::bil::Expression, Error> {
let param_args: Vec<_> = self
.arguments
.iter()
.filter(|arg| arg.intent.is_input())
.collect();
if param_args.len() != 1 {
return Err(anyhow!(
"Wrong number of return register: Got {}, expected 1",
param_args.len()
));
}
Ok(&param_args[0].location)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn extern_symbol_serialization() {
let symbol = ExternSymbol {
tid: Tid::new("Tid"),
address: "Somewhere".to_string(),
name: "extern_fn".to_string(),
calling_convention: Some("cconv".to_string()),
arguments: Vec::new(),
};
let json: String = serde_json::to_string_pretty(&symbol).unwrap();
println!("{}", json);
let _symbol: ExternSymbol = serde_json::from_str(&json).unwrap();
}
}
use crate::analysis::abstract_domain::AbstractDomain;
use crate::prelude::*;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
// TODO: This is a helper not only for abstract domains! It needs its own source file!
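/// A wrapper around `Arc<T>` that compares by pointer first:
/// if two values share the same allocation, they are considered equal (and merge to themselves)
/// without inspecting the contents, which speeds up comparisons of large shared objects.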
#[derive(Serialize, Deserialize, Debug, Hash, Clone)]
pub struct FastCmpArc<T>(pub Arc<T>);
impl<T: PartialEq + Eq> PartialEq for FastCmpArc<T> {
fn eq(&self, other: &Self) -> bool {
if Arc::ptr_eq(&self.0, &other.0) {
true
} else {
self.0.eq(&other.0)
}
}
}
impl<T: Eq> Eq for FastCmpArc<T> {}
impl<T: AbstractDomain + Clone> AbstractDomain for FastCmpArc<T> {
fn top(&self) -> Self {
FastCmpArc(Arc::new(self.0.top()))
}
fn merge(&self, other: &Self) -> Self {
if Arc::ptr_eq(&self.0, &other.0) {
self.clone()
} else {
FastCmpArc(Arc::new(self.0.merge(&other.0)))
}
}
}
impl<T: PartialOrd + Ord> PartialOrd for FastCmpArc<T> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl<T: PartialOrd + Ord> Ord for FastCmpArc<T> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
if Arc::ptr_eq(&self.0, &other.0) {
std::cmp::Ordering::Equal
} else {
self.0.cmp(&other.0)
}
}
}
impl<T> Deref for FastCmpArc<T> {
type Target = T;
fn deref(&self) -> &T {
&self.0
}
}
impl<T: Clone> DerefMut for FastCmpArc<T> {
fn deref_mut(&mut self) -> &mut T {
Arc::make_mut(&mut self.0)
}
}
use crate::prelude::*;
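/// A CWE warning generated by a check.
/// The fields mirror the corresponding record in the Ocaml module `Log_utils`,
/// so that warnings can be passed through the FFI as JSON.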
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Default)]
pub struct CweWarning {
pub name: String,
pub version: String,
pub addresses: Vec<String>,
pub tids: Vec<String>,
pub symbols: Vec<String>,
pub other: Vec<Vec<String>>,
pub description: String,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct LogMessage {
pub text: String,
pub level: LogLevel,
pub location: Option<Tid>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum LogLevel {
Debug,
Error,
Info,
}
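/// The `Display` implementation prints messages in the form `<Level>: [<address>: ]<text>`.
/// The Ocaml side splits the string on the first colon again to dispatch it to the matching logger.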
impl std::fmt::Display for LogMessage {
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(ref tid) = self.location {
match self.level {
LogLevel::Debug => write!(formatter, "Debug: {}: {}", tid.address, self.text),
LogLevel::Error => write!(formatter, "Error: {}: {}", tid.address, self.text),
LogLevel::Info => write!(formatter, "Info: {}: {}", tid.address, self.text),
}
} else {
match self.level {
LogLevel::Debug => write!(formatter, "Debug: {}", self.text),
LogLevel::Error => write!(formatter, "Error: {}", self.text),
LogLevel::Info => write!(formatter, "Info: {}", self.text),
}
}
}
}
pub mod fast_cmp_arc;
pub mod log;
(lang dune 1.6) (lang dune 2.0)
(name cwe_checker) (name cwe_checker)
...@@ -67,6 +67,7 @@ The names of all available modules can be printed with the [-module-versions] co ...@@ -67,6 +67,7 @@ The names of all available modules can be printed with the [-module-versions] co
To annotate CWE-hits in IDA Pro or Ghidra, first run {i cwe_checker} and save the JSON-formatted output to a file. To annotate CWE-hits in IDA Pro or Ghidra, first run {i cwe_checker} and save the JSON-formatted output to a file.
{[bap [BINARY] --pass=cwe-checker --cwe-checker-json --cwe-checker-out=cwe_hits.json]} {[bap [BINARY] --pass=cwe-checker --cwe-checker-json --cwe-checker-out=cwe_hits.json]}
After that execute the tool-specific script to import the results:
- For IDA Pro run the [cwe_checker_to_ida.py] script located in the [cwe_checker_to_ida] folder. - For IDA Pro run the [cwe_checker_to_ida.py] script located in the [cwe_checker_to_ida] folder.
{[python3 cwe_checker_to_ida.py -i cwe_hits.json -o cwe_hits.py]} {[python3 cwe_checker_to_ida.py -i cwe_hits.json -o cwe_hits.py]}
Now open the binary file in IDA Pro and execute the generated [cwe_hits.py] script from within IDA Pro (Alt+F7). Now open the binary file in IDA Pro and execute the generated [cwe_hits.py] script from within IDA Pro (Alt+F7).
......
all:
bapbuild -pkgs yojson,unix,ppx_jane,cwe_checker_core cwe_checker_pointer_inference_debug.plugin
bapbundle install cwe_checker_pointer_inference_debug.plugin
clean:
bapbuild -clean
uninstall:
bapbundle remove cwe_checker_pointer_inference_debug.plugin
open Bap.Std
open Core_kernel
open Cwe_checker_core
include Self()
let main project =
let program = Project.program project in
let tid_map = Address_translation.generate_tid_map program in
Pointer_inference.run_and_print_debug project tid_map
module Cmdline = struct
open Config
let () = when_ready (fun ({get=(!!)}) -> Project.register_pass' main)
let () = manpage [`S "DESCRIPTION";
`P "This plugin prints verbose debug information from the pointer inference analysis of the cwe_checker to stdout."]
end
open Bap.Std
open Core_kernel
external rs_run_pointer_inference: Serde_json.t -> string = "rs_run_pointer_inference"
external rs_run_pointer_inference_and_print_debug: Serde_json.t -> unit = "rs_run_pointer_inference_and_print_debug"
type cwelist = Log_utils.CweWarning.t array [@@deriving yojson]
let run (project: Project.t) (tid_map: Bap.Std.word Bap.Std.Tid.Map.t) : unit =
let program = Project.program project in
let entry_points = Symbol_utils.get_program_entry_points program in
let entry_points = List.map entry_points ~f:(fun sub -> Term.tid sub) in
let extern_symbols = Symbol_utils.build_and_return_extern_symbols project program tid_map in
let project_serde = Serde_json.of_project project extern_symbols entry_points tid_map in
let cwe_warnings_json = Yojson.Safe.from_string @@ rs_run_pointer_inference project_serde in
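(* The Rust analysis returns a JSON list containing the list of generated CWE warnings
   and the list of log messages. *)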
match cwe_warnings_json with
| `List ((`List cwe_warnings) :: (`List log_messages) :: []) ->
List.iter cwe_warnings ~f:(fun warning -> Log_utils.collect_cwe_warning @@ Result.ok_or_failwith @@ Log_utils.CweWarning.of_yojson warning);
List.iter log_messages ~f:(fun message ->
match message with
| `String message_string ->
begin match String.lsplit2 message_string ~on:':' with
| Some("Error", msg) -> Log_utils.error @@ String.strip msg
| Some("Debug", msg) -> Log_utils.debug @@ String.strip msg
| Some("Info", msg) -> Log_utils.info @@ String.strip msg
| _ -> failwith "Malformed log-message."
end
| _ -> failwith "Log-message is not a string."
)
| _ -> failwith "Log-message-json not as expected"
let run_and_print_debug (project: Project.t) (tid_map: Bap.Std.word Bap.Std.Tid.Map.t) : unit =
let program = Project.program project in
let entry_points = Symbol_utils.get_program_entry_points program in
let entry_points = List.map entry_points ~f:(fun sub -> Term.tid sub) in
let extern_symbols = Symbol_utils.build_and_return_extern_symbols project program tid_map in
let project_serde = Serde_json.of_project project extern_symbols entry_points tid_map in
rs_run_pointer_inference_and_print_debug project_serde
(** This module manages the communication with the actual pointer inference analysis
through the foreign function interface to Rust.
*)
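(* Example invocation from inside a BAP pass (a sketch following the usage in the
   cwe_checker_pointer_inference_debug plugin):

     let program = Project.program project in
     let tid_map = Address_translation.generate_tid_map program in
     Pointer_inference.run project tid_map
*)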
open Bap.Std
(** Run the pointer inference analysis and log the returned CWE warnings and log messages. *)
val run: Project.t -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
(** Run the pointer inference analysis and print the computed state of each basic block
(at the start and at the end of the basic block respectively)
as json to stdout.
Does not print log messages or CWE warnings.
The output is meant for debugging purposes.
*)
val run_and_print_debug: Project.t -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
open Bap.Std
let name = "Memory"
let version = "0.1"
let check_cwe (_program: Program.t) (project: Project.t) (tid_map: word Tid.Map.t) (_: string list list) (_: string list) =
Pointer_inference.run project tid_map
(** This module implements memory-related CWE checks.
Right now the check detects cases of
- {{: https://cwe.mitre.org/data/definitions/415.html} CWE 415: Double Free}
- {{: https://cwe.mitre.org/data/definitions/416.html} CWE 416: Use After Free}
{1 How the check works}
Using data-flow analysis, the check tries to keep track of all memory objects and pointers
known at specific points in the program.
It also keeps track of the status of each memory object, i.e. whether it has already been freed.
Access to an already freed object generates a CWE warning.
In cases where the analysis cannot reliably determine whether the accessed memory has already been freed,
a CWE warning may or may not be issued, depending on the estimated likelihood of a false positive.
Note that the check is still experimental.
Bugs may occur and the rates of false positive and false negative warnings are not yet known.
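{1 Example}
A minimal C sketch (not one of the shipped test samples) of code that the check reports as CWE 415:
{[
#include <stdlib.h>

int main(void) {
    char *data = malloc(10);
    free(data);
    free(data);  // CWE 415: Double Free
    return 0;
}
]}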
*)
val name: string
val version: string
val check_cwe: Bap.Std.program Bap.Std.term -> Bap.Std.project -> Bap.Std.word Bap.Std.Tid.Map.t -> string list list -> string list -> unit
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
core_kernel core_kernel
ppx_deriving_yojson.runtime) ppx_deriving_yojson.runtime)
(preprocess (pps ppx_jane ppx_deriving_yojson)) (preprocess (pps ppx_jane ppx_deriving_yojson))
(foreign_archives cwe_checker_rs)
(c_library_flags (-lpthread -lc -lm)) ; needed for linking the Rust runtime
) )
(include_subdirs unqualified) ; Include all subdirs when looking for source files (include_subdirs unqualified) ; Include all subdirs when looking for source files
(lang dune 1.6) (lang dune 2.0)
...@@ -24,7 +24,8 @@ let known_modules = [{cwe_func = Cwe_190.check_cwe; name = Cwe_190.name; version ...@@ -24,7 +24,8 @@ let known_modules = [{cwe_func = Cwe_190.check_cwe; name = Cwe_190.name; version
{cwe_func = Cwe_476.check_cwe; name = Cwe_476.name; version = Cwe_476.version; requires_pairs = false; has_parameters = true}; {cwe_func = Cwe_476.check_cwe; name = Cwe_476.name; version = Cwe_476.version; requires_pairs = false; has_parameters = true};
{cwe_func = Cwe_560.check_cwe; name = Cwe_560.name; version = Cwe_560.version; requires_pairs = false; has_parameters = false}; {cwe_func = Cwe_560.check_cwe; name = Cwe_560.name; version = Cwe_560.version; requires_pairs = false; has_parameters = false};
{cwe_func = Cwe_676.check_cwe; name = Cwe_676.name; version = Cwe_676.version; requires_pairs = false; has_parameters = false}; {cwe_func = Cwe_676.check_cwe; name = Cwe_676.name; version = Cwe_676.version; requires_pairs = false; has_parameters = false};
{cwe_func = Cwe_782.check_cwe; name = Cwe_782.name; version = Cwe_782.version; requires_pairs = false; has_parameters = false}] {cwe_func = Cwe_782.check_cwe; name = Cwe_782.name; version = Cwe_782.version; requires_pairs = false; has_parameters = false};
{cwe_func = Memory_cwes.check_cwe; name = Memory_cwes.name; version = Memory_cwes.version; requires_pairs = false; has_parameters = false}]
let cmdline_flags = [ let cmdline_flags = [
...@@ -97,7 +98,10 @@ let full_run project config = ...@@ -97,7 +98,10 @@ let full_run project config =
let program = Project.program project in let program = Project.program project in
let tid_address_map = Address_translation.generate_tid_map program in let tid_address_map = Address_translation.generate_tid_map program in
let json = Yojson.Basic.from_file config in let json = Yojson.Basic.from_file config in
List.iter known_modules ~f:(fun cwe -> execute_cwe_module cwe json program project tid_address_map) let full_run_modules = List.filter known_modules ~f:(fun cwe_module ->
cwe_module.name <> "Memory" (* TODO: Remove this when the memory check is more stable *)
) in
List.iter full_run_modules ~f:(fun cwe -> execute_cwe_module cwe json program project tid_address_map)
let build_output_path (path : string) : string = let build_output_path (path : string) : string =
......
...@@ -130,15 +130,11 @@ let parse_dyn_sym_line (line : string) : string option = ...@@ -130,15 +130,11 @@ let parse_dyn_sym_line (line : string) : string option =
done; done;
str_list := !line :: !str_list; str_list := !line :: !str_list;
match !str_list with match !str_list with
| value :: func1 :: func2 :: _ -> begin | _value :: func1 :: func2 :: _ -> begin
match ( String.strip ~drop:(fun x -> x = '0') value ) with if (String.equal func1 "DF" || String.equal func2 "DF") then (
| "" -> begin List.last !str_list
if (String.equal func1 "DF" || String.equal func2 "DF") then ( )
List.last !str_list else None
)
else None
end
| _ -> None (* The symbol has a nonzero value, so we assume that it is not an extern function symbol. *)
end end
| _ -> None | _ -> None
......
(* This module implements functionality related to parsing the JSON configuration file. *) (** This module implements functionality related to parsing the JSON configuration file.
*)
(** Returns pairs of symbols for a given CWE check. *) (** Returns pairs of symbols for a given CWE check. *)
......
...@@ -23,7 +23,7 @@ module CweWarning : sig ...@@ -23,7 +23,7 @@ module CweWarning : sig
symbols: string list; symbols: string list;
other : string list list; other : string list list;
description : string; description : string;
} } [@@deriving yojson]
end end
module CheckPath : sig module CheckPath : sig
......
(** This module contains FFI-functionality for generating serde_json objects on the Rust side
which is used for converting complex Ocaml data structures to Rust data structures.
*)
open Core_kernel
open Bap.Std
open Symbol_utils
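(* A value of type t is an opaque handle to a JsonBuilder object living on the Rust side.
   The attached finalizer notifies the Rust side when the handle is garbage collected,
   so that the corresponding Rust object can be deallocated. *)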
type t = nativeint
external rs_finalize_json_builder: t -> unit = "rs_finalize_json_builder"
external rs_build_serde_null: unit -> t = "rs_build_serde_null"
external rs_build_serde_bool: bool -> t = "rs_build_serde_bool"
external rs_build_serde_number: int -> t = "rs_build_serde_number"
external rs_build_serde_string: string -> t = "rs_build_serde_string"
external rs_build_serde_array_from_list: t list -> t = "rs_build_serde_array_from_list"
external rs_build_serde_object: (string * t) list -> t = "rs_build_serde_object"
external rs_build_bitvector: string -> t = "rs_build_serde_bitvector"
external rs_convert_json_to_string: t -> string = "rs_convert_json_to_string"
let add_finalizer value =
(Gc.Expert.add_finalizer_exn value rs_finalize_json_builder) (* TODO: if tests throw Invalid_argument exceptions, the values to finalize must be wrapped in a ref to ensure heap allocation! *)
let build_null (): t =
let value = rs_build_serde_null () in
let () = add_finalizer value in
value
let build_number (num: int) : t =
let value = rs_build_serde_number num in
let () = add_finalizer value in
value
let build_bool (boolean: bool) : t =
let value = rs_build_serde_bool boolean in
let () = add_finalizer value in
value
let build_string (string_val: string) : t =
let value = rs_build_serde_string string_val in
let () = add_finalizer value in
value
let build_array (obj_list: t list) : t =
let value = rs_build_serde_array_from_list obj_list in
let () = add_finalizer value in
value
let build_object (entries: (string * t) list) : t =
let value = rs_build_serde_object entries in
let () = add_finalizer value in
value
let to_string (serde_json: t) : String.t =
rs_convert_json_to_string serde_json
let of_var_type (var_type: Bil.Types.typ) : t =
match var_type with
| Imm bitsize ->
build_object (
("Immediate", build_number bitsize) :: []
)
| Mem (addr_size, size) ->
build_object (
("Memory", build_object (
("addr_size", build_number (Size.in_bits addr_size)) ::
("elem_size", build_number (Size.in_bits size)) :: []
)) :: [])
| Unk -> build_string "Unknown"
let of_var (var: Var.t) : t =
build_object [
("name", build_string (Var.name var));
("type_", of_var_type (Var.typ var));
("is_temp", build_bool (Var.is_virtual var));
]
let of_cast_type (cast_type: Bil.Types.cast) : t =
build_string (Sexp.to_string (Bil.Types.sexp_of_cast cast_type))
let of_binop_type (binop: Bil.Types.binop) : t =
build_string (Sexp.to_string (Bil.Types.sexp_of_binop binop))
let of_unop_type (unop: Bil.Types.unop) : t =
build_string (Sexp.to_string (Bil.Types.sexp_of_unop unop))
let of_endianness (endianness: Bitvector.endian) : t =
build_string (Sexp.to_string (Bitvector.sexp_of_endian endianness))
let of_bitvector (bitv: Bitvector.t) : t =
let value = rs_build_bitvector (Bitvector.to_string bitv) in
let () = add_finalizer value in
value
let rec of_exp (exp: Exp.t) : t =
begin match exp with
| Var(var) ->
build_object (("Var", of_var var) :: [])
| Int(bitvector) ->
build_object (("Const", of_bitvector bitvector) :: [])
| Load(mem, addr, endian, size) ->
build_object [ ("Load", build_object [
("memory", of_exp mem);
("address", of_exp addr);
("endian", of_endianness endian);
("size", build_number (Size.in_bits size));
]);]
| Store(mem, addr, value, endian, size) ->
build_object [ ("Store", build_object [
("memory", of_exp mem);
("address", of_exp addr);
("value", of_exp value);
("endian", of_endianness endian);
("size", build_number (Size.in_bits size));
]);]
| BinOp(type_, lhs, rhs) ->
build_object [ ("BinOp", build_object [
("op", of_binop_type type_);
("lhs", of_exp lhs);
("rhs", of_exp rhs);
]);]
| UnOp(type_, exp) ->
build_object [ ("UnOp", build_object [
("op", of_unop_type type_);
("arg", of_exp exp);
]);]
| Cast(cast, width, exp) ->
build_object [ ("Cast", build_object [
("kind", of_cast_type cast);
("width", build_number width);
("arg", of_exp exp);
]);]
| Let(var, bound_exp, body_exp) ->
build_object [ ("Let", build_object [
("var", of_var var);
("bound_exp", of_exp bound_exp);
("body_exp", of_exp body_exp)
]);]
| Unknown(text, typ) ->
build_object [ ("Unknown", build_object [
("description", build_string text);
("type_", of_var_type typ);
]);]
| Ite(if_, then_, else_) ->
build_object [ ("IfThenElse", build_object [
("condition", of_exp if_);
("true_exp", of_exp then_);
("false_exp", of_exp else_);
]);]
| Extract(high, low, exp) ->
build_object [ ("Extract", build_object [
("low_bit", build_number low);
("high_bit", build_number high);
("arg", of_exp exp)
]);]
| Concat(left, right) ->
build_object [ ("Concat", build_object [
("left", of_exp left);
("right", of_exp right)
]);]
end
let of_tid (tid: Tid.t) (tid_map: word Tid.Map.t) : t =
build_object [
("id", build_string @@ Tid.name tid);
("address", build_string @@ Address_translation.translate_tid_to_assembler_address_string tid tid_map);
]
let of_def (def: Def.t) (tid_map: word Tid.Map.t) : t =
build_object [
("tid", of_tid (Term.tid def) tid_map);
("term", build_object [
("lhs", of_var (Def.lhs def));
("rhs", of_exp (Def.rhs def));
]);
]
let of_jmp_label (jmp_label: label) (tid_map: word Tid.Map.t) : t =
match jmp_label with
| Direct(tid) ->
build_object [
("Direct", of_tid tid tid_map);
]
| Indirect(exp) ->
build_object [
("Indirect", of_exp exp);
]
let of_call (call: Call.t) (tid_map: word Tid.Map.t) : t =
build_object [
("target", of_jmp_label (Call.target call) tid_map);
("return_", match Call.return call with
| Some(target) -> of_jmp_label target tid_map
| None -> build_null ()
);
]
let of_jmp_kind (kind: jmp_kind) (tid_map: word Tid.Map.t) : t =
match kind with
| Call(call) ->
build_object [
("Call", of_call call tid_map);
]
| Goto(label) ->
build_object [
("Goto", of_jmp_label label tid_map);
]
| Ret(label) ->
build_object [
("Return", of_jmp_label label tid_map);
]
| Int(interrupt_num, tid) ->
build_object [
("Interrupt", build_object [
("value", build_number interrupt_num );
("return_addr", of_tid tid tid_map)
]);
]
let of_jmp (jmp: Jmp.t) (tid_map: word Tid.Map.t) : t =
build_object [
("tid", of_tid (Term.tid jmp) tid_map);
("term", build_object [
("condition", if Option.is_some (Jmp.guard jmp) then of_exp (Jmp.cond jmp) else build_null ());
("kind", of_jmp_kind (Jmp.kind jmp) tid_map);
]);
]
let of_blk (blk: Blk.t) (tid_map: word Tid.Map.t) : t =
let defs = Seq.to_list (Term.enum def_t blk) in
let defs = List.map defs ~f:(fun def -> of_def def tid_map) in
let jmps = Seq.to_list (Term.enum jmp_t blk) in
let jmps = List.map jmps ~f:(fun jmp -> of_jmp jmp tid_map) in
build_object [
("tid", of_tid (Term.tid blk) tid_map);
("term", build_object [
("defs", build_array defs);
("jmps", build_array jmps);
]);
]
let of_sub (sub: Sub.t) (tid_map: word Tid.Map.t) : t =
let blocks = Seq.to_list (Term.enum blk_t sub) in
let blocks = List.map blocks ~f:(fun block -> of_blk block tid_map) in
build_object [
("tid", of_tid (Term.tid sub) tid_map);
("term", build_object [
("name", build_string (Sub.name sub));
("blocks", build_array blocks);
]);
]
let of_extern_symbol (symbol: extern_symbol) (tid_map: word Tid.Map.t) : t =
build_object [
("tid", of_tid symbol.tid tid_map);
("address", build_string symbol.address);
("name", build_string symbol.name);
("calling_convention", match symbol.cconv with
| Some(cconv) -> build_string cconv
| None -> build_null ()
);
("arguments", build_array (List.map symbol.args ~f:(fun (var, expr, intent) ->
build_object [
("var", of_var var);
("location", of_exp expr);
("intent", match intent with
| Some(In) -> build_string "Input"
| Some(Out) -> build_string "Output"
| Some(Both) -> build_string "Both"
| None -> build_string "Unknown"
)
]
)))
]
let of_program (program: Program.t) (extern_symbols: extern_symbol List.t) (entry_points: Tid.t List.t) (tid_map: word Tid.Map.t) : t =
let subs = Seq.to_list (Term.enum sub_t program) in
let subs = List.map subs ~f:(fun sub -> of_sub sub tid_map) in
build_object [
("tid", of_tid (Term.tid program) tid_map);
("term", build_object [
("subs", build_array subs);
("extern_symbols", build_array (List.map extern_symbols ~f:(fun sym -> of_extern_symbol sym tid_map)));
("entry_points", build_array (List.map entry_points ~f:(fun tid -> of_tid tid tid_map)));
]);
]
let of_project (project: Project.t) (extern_symbols: extern_symbol List.t) (entry_points: Tid.t List.t) (tid_map: word Tid.Map.t) : t =
build_object [
("program", of_program (Project.program project) extern_symbols entry_points tid_map);
("cpu_architecture", build_string (Arch.to_string (Project.arch project)));
("stack_pointer_register", of_var (Symbol_utils.stack_register project));
("callee_saved_registers", build_array (List.map (Cconv.get_register_list project "callee_saved") ~f:(fun reg_name -> build_string reg_name) ));
("parameter_registers", build_array (List.map (Cconv.get_register_list project "params") ~f:(fun reg_name -> build_string reg_name) ))
]
(** This module allows the creation of SerdeJson objects that can be deserialized
to the corresponding data type in Rust.
Note that this is not optimized for speed; extensive usage could lead to a measurable slowdown.
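A minimal usage example, building a Json object and converting it to a string:
{[
let json = build_object [
  ("name", build_string "example");
  ("count", build_number 42);
] in
to_string json
]}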
*)
open Core_kernel
open Bap.Std
type t
(** Build a Json Null object *)
val build_null: unit -> t
(** Build a Json boolean object *)
val build_bool: Bool.t -> t
(** Build a Json number object *)
val build_number: int -> t
(** Build a Json string object *)
val build_string: String.t -> t
(** Build a Json array object from a list of Json objects *)
val build_array: t List.t -> t
(** Build a Json object from a list of key-value-pairs *)
val build_object: (String.t * t) List.t -> t
(** Get the Json string corresponding to a Json object *)
val to_string: t -> String.t
val of_var_type: Bil.Types.typ -> t
val of_var: Var.t -> t
val of_cast_type: Bil.Types.cast -> t
val of_binop_type: Bil.Types.binop -> t
val of_unop_type: Bil.Types.unop -> t
val of_endianness: Bitvector.endian -> t
val of_bitvector: Bitvector.t -> t
val of_exp: Exp.t -> t
val of_tid: Tid.t -> word Tid.Map.t -> t
val of_def: Def.t -> word Tid.Map.t -> t
val of_jmp_label: Label.t -> word Tid.Map.t -> t
val of_call: Call.t -> word Tid.Map.t -> t
val of_jmp_kind: jmp_kind -> word Tid.Map.t -> t
val of_jmp: Jmp.t -> word Tid.Map.t -> t
val of_blk: Blk.t -> word Tid.Map.t -> t
val of_sub: Sub.t -> word Tid.Map.t -> t
val of_extern_symbol: Symbol_utils.extern_symbol -> word Tid.Map.t -> t
val of_program: Program.t -> Symbol_utils.extern_symbol List.t -> Tid.t List.t -> word Tid.Map.t -> t
val of_project: Project.t -> Symbol_utils.extern_symbol List.t -> Tid.t List.t -> word Tid.Map.t -> t
...@@ -2,12 +2,14 @@ import os ...@@ -2,12 +2,14 @@ import os
import subprocess import subprocess
def build_bap_cmd(filename, target, arch, compiler): def build_bap_cmd(filename, target, arch, compiler, check_name = None):
if check_name is None:
check_name = 'CWE%s' % target
if 'travis' in os.environ['USER']: if 'travis' in os.environ['USER']:
abs_path = os.path.abspath('test/artificial_samples/build/cwe_%s_%s_%s.out' % (filename, arch, compiler)) abs_path = os.path.abspath('test/artificial_samples/build/cwe_%s_%s_%s.out' % (filename, arch, compiler))
cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest bap /tmp/input --pass=cwe-checker --cwe-checker-partial=CWE%s --cwe-checker-config=/home/bap/cwe_checker/src/config.json' % (abs_path, target) cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest cwe_checker /tmp/input -partial=%s' % (abs_path, check_name)
else: else:
cmd = 'bap test/artificial_samples/build/cwe_%s_%s_%s.out --pass=cwe-checker --cwe-checker-partial=CWE%s --cwe-checker-config=src/config.json' % (filename, arch, compiler, target) cmd = 'cwe_checker test/artificial_samples/build/cwe_%s_%s_%s.out -partial=%s' % (filename, arch, compiler, check_name)
return cmd.split() return cmd.split()
...@@ -20,9 +22,9 @@ def build_bap_emulation_cmd(filename, target, arch, compiler): ...@@ -20,9 +22,9 @@ def build_bap_emulation_cmd(filename, target, arch, compiler):
return cmd.split() return cmd.split()
def execute_and_check_occurence(filename, target, arch, compiler, string): def execute_and_check_occurence(filename, target, arch, compiler, string, check_name = None):
occurence = 0 occurence = 0
bap_cmd = build_bap_cmd(filename, target, arch, compiler) bap_cmd = build_bap_cmd(filename, target, arch, compiler, check_name)
output = subprocess.check_output(bap_cmd) output = subprocess.check_output(bap_cmd)
for l in output.splitlines(): for l in output.splitlines():
if string in l: if string in l:
......
import unittest
import cwe_checker_testlib
class TestCwe415(unittest.TestCase):
def setUp(self):
self.target = '415'
self.string = b'Double Free'
def test_cwe415_01_x64_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x64', 'gcc', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_x64_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x64', 'clang', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_x86_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x86', 'gcc', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_x86_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x86', 'clang', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_arm_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'arm', 'gcc', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_arm_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'arm', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_aarch64_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'aarch64', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_aarch64_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'aarch64', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("Depends on proper MIPS support in BAP")
def test_cwe415_01_mips_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips', 'gcc', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_mips_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mipsel_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mipsel', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mipsel_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mipsel', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mips64_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips64', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mips64_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips64', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mips64el_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips64el', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mips64el_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips64el', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Check again when moved to BAP 2.1')
def test_cwe415_01_ppc_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_ppc64_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc64', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_ppc64_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc64', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_ppc64le_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc64le', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_ppc64le_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc64le', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Only finds two of three possible double frees')
def test_cwe415_01_x86_mingw_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x86', 'mingw32-gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Only finds two of three possible double frees')
def test_cwe415_01_x64_mingw_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x64', 'mingw32-gcc', self.string)
self.assertEqual(res, expect_res)
...@@ -8,9 +8,9 @@ class TestCheckPath(unittest.TestCase): ...@@ -8,9 +8,9 @@ class TestCheckPath(unittest.TestCase):
def setUp(self): def setUp(self):
if 'travis' in os.environ['USER']: if 'travis' in os.environ['USER']:
abs_path = os.path.abspath('test/artificial_samples/build/check_path_x64_gcc.out') abs_path = os.path.abspath('test/artificial_samples/build/check_path_x64_gcc.out')
self.cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest bap /tmp/input --pass=cwe-checker --cwe-checker-config=/home/bap/cwe_checker/src/config.json --cwe-checker-json --cwe-checker-check-path' % abs_path self.cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest cwe_checker /tmp/input -config=/home/bap/cwe_checker/src/config.json -json -check-path -no-logging' % abs_path
else: else:
self.cmd = 'bap test/artificial_samples/build/check_path_x64_gcc.out --pass=cwe-checker --cwe-checker-config=src/config.json --cwe-checker-json --cwe-checker-check-path' self.cmd = 'cwe_checker test/artificial_samples/build/check_path_x64_gcc.out -config=src/config.json -json -check-path -no-logging'
def test_check_path_01_x64_gcc(self): def test_check_path_01_x64_gcc(self):
output = subprocess.check_output(self.cmd.split()) output = subprocess.check_output(self.cmd.split())
......
import unittest
import cwe_checker_testlib
class TestCwe415(unittest.TestCase):
def setUp(self):
self.target = '415'
self.string = b'Double Free'
self.check_name = 'Memory'
def test_cwe415_01_x64_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_x64_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Needs pointer alignment tracking, or else SP = SP & Const loses the stack offset')
def test_cwe415_01_x86_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x86', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_x86_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x86', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_arm_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'arm', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_arm_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'arm', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_aarch64_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'aarch64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_aarch64_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'aarch64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe415_01_mips_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe415_01_mips_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe415_01_mipsel_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mipsel', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe415_01_mipsel_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mipsel', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("BAP does not recognize extern calls")
def test_cwe415_01_mips64_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("BAP does not recognize extern calls")
def test_cwe415_01_mips64_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("BAP does not recognize extern calls")
def test_cwe415_01_mips64el_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64el', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("BAP does not recognize extern calls")
def test_cwe415_01_mips64el_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64el', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_ppc_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe415_01_ppc64_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe415_01_ppc64_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe415_01_ppc64le_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64le', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe415_01_ppc64le_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64le', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME')
def test_cwe415_01_x86_mingw_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x86', 'mingw32-gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME')
def test_cwe415_01_x64_mingw_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x64', 'mingw32-gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
...@@ -7,157 +7,160 @@ class TestCwe416(unittest.TestCase): ...@@ -7,157 +7,160 @@ class TestCwe416(unittest.TestCase):
def setUp(self): def setUp(self):
self.target = '416' self.target = '416'
self.string = b'Use After Free' self.string = b'Use After Free'
self.check_name = 'Memory'
def test_cwe416_01_x64_gcc(self): def test_cwe416_01_x64_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x64', 'gcc', self.string) self.target, self.target, 'x64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
def test_cwe416_01_x64_clang(self): def test_cwe416_01_x64_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x64', 'clang', self.string) self.target, self.target, 'x64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Needs pointer alignment tracking, or else SP = SP & Const loses the stack offset')
def test_cwe416_01_x86_gcc(self): def test_cwe416_01_x86_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x86', 'gcc', self.string) self.target, self.target, 'x86', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
def test_cwe416_01_x86_clang(self): def test_cwe416_01_x86_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x86', 'clang', self.string) self.target, self.target, 'x86', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
def test_cwe416_01_arm_gcc(self): def test_cwe416_01_arm_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'arm', 'gcc', self.string) self.target, self.target, 'arm', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
def test_cwe416_01_arm_clang(self): def test_cwe416_01_arm_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'arm', 'clang', self.string) self.target, self.target, 'arm', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)') @unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe416_01_aarch64_gcc(self): def test_cwe416_01_aarch64_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'aarch64', 'gcc', self.string) self.target, self.target, 'aarch64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)') @unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe416_01_aarch64_clang(self): def test_cwe416_01_aarch64_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'aarch64', 'clang', self.string) self.target, self.target, 'aarch64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("Depends on proper MIPS support in BAP") @unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe416_01_mips_gcc(self): def test_cwe416_01_mips_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips', 'gcc', self.string) self.target, self.target, 'mips', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe416_01_mips_clang(self): def test_cwe416_01_mips_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips', 'clang', self.string) self.target, self.target, 'mips', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME") @unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe416_01_mipsel_gcc(self): def test_cwe416_01_mipsel_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mipsel', 'gcc', self.string) self.target, self.target, 'mipsel', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME") @unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe416_01_mipsel_clang(self): def test_cwe416_01_mipsel_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mipsel', 'clang', self.string) self.target, self.target, 'mipsel', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME") @unittest.skip("BAP does not recognize extern calls")
def test_cwe416_01_mips64_gcc(self): def test_cwe416_01_mips64_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64', 'gcc', self.string) self.target, self.target, 'mips64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME") @unittest.skip("BAP does not recognize extern calls")
def test_cwe416_01_mips64_clang(self): def test_cwe416_01_mips64_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64', 'clang', self.string) self.target, self.target, 'mips64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME") @unittest.skip("BAP does not recognize extern calls")
def test_cwe416_01_mips64el_gcc(self): def test_cwe416_01_mips64el_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64el', 'gcc', self.string) self.target, self.target, 'mips64el', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME") @unittest.skip("BAP does not recognize extern calls")
def test_cwe416_01_mips64el_clang(self): def test_cwe416_01_mips64el_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64el', 'clang', self.string) self.target, self.target, 'mips64el', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Check again when moved to BAP 2.1') #@unittest.skip('FIXME: Check again when moved to BAP 2.1')
def test_cwe416_01_ppc_gcc(self): def test_cwe416_01_ppc_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc', 'gcc', self.string) self.target, self.target, 'ppc', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)') @unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe416_01_ppc64_gcc(self): def test_cwe416_01_ppc64_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64', 'gcc', self.string) self.target, self.target, 'ppc64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)') @unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe416_01_ppc64_clang(self): def test_cwe416_01_ppc64_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64', 'clang', self.string) self.target, self.target, 'ppc64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME") @unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe416_01_ppc64le_gcc(self): def test_cwe416_01_ppc64le_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64le', 'gcc', self.string) self.target, self.target, 'ppc64le', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME") @unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe416_01_ppc64le_clang(self): def test_cwe416_01_ppc64le_clang(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64le', 'clang', self.string) self.target, self.target, 'ppc64le', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip("FIXME") @unittest.skip("FIXME")
def test_cwe416_01_x86_mingw_gcc(self): def test_cwe416_01_x86_mingw_gcc(self):
expect_res = 2 expect_res = 2
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x86', 'mingw32-gcc', self.string) self.target, self.target, 'x86', 'mingw32-gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Check again when moved to BAP 2.1') @unittest.skip('FIXME')
def test_cwe416_01_x64_mingw_gcc(self): def test_cwe416_01_x64_mingw_gcc(self):
expect_res = 1 expect_res = 1
res = cwe_checker_testlib.execute_emulation_and_check_occurence( res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x64', 'mingw32-gcc', self.string) self.target, self.target, 'x64', 'mingw32-gcc', self.string, self.check_name)
self.assertEqual(res, expect_res) self.assertEqual(res, expect_res)
...@@ -8,9 +8,9 @@ class TestJson(unittest.TestCase):
     def setUp(self):
         if 'travis' in os.environ['USER']:
             abs_path = os.path.abspath('test/artificial_samples/build/cwe_190_x64_gcc.out')
-            self.cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest bap /tmp/input --pass=cwe-checker --cwe-checker-config=/home/bap/cwe_checker/src/config.json --cwe-checker-json' % abs_path
+            self.cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest cwe_checker /tmp/input -config=/home/bap/cwe_checker/src/config.json -json -no-logging' % abs_path
         else:
-            self.cmd = 'bap test/artificial_samples/build/cwe_190_x64_gcc.out --pass=cwe-checker --cwe-checker-config=src/config.json --cwe-checker-json'
+            self.cmd = 'cwe_checker test/artificial_samples/build/cwe_190_x64_gcc.out -config=src/config.json -json -no-logging'
     def test_can_output_json(self):
         output = subprocess.check_output(self.cmd.split())
...
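The body of test_can_output_json is truncated above, so its actual assertions are not visible here. Purely as an illustration of the new command line (cwe_checker ... -json -no-logging), a minimal smoke test could simply verify that the captured output parses as JSON; the sketch below is not the project's real test.

    # Illustrative smoke test for the new '-json' output mode; it does not
    # reproduce the truncated assertions of test_can_output_json above.
    import json
    import subprocess
    import unittest

    class TestJsonSmoke(unittest.TestCase):

        def test_output_is_valid_json(self):
            cmd = ('cwe_checker test/artificial_samples/build/cwe_190_x64_gcc.out '
                   '-config=src/config.json -json -no-logging')
            output = subprocess.check_output(cmd.split())
            json.loads(output)  # raises ValueError if the output is not well-formed JSON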
...@@ -8,7 +8,7 @@ all:
 	bap tmp/arrays_arm_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv
 	bap tmp/arrays_mips64_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv
 	bap tmp/arrays_powerpc_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv
-	bap tmp/arrays_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=MemRegion,TypeInference,CWE476
+	bap tmp/arrays_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=MemRegion,TypeInference,CWE476,SerdeJson
 	bap tmp/c_constructs_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=CWE560,AddrTrans
 	bapbundle remove unit_tests_cwe_checker.plugin
...
...@@ -11,7 +11,8 @@
 (include_subdirs unqualified) ; Include all subdirs when looking for source files
-(alias
-  (name runtest)
+(rule
+  (alias runtest)
   (deps unit_tests_cwe_checker.exe)
-  (action (run %{deps} --color=always)))
+  (action (run %{deps} --color=always))
+)
...@@ -34,6 +34,7 @@ let unit_test_list = [
   "CWE476", Cwe_476_test.tests;
   "CWE560", Cwe_560_test.tests;
   "AddrTrans", Address_translation_test.tests;
+  "SerdeJson", Serde_json_test.tests;
 ]
...@@ -57,13 +58,14 @@ let set_example_project (project : Project.t) (tests : string list) =
   List.iter tests ~f:(fun test ->
       match test with
       | "TypeInference" -> Type_inference_test.example_project := Some(project)
       | "Cconv" -> begin
           Cconv_test.example_project := Some(project);
           Cconv_test.example_arch := Some(arch);
           check_for_cconv project arch;
           Cconv_test.example_bin_format := Some(get_test_bin_format project)
         end
       | "CWE476" -> Cwe_476_test.example_project := Some(project)
+      | "SerdeJson" -> Serde_json_test.example_project := Some(project)
       | _ -> ()
     )
...
open Core_kernel
open Cwe_checker_core
open Bap.Std
let example_project = ref None
let check msg x = Alcotest.(check bool) msg true x
let test_serde () =
let open Serde_json in
let serde = build_null () in
let json = to_string serde in
print_endline json;
check "serde_null" (json = "null");
let serde = build_bool true in
let json = to_string serde in
print_endline json;
check "serde_bool" (json = "true");
let serde = build_number 45 in
let json = to_string serde in
print_endline json;
check "serde_number" (json = "45");
let serde = build_string "hello" in
let json = to_string serde in
print_endline json;
check "serde_string" (json = "\"hello\"");
let serde = build_array [build_number 23; build_bool false] in
let json = to_string serde in
print_endline json;
check "serde_array" (json = "[23,false]");
let serde = build_object [("hello", build_number 23); ("world", build_bool false)] in
let json = to_string serde in
print_endline json;
check "serde_object" (json = "{\"hello\":23,\"world\":false}")
let test_type_conversions () =
let var_type = Bil.Types.Mem (`r64, `r8) in
let serde = Serde_json.of_var_type var_type in
let json = Serde_json.to_string serde in
print_endline json;
check "Var_Type" (json = "{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}");
let var = Var.create "RAX" var_type in
let serde = Serde_json.of_var var in
let json = Serde_json.to_string serde in
print_endline json;
check "Var" (json = "{\"is_temp\":false,\"name\":\"RAX\",\"type_\":{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}}");
let cast_type = Bil.Types.UNSIGNED in
let serde = Serde_json.of_cast_type cast_type in
let json = Serde_json.to_string serde in
print_endline json;
check "Cast_Type" (json = "\"UNSIGNED\"");
let unop = Bil.Types.NEG in
let serde = Serde_json.of_unop_type unop in
let json = Serde_json.to_string serde in
print_endline json;
check "Unop_Type" (json = "\"NEG\"");
let bitv = Bitvector.of_int ~width:8 234 in
let serde = Serde_json.of_bitvector bitv in
let json = Serde_json.to_string serde in
print_endline json;
check "Bitvector" (json = "{\"digits\":[234],\"width\":[8]}");
let exp = Bil.binop Bil.PLUS (Bil.int bitv) (Bil.int bitv) in
let serde = Serde_json.of_exp exp in
let json = Serde_json.to_string serde in
print_endline json;
check "Expression" (json = "{\"BinOp\":{\"lhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}},\"op\":\"PLUS\",\"rhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}}}}");
let tid = Tid.for_name "block" in
let term = Blk.create ~tid () in
let tid_map = Tid.Map.empty in
let serde = Serde_json.of_blk term tid_map in
let json = Serde_json.to_string serde in
print_endline json;
check "Block_term" (json = "{\"term\":{\"defs\":[],\"jmps\":[]},\"tid\":{\"address\":\"UNKNOWN\",\"id\":\"@block\"}}";)
let test_project_conversion () =
let project = Option.value_exn !example_project in
let program = Project.program project in
let tid_map = Address_translation.generate_tid_map program in
let extern_symbols = Symbol_utils.build_and_return_extern_symbols project program tid_map in
let entry_points = [] in
let serde = Serde_json.of_program program extern_symbols entry_points tid_map in
let _json = Serde_json.to_string serde in
(* TODO: The unit test for pointer inference should be moved to another file *)
Pointer_inference.run project tid_map;
Log_utils.emit_json "bin" "";
Log_utils.emit_native "";
check "Project" true
let tests = [
"Serde Json Conversions", `Quick, test_serde;
"Type Conversions", `Quick, test_type_conversions;
"Project conversion", `Quick, test_project_conversion;
]
open Bap.Std
open Core_kernel
val example_project: Project.t option ref
val tests: unit Alcotest.test_case list
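The expected JSON strings asserted in the new Serde_json unit tests above double as documentation of the serialization format that the OCaml side hands over to the Rust analysis engine. As a cross-check written in Python (illustrative only; the actual consumer is the Rust code, presumably deserializing these values with serde), the same shapes can be parsed and inspected:

    # Illustrative cross-check of the JSON shapes asserted in the OCaml tests above.
    # It only demonstrates the structure of the serialized BAP terms; it is not
    # part of the test suite.
    import json

    var_json = '{"is_temp":false,"name":"RAX","type_":{"Memory":{"addr_size":64,"elem_size":8}}}'
    var = json.loads(var_json)
    assert var["name"] == "RAX" and var["is_temp"] is False
    assert var["type_"]["Memory"] == {"addr_size": 64, "elem_size": 8}

    exp_json = ('{"BinOp":{"lhs":{"Const":{"digits":[234],"width":[8]}},'
                '"op":"PLUS","rhs":{"Const":{"digits":[234],"width":[8]}}}}')
    exp = json.loads(exp_json)
    assert exp["BinOp"]["op"] == "PLUS"
    assert exp["BinOp"]["lhs"] == exp["BinOp"]["rhs"]  # both operands are the constant 234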