Unverified Commit 8e575abc by Enkelmann Committed by GitHub

Rust interface (#70)

Added brand new (and still experimental) checks for CWEs 415 and 416 together with a new interprocedural data-flow analysis engine written in Rust. Add `-partial=Memory` as command line flag to try out the new checks.
parent fd91fb5c
...@@ -227,3 +227,15 @@ test/artificial_samples/dockcross* ...@@ -227,3 +227,15 @@ test/artificial_samples/dockcross*
.#* .#*
.sconsign.dblite .sconsign.dblite
### Rust ###
# Generated by Cargo
# will have compiled files and executables
/target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
#!/bin/bash #!/bin/bash
docker run --rm -t cwe-checker dune runtest && pytest docker run --rm -t cwe-checker cargo test && docker run --rm -t cwe-checker dune runtest && pytest
...@@ -4,8 +4,10 @@ dev ...@@ -4,8 +4,10 @@ dev
- Added a lot more test cases to acceptance tests (PR #46) - Added a lot more test cases to acceptance tests (PR #46)
- Reworked CWE-476 check to track stack variables (PR #47) - Reworked CWE-476 check to track stack variables (PR #47)
- Switched to BAP 2.0 (PR #49) - Switched to BAP 2.0 (PR #49)
- Several internal code improvements (PRs #51, #58, #62) - Several internal code improvements (PRs #51, #58, #62, #67)
- Added deprecation warnings to the emulation based checks (PR #66) - Added deprecation warnings to the emulation based checks (PR #66)
- Added a new (still experimental) engine for data-flow analysis written in Rust (PR #70)
- Added new, data-flow based checks for CWEs 415 and 416 (PR #70)
0.3 (2019-12) 0.3 (2019-12)
==== ====
......
[workspace]
members = ["cwe_checker_rs"]
...@@ -4,8 +4,9 @@ COPY . /home/bap/cwe_checker/ ...@@ -4,8 +4,9 @@ COPY . /home/bap/cwe_checker/
RUN sudo chown -R bap:bap /home/bap/cwe_checker \ RUN sudo chown -R bap:bap /home/bap/cwe_checker \
&& cd /home/bap/cwe_checker \ && cd /home/bap/cwe_checker \
&& make all && make clean && make all
WORKDIR /home/bap/cwe_checker WORKDIR /home/bap/cwe_checker
ENTRYPOINT ["opam", "config", "exec", "--"] ENTRYPOINT ["opam", "config", "exec", "--"]
CMD cwe_checker /tmp/input
.PHONY: all clean test uninstall docker .PHONY: all clean test uninstall docker
all: all:
cargo build --release
cp target/release/libcwe_checker_rs.a src/libcwe_checker_rs.a
cp target/release/libcwe_checker_rs.so src/dllcwe_checker_rs.so
dune build dune build
dune install dune install
cd plugins/cwe_checker; make all; cd ../.. cd plugins/cwe_checker; make all; cd ../..
cd plugins/cwe_checker_emulation; make all; cd ../.. cd plugins/cwe_checker_emulation; make all; cd ../..
cd plugins/cwe_checker_type_inference; make all; cd ../.. cd plugins/cwe_checker_type_inference; make all; cd ../..
cd plugins/cwe_checker_type_inference_print; make all; cd ../.. cd plugins/cwe_checker_type_inference_print; make all; cd ../..
cd plugins/cwe_checker_pointer_inference_debug; make all; cd ../..
test: test:
cargo test
cd test/unit/ && ./specify_test_files_for_compilation.sh cd test/unit/ && ./specify_test_files_for_compilation.sh
dune runtest dune runtest
cd test/artificial_samples; scons; cd ../.. cd test/artificial_samples; scons; cd ../..
pytest -v pytest -v --ignore=_build
clean: clean:
cargo clean
rm -f src/libcwe_checker_rs.a
rm -f src/dllcwe_checker_rs.so
dune clean dune clean
bapbuild -clean bapbuild -clean
rm -f -r doc/html rm -f -r doc/html
...@@ -22,6 +30,7 @@ clean: ...@@ -22,6 +30,7 @@ clean:
cd plugins/cwe_checker_emulation; make clean; cd ../.. cd plugins/cwe_checker_emulation; make clean; cd ../..
cd plugins/cwe_checker_type_inference; make clean; cd ../.. cd plugins/cwe_checker_type_inference; make clean; cd ../..
cd plugins/cwe_checker_type_inference_print; make clean; cd ../.. cd plugins/cwe_checker_type_inference_print; make clean; cd ../..
cd plugins/cwe_checker_pointer_inference_debug; make clean; cd ../..
uninstall: uninstall:
dune uninstall dune uninstall
...@@ -29,6 +38,7 @@ uninstall: ...@@ -29,6 +38,7 @@ uninstall:
cd plugins/cwe_checker_emulation; make uninstall; cd ../.. cd plugins/cwe_checker_emulation; make uninstall; cd ../..
cd plugins/cwe_checker_type_inference; make uninstall; cd ../.. cd plugins/cwe_checker_type_inference; make uninstall; cd ../..
cd plugins/cwe_checker_type_inference_print; make uninstall; cd ../.. cd plugins/cwe_checker_type_inference_print; make uninstall; cd ../..
cd plugins/cwe_checker_pointer_inference_debug; make uninstall; cd ../..
documentation: documentation:
dune build @doc dune build @doc
......
[package]
name = "cwe_checker_rs"
version = "0.1.0"
authors = ["Nils-Edvin Enkelmann <nils-edvin.enkelmann@fkie.fraunhofer.de>"]
edition = "2018"
[dependencies]
apint = "0.2"
serde = {version = "1.0", features = ["derive", "rc"]}
serde_json = "1.0"
serde_yaml = "0.8"
ocaml = "0.9.2"
petgraph = { version = "0.5", features = ["default", "serde-1"] }
fnv = "1.0" # a faster hash function for small keys like integers
anyhow = "1.0" # for easy error types
crossbeam-channel = "0.4"
[lib]
name = "cwe_checker_rs"
crate-type = ["staticlib", "cdylib"]
/*!
This module implements a generic fixpoint algorithm for dataflow analysis.
A fixpoint problem is defined as a graph where:
- Each node `n` gets assigned a value `val(n)` where the set of all values forms a partially ordered set.
- Each edge `e` defines a rule `e:value -> value` how to compute the value at the end node given the value at the start node of the edge.
A fixpoint is an assignment of values to all nodes of the graph so that for all edges
`e(val(start_node)) <= val(end_node)` holds.
For general information on dataflow analysis using fixpoint algorithms see [Wikipedia](https://en.wikipedia.org/wiki/Data-flow_analysis).
Or open an issue on github that you want more documentation here. :-)
*/
use fnv::FnvHashMap;
use petgraph::graph::{DiGraph, EdgeIndex, NodeIndex};
use petgraph::visit::EdgeRef;
use std::collections::{BTreeMap, BinaryHeap};
/// A fixpoint problem defines the context for a fixpoint computation.
///
/// All trait methods have access to the FixpointProblem structure, so that context informations are accessible through it.
pub trait Problem {
type EdgeLabel: Clone;
type NodeLabel;
type NodeValue: PartialEq + Eq;
fn get_graph(&self) -> &DiGraph<Self::NodeLabel, Self::EdgeLabel>;
/// This function describes how to merge two values
fn merge(&self, val1: &Self::NodeValue, val2: &Self::NodeValue) -> Self::NodeValue;
/// This function describes how the value at the end node of an edge is computed from the value at the start node of the edge.
/// The function can return None to indicate that no end value gets generated through this edge.
/// E.g. In a control flow graph, if the edge cannot be taken for the given start value, this function should return None.
fn update_edge(&self, value: &Self::NodeValue, edge: EdgeIndex) -> Option<Self::NodeValue>;
}
/// The computation struct contains an intermediate result of a fixpoint computation.
pub struct Computation<T: Problem> {
fp_problem: T,
node_priority_list: Vec<usize>, // maps a node index to its priority (higher priority nodes get stabilized first)
priority_to_node_list: Vec<NodeIndex>, // maps a priority to the corresponding node index
worklist: BinaryHeap<usize>,
default_value: Option<T::NodeValue>,
node_values: FnvHashMap<NodeIndex, T::NodeValue>,
}
impl<T: Problem> Computation<T> {
/// Create a new fixpoint computation from a fixpoint problem, the corresponding graph
/// and a default value for all nodes if one should exists.
pub fn new(fp_problem: T, default_value: Option<T::NodeValue>) -> Self {
let graph = fp_problem.get_graph();
// order the nodes in weak topological order
let sorted_nodes: Vec<NodeIndex> = petgraph::algo::kosaraju_scc(&graph)
.into_iter()
.flatten()
.rev()
.collect();
let mut node_to_index = BTreeMap::new();
for (i, node_index) in sorted_nodes.iter().enumerate() {
node_to_index.insert(node_index, i);
}
let node_priority_list: Vec<usize> = node_to_index.values().copied().collect();
let mut worklist = BinaryHeap::new();
// If a default value exists, all nodes are added to the worklist. If not, the worklist is empty
if default_value.is_some() {
for i in 0..sorted_nodes.len() {
worklist.push(i);
}
}
Computation {
fp_problem,
node_priority_list,
priority_to_node_list: sorted_nodes,
worklist,
default_value,
node_values: FnvHashMap::default(),
}
}
/// Get the value of a node.
pub fn get_node_value(&self, node: NodeIndex) -> Option<&T::NodeValue> {
if let Some(ref value) = self.node_values.get(&node) {
Some(value)
} else {
self.default_value.as_ref()
}
}
/// Set the value of a node and mark the node as not yet stabilized.
pub fn set_node_value(&mut self, node: NodeIndex, value: T::NodeValue) {
self.node_values.insert(node, value);
self.worklist.push(self.node_priority_list[node.index()]);
}
/// Merge the value at a node with some new value.
fn merge_node_value(&mut self, node: NodeIndex, value: T::NodeValue) {
if let Some(old_value) = self.node_values.get(&node) {
let merged_value = self.fp_problem.merge(&value, old_value);
if merged_value != *old_value {
self.set_node_value(node, merged_value);
}
} else {
self.set_node_value(node, value);
}
}
/// Compute and update the value at the end node of an edge.
fn update_edge(&mut self, edge: EdgeIndex) {
let (start_node, end_node) = self
.fp_problem
.get_graph()
.edge_endpoints(edge)
.expect("Edge not found");
if let Some(start_val) = self.node_values.get(&start_node) {
if let Some(new_end_val) = self.fp_problem.update_edge(start_val, edge) {
self.merge_node_value(end_node, new_end_val);
}
}
}
/// Update all outgoing edges of a node.
fn update_node(&mut self, node: NodeIndex) {
let edges: Vec<EdgeIndex> = self
.fp_problem
.get_graph()
.edges(node)
.map(|edge_ref| edge_ref.id())
.collect();
for edge in edges {
self.update_edge(edge);
}
}
/// Compute the fixpoint of the fixpoint problem.
/// Each node will be visited at most max_steps times.
/// If a node does not stabilize after max_steps visits, the end result will not be a fixpoint but only an intermediate result of a fixpoint computation.
pub fn compute_with_max_steps(&mut self, max_steps: u64) {
let mut steps = vec![0; self.fp_problem.get_graph().node_count()];
while let Some(priority) = self.worklist.pop() {
let node = self.priority_to_node_list[priority];
if steps[node.index()] < max_steps {
steps[node.index()] += 1;
self.update_node(node);
}
}
}
/// Compute the fixpoint of the fixpoint problem.
/// If the fixpoint algorithm does not converge to a fixpoint, this function will not terminate.
pub fn compute(&mut self) {
while let Some(priority) = self.worklist.pop() {
let node = self.priority_to_node_list[priority];
self.update_node(node);
}
}
/// Get a reference to the internal map where one can look up the current values of all nodes
pub fn node_values(&self) -> &FnvHashMap<NodeIndex, T::NodeValue> {
&self.node_values
}
/// Get a reference to the underlying graph
pub fn get_graph(&self) -> &DiGraph<T::NodeLabel, T::EdgeLabel> {
self.fp_problem.get_graph()
}
}
#[cfg(test)]
mod tests {
use super::*;
struct FPProblem {
graph: DiGraph<(), u64>,
}
impl Problem for FPProblem {
type EdgeLabel = u64;
type NodeLabel = ();
type NodeValue = u64;
fn get_graph(&self) -> &DiGraph<(), u64> {
&self.graph
}
fn merge(&self, val1: &Self::NodeValue, val2: &Self::NodeValue) -> Self::NodeValue {
std::cmp::min(*val1, *val2)
}
fn update_edge(&self, value: &Self::NodeValue, edge: EdgeIndex) -> Option<Self::NodeValue> {
Some(value + self.graph.edge_weight(edge).unwrap())
}
}
#[test]
fn fixpoint() {
let mut graph: DiGraph<(), u64> = DiGraph::new();
for _i in 0..101 {
graph.add_node(());
}
for i in 0..100 {
graph.add_edge(NodeIndex::new(i), NodeIndex::new(i + 1), i as u64 % 10 + 1);
}
for i in 0..10 {
graph.add_edge(NodeIndex::new(i * 10), NodeIndex::new(i * 10 + 5), 0);
}
graph.add_edge(NodeIndex::new(100), NodeIndex::new(0), 0);
let mut solution = Computation::new(FPProblem { graph }, None);
solution.set_node_value(NodeIndex::new(0), 0);
solution.compute_with_max_steps(20);
assert_eq!(30, *solution.get_node_value(NodeIndex::new(9)).unwrap());
assert_eq!(0, *solution.get_node_value(NodeIndex::new(5)).unwrap());
}
}
/*!
This module defines a trait for interprocedural fixpoint problems.
## Basic usage
Define a *Context* struct containing all information that does not change during the fixpoint computation.
In particular, this includes the graph on which the fixpoint computation is run.
Then implement the *Problem* trait for the *Context* struct.
The fixpoint computation can now be run as follows:
```
let context = MyContext::new(); // MyContext needs to implement Problem
let mut computation = Computation::new(context, None);
// add starting node values here with
computation.compute();
// computation is done, get solution node values here
```
*/
// TODO: When indirect jumps are sufficiently supported, the update_jump methods need access to
// target (and maybe source) nodes/TIDs, to determine which target the current edge points to.
// Alternatively, this could be achieved through usage of the specialize_conditional function.
// Currently unclear, which way is better.
use super::fixpoint::Problem as GeneralFPProblem;
use super::graph::*;
use crate::bil::Expression;
use crate::prelude::*;
use crate::term::*;
use fnv::FnvHashMap;
use petgraph::graph::{EdgeIndex, NodeIndex};
use std::marker::PhantomData;
#[derive(PartialEq, Eq, Serialize, Deserialize)]
pub enum NodeValue<T: PartialEq + Eq> {
Value(T),
CallReturnCombinator { call: Option<T>, return_: Option<T> },
}
impl<T: PartialEq + Eq> NodeValue<T> {
pub fn unwrap_value(&self) -> &T {
match self {
NodeValue::Value(value) => value,
_ => panic!("Unexpected node value type"),
}
}
}
/// An interprocedural fixpoint problem defines the context for a fixpoint computation.
///
/// All trait methods have access to the FixpointProblem structure, so that context informations are accessible through it.
pub trait Problem<'a> {
type Value: PartialEq + Eq + Clone;
fn get_graph(&self) -> &Graph<'a>;
fn merge(&self, value1: &Self::Value, value2: &Self::Value) -> Self::Value;
fn update_def(&self, value: &Self::Value, def: &Term<Def>) -> Self::Value;
fn update_jump(
&self,
value: &Self::Value,
jump: &Term<Jmp>,
untaken_conditional: Option<&Term<Jmp>>,
) -> Option<Self::Value>;
fn update_call(&self, value: &Self::Value, call: &Term<Jmp>, target: &Node) -> Self::Value;
fn update_return(
&self,
value: &Self::Value,
value_before_call: Option<&Self::Value>,
call_term: &Term<Jmp>,
) -> Option<Self::Value>;
fn update_call_stub(&self, value: &Self::Value, call: &Term<Jmp>) -> Option<Self::Value>;
fn specialize_conditional(
&self,
value: &Self::Value,
condition: &Expression,
is_true: bool,
) -> Option<Self::Value>;
}
/// This struct is a wrapper to create a general fixpoint problem out of an interprocedural fixpoint problem.
struct GeneralizedProblem<'a, T: Problem<'a>> {
problem: T,
_phantom_graph_reference: PhantomData<Graph<'a>>,
}
impl<'a, T: Problem<'a>> GeneralizedProblem<'a, T> {
pub fn new(problem: T) -> Self {
GeneralizedProblem {
problem,
_phantom_graph_reference: PhantomData,
}
}
}
impl<'a, T: Problem<'a>> GeneralFPProblem for GeneralizedProblem<'a, T> {
type EdgeLabel = Edge<'a>;
type NodeLabel = Node<'a>;
type NodeValue = NodeValue<T::Value>;
fn get_graph(&self) -> &Graph<'a> {
self.problem.get_graph()
}
fn merge(&self, val1: &Self::NodeValue, val2: &Self::NodeValue) -> Self::NodeValue {
use NodeValue::*;
match (val1, val2) {
(Value(value1), Value(value2)) => Value(self.problem.merge(value1, value2)),
(
CallReturnCombinator {
call: call1,
return_: return1,
},
CallReturnCombinator {
call: call2,
return_: return2,
},
) => CallReturnCombinator {
call: merge_option(call1, call2, |v1, v2| self.problem.merge(v1, v2)),
return_: merge_option(return1, return2, |v1, v2| self.problem.merge(v1, v2)),
},
_ => panic!("Malformed CFG in fixpoint computation"),
}
}
fn update_edge(
&self,
node_value: &Self::NodeValue,
edge: EdgeIndex,
) -> Option<Self::NodeValue> {
let graph = self.problem.get_graph();
let (start_node, end_node) = graph.edge_endpoints(edge).unwrap();
let block_term = graph.node_weight(start_node).unwrap().get_block();
match graph.edge_weight(edge).unwrap() {
Edge::Block => {
let value = node_value.unwrap_value();
let defs = &block_term.term.defs;
let end_val = defs.iter().fold(value.clone(), |accum, def| {
self.problem.update_def(&accum, def)
});
Some(NodeValue::Value(end_val))
}
Edge::Call(call) => Some(NodeValue::Value(self.problem.update_call(
node_value.unwrap_value(),
call,
&graph[end_node],
))),
Edge::CRCallStub => Some(NodeValue::CallReturnCombinator {
call: Some(node_value.unwrap_value().clone()),
return_: None,
}),
Edge::CRReturnStub => Some(NodeValue::CallReturnCombinator {
call: None,
return_: Some(node_value.unwrap_value().clone()),
}),
Edge::CRCombine(call_term) => match node_value {
NodeValue::Value(_) => panic!("Unexpected interprocedural fixpoint graph state"),
NodeValue::CallReturnCombinator { call, return_ } => {
if let Some(return_value) = return_ {
match self
.problem
.update_return(return_value, call.as_ref(), call_term)
{
Some(val) => Some(NodeValue::Value(val)),
None => None,
}
} else {
None
}
}
},
Edge::ExternCallStub(call) => self
.problem
.update_call_stub(node_value.unwrap_value(), call)
.map(|val| NodeValue::Value(val)),
Edge::Jump(jump, untaken_conditional) => self
.problem
.update_jump(node_value.unwrap_value(), jump, *untaken_conditional)
.map(|val| NodeValue::Value(val)),
}
}
}
/// This struct contains an intermediate result of an interprocedural fixpoint cumputation.
pub struct Computation<'a, T: Problem<'a>> {
generalized_computation: super::fixpoint::Computation<GeneralizedProblem<'a, T>>,
}
impl<'a, T: Problem<'a>> Computation<'a, T> {
/// Generate a new computation from the corresponding problem and a default value for nodes.
pub fn new(problem: T, default_value: Option<T::Value>) -> Self {
let generalized_problem = GeneralizedProblem::new(problem);
let computation = super::fixpoint::Computation::new(
generalized_problem,
default_value.map(|val| NodeValue::Value(val)),
);
Computation {
generalized_computation: computation,
}
}
/// Compute the fixpoint.
/// Note that this function does not terminate if the fixpoint algorithm does not stabilize
pub fn compute(&mut self) {
self.generalized_computation.compute()
}
/// Compute the fixpoint while updating each node at most max_steps times.
/// Note that the result may not be a stabilized fixpoint, but only an intermediate result of a fixpoint computation.
pub fn compute_with_max_steps(&mut self, max_steps: u64) {
self.generalized_computation
.compute_with_max_steps(max_steps)
}
/// Get the value of a node.
pub fn get_node_value(&self, node: NodeIndex) -> Option<&NodeValue<T::Value>> {
self.generalized_computation.get_node_value(node)
}
/// Set the value of a node and mark the node as not yet stabilized
pub fn set_node_value(&mut self, node: NodeIndex, value: NodeValue<T::Value>) {
self.generalized_computation.set_node_value(node, value)
}
/// Get a reference to the internal map where one can look up the current values of all nodes
pub fn node_values(&self) -> &FnvHashMap<NodeIndex, NodeValue<T::Value>> {
self.generalized_computation.node_values()
}
/// Get a reference to the underlying graph
pub fn get_graph(&self) -> &Graph {
self.generalized_computation.get_graph()
}
}
fn merge_option<T: Clone, F>(opt1: &Option<T>, opt2: &Option<T>, merge: F) -> Option<T>
where
F: Fn(&T, &T) -> T,
{
match (opt1, opt2) {
(Some(value1), Some(value2)) => Some(merge(value1, value2)),
(Some(value), None) | (None, Some(value)) => Some(value.clone()),
(None, None) => None,
}
}
pub mod abstract_domain;
pub mod fixpoint;
pub mod graph;
pub mod interprocedural_fixpoint;
pub mod mem_region;
pub mod pointer_inference;
use crate::bil::variable::*;
use crate::prelude::*;
use crate::utils::fast_cmp_arc::FastCmpArc;
use std::sync::Arc;
// TODO: Right now abstract locations are used as giving the location where a pointer to an object is located.
// But it could also be used to point into the object (at offset 0).
// Can I solve this possible ambivalence in intended usage in a way such that accidentally wrong usage is prevented?
// If not, I have to document the intended usage with a big warning sign.
/// An abstract identifier is given by a time identifier and a location identifier.
///
/// For the location identifier see `AbstractLocation`.
/// The time identifier is given by a `Tid`.
/// If it is the Tid of a basic block, then it describes the point in time *before* execution of the first instruction in the block.
/// If it is the Tid of a Def or Jmp, then it describes the point in time *after* the execution of the Def or Jmp.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct AbstractIdentifier(FastCmpArc<AbstractIdentifierData>);
/// The data contained in an abstract identifier
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct AbstractIdentifierData {
time: Tid,
location: AbstractLocation,
}
impl AbstractIdentifier {
/// create a new abstract identifier
pub fn new(time: Tid, location: AbstractLocation) -> AbstractIdentifier {
AbstractIdentifier(FastCmpArc(Arc::new(AbstractIdentifierData {
time,
location,
})))
}
}
impl std::fmt::Display for AbstractIdentifier {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(formatter, "{} @ {}", self.0.time, self.0.location)
}
}
/// An abstract location describes how to find the value of a variable in memory at a given time.
///
/// It is defined recursively, where the root is always a register.
/// This way only locations that the local state knows about are representable.
/// It is also impossible to accidently describe circular references.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractLocation {
Register(String, BitSize),
Pointer(String, AbstractMemoryLocation),
}
impl std::fmt::Display for AbstractLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Register(name, _size) => write!(formatter, "{}", name),
Self::Pointer(reg_name, location) => write!(formatter, "{}->{}", reg_name, location),
}
}
}
impl AbstractLocation {
/// Create an abstract location from a variable corresponding to a register.
/// This function returns an error if the variable is not a physical register.
pub fn from_var(variable: &Variable) -> Result<AbstractLocation, Error> {
if variable.is_temp {
return Err(anyhow!(
"Cannot create abstract location from temporary variables."
));
}
Ok(AbstractLocation::Register(
variable.name.clone(),
variable.bitsize()?,
))
}
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation {
Location {
offset: isize,
size: usize,
},
Pointer {
offset: isize,
size: usize,
target: Box<AbstractMemoryLocation>,
},
}
impl std::fmt::Display for AbstractMemoryLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Location { offset, .. } => write!(formatter, "({})", offset),
Self::Pointer {
offset,
size: _,
target,
} => write!(formatter, "({})->{}", offset, target),
}
}
}
use serde::{Deserialize, Serialize};
pub mod variable;
pub use variable::*;
pub type Bitvector = apint::ApInt;
pub type BitSize = u16;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Expression {
Var(Variable),
Const(Bitvector),
Load {
memory: Box<Expression>,
address: Box<Expression>,
endian: Endianness,
size: BitSize,
},
Store {
memory: Box<Expression>,
address: Box<Expression>,
value: Box<Expression>,
endian: Endianness,
size: BitSize,
},
BinOp {
op: BinOpType,
lhs: Box<Expression>,
rhs: Box<Expression>,
},
UnOp {
op: UnOpType,
arg: Box<Expression>,
},
Cast {
kind: CastType,
width: BitSize,
arg: Box<Expression>,
},
Let {
var: Variable,
bound_exp: Box<Expression>,
body_exp: Box<Expression>,
},
Unknown {
description: String,
type_: Type,
},
IfThenElse {
condition: Box<Expression>,
true_exp: Box<Expression>,
false_exp: Box<Expression>,
},
Extract {
low_bit: BitSize,
high_bit: BitSize,
arg: Box<Expression>,
},
Concat {
left: Box<Expression>,
right: Box<Expression>,
},
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum CastType {
UNSIGNED,
SIGNED,
HIGH,
LOW,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum BinOpType {
PLUS,
MINUS,
TIMES,
DIVIDE,
SDIVIDE,
MOD,
SMOD,
LSHIFT,
RSHIFT,
ARSHIFT,
AND,
OR,
XOR,
EQ,
NEQ,
LT,
LE,
SLT,
SLE,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum UnOpType {
NEG,
NOT,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Endianness {
LittleEndian,
BigEndian,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn variant_deserialization() {
let string = "\"UNSIGNED\"";
assert_eq!(CastType::UNSIGNED, serde_json::from_str(string).unwrap());
let string = "\"NEG\"";
assert_eq!(UnOpType::NEG, serde_json::from_str(string).unwrap());
}
#[test]
fn bitvector_deserialization() {
let bitv = Bitvector::from_u64(234);
let string = serde_json::to_string(&bitv).unwrap();
println!("{}", string);
println!("{:?}", bitv);
let string = "{\"digits\":[234],\"width\":[64]}";
assert_eq!(bitv, serde_json::from_str(string).unwrap());
}
#[test]
fn expression_deserialization() {
let string = "{\"BinOp\":{\"lhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}},\"op\":\"PLUS\",\"rhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}}}}";
let bitv = Bitvector::from_u8(234);
let exp = Expression::BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Expression::Const(bitv.clone())),
rhs: Box::new(Expression::Const(bitv)),
};
println!("{}", serde_json::to_string(&exp).unwrap());
assert_eq!(exp, serde_json::from_str(string).unwrap())
}
}
use super::BitSize;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
pub struct Variable {
pub name: String,
pub type_: Type,
pub is_temp: bool,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub enum Type {
Immediate(BitSize),
Memory {
addr_size: BitSize,
elem_size: BitSize,
},
Unknown,
}
impl Variable {
pub fn bitsize(&self) -> Result<BitSize, Error> {
if let Type::Immediate(bitsize) = self.type_ {
Ok(bitsize)
} else {
Err(anyhow!("Not a register variable"))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn type_deserialization() {
let typ = Type::Immediate(64);
let string = serde_json::to_string_pretty(&typ).expect("Serialization failed");
println!("{}", &string);
let _: Type = serde_json::from_str(&string).expect("Deserialization failed");
let typ = Type::Memory {
addr_size: 64,
elem_size: 8,
};
let string = serde_json::to_string_pretty(&typ).expect("Serialization failed");
println!("{}", &string);
let _: Type = serde_json::from_str(&string).expect("Deserialization failed");
let typ = Type::Unknown;
let string = serde_json::to_string_pretty(&typ).expect("Serialization failed");
println!("{}", &string);
let _: Type = serde_json::from_str(&string).expect("Deserialization failed");
}
#[test]
fn var_type_from_ocaml() {
let json_string = "{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}";
let typ = Type::Memory {
addr_size: 64,
elem_size: 8,
};
assert_eq!(typ, serde_json::from_str(json_string).unwrap())
}
#[test]
fn var_from_ocaml() {
let json_string = "{\"is_temp\":false,\"name\":\"RAX\",\"type_\":{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}}";
let var = Variable {
name: "RAX".to_string(),
type_: Type::Memory {
addr_size: 64,
elem_size: 8,
},
is_temp: false,
};
assert_eq!(var, serde_json::from_str(json_string).unwrap())
}
}
use super::serde::JsonBuilder;
use super::OcamlSendable;
use crate::term::*;
use crate::utils::log::CweWarning;
use super::failwith_on_panic;
fn run_pointer_inference(program_jsonbuilder_val: ocaml::Value) -> (Vec<CweWarning>, Vec<String>) {
let json_builder = unsafe { JsonBuilder::from_ocaml(&program_jsonbuilder_val) };
let program_json = serde_json::Value::from(json_builder);
let project: Project =
serde_json::from_value(program_json).expect("Project deserialization failed");
crate::analysis::pointer_inference::run(&project, false)
}
caml!(rs_run_pointer_inference(program_jsonbuilder_val) {
return failwith_on_panic( || {
let cwe_warnings_and_log = run_pointer_inference(program_jsonbuilder_val);
let cwe_warnings_and_log_json = serde_json::to_string(&cwe_warnings_and_log).unwrap();
let ocaml_string = ocaml::Str::from(&cwe_warnings_and_log_json as &str);
ocaml::Value::from(ocaml_string)
});
});
fn run_pointer_inference_and_print_debug(program_jsonbuilder_val: ocaml::Value) {
let json_builder = unsafe { JsonBuilder::from_ocaml(&program_jsonbuilder_val) };
let program_json = serde_json::Value::from(json_builder);
let project: Project =
serde_json::from_value(program_json).expect("Project deserialization failed");
crate::analysis::pointer_inference::run(&project, true); // TODO: This discard all CweWarnings and log messages. Change that?
}
caml!(rs_run_pointer_inference_and_print_debug(program_jsonbuilder_val) {
return failwith_on_panic( || {
run_pointer_inference_and_print_debug(program_jsonbuilder_val);
ocaml::Value::unit()
});
});
/*!
# Foreign Function Interface
This module contains all functions that interact with Ocaml via the foreign function interface.
*/
use std::rc::Rc;
pub mod analysis;
pub mod serde;
/// Helper function for catching panics at the ffi-border.
/// If a panic occurs while executing F and that panic unwinds the stack,
/// the panic is caught and an Ocaml failwith exception is thrown instead.
///
/// Stack unwinding through a panic across a ffi-boundary is undefined behaviour.
/// As of Rust 1.41 catching panics at ffi-borders is still not the default behaviour,
/// since it would break backwards compatibility with some crates depending on this undefined behaviour.
/// Throwing an Ocaml failwith exception instead allows stack unwinding and better error messages.
/// Note that the Ocaml exception should *not* be caught,
/// since recovering from it may lead to undefined behavior on the Rust side.
fn failwith_on_panic<F, T>(closure: F) -> T
where
F: FnOnce() -> T,
{
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(closure)) {
Ok(value) => value,
Err(_) => {
// Throw an Ocaml failwith-exception.
// This may not be safe if the exception is caught and recovered from on the Ocaml side!
// We assume that these errors are only caught for error printing but not for recovering from it.
ocaml::runtime::failwith("Rust-Panic catched at FFI-boundary");
std::process::abort();
}
}
}
/// This is a convenience trait for objects that may be sent as opaque objects across the ffi-boundary to Ocaml.
/// For that they are wrapped as Rc<T>.
/// Note that this trait does not prevent memory leaks in itself!
/// Whenever such an object is created and sent across the ffi-boundary,
/// the finalizer must be attached to it on the Ocaml side!
trait OcamlSendable: std::marker::Sized {
/// Pack the object into an Ocaml value
fn to_ocaml(self) -> ocaml::Value {
let boxed_val = Rc::new(self);
ocaml::Value::nativeint(Rc::into_raw(boxed_val) as isize)
}
/// Unpack an object that is stored as a `Rc<T>` wrapped in an Ocaml value.
///
/// Note that the caller has to ensure that the wrapped object has the correct type.
unsafe fn from_ocaml(ocaml_val: &ocaml::Value) -> &Self {
let ptr: *const Self = ocaml_val.nativeint_val() as *const Self;
ptr.as_ref().unwrap()
}
/// Unpack a `Rc<T>` object wrapped in an Ocaml value and return a clone of it.
///
/// Note that the caller has to ensure that the wrapped object has the correct type.
unsafe fn from_ocaml_rc(ocaml_val: &ocaml::Value) -> Rc<Self> {
let ptr: *const Self = ocaml_val.nativeint_val() as *const Self;
let rc_box = Rc::from_raw(ptr);
let rc_clone = rc_box.clone(); // Increasing the reference count by 1
let _ = Rc::into_raw(rc_box); // Do not decrease the reference count when rc_box goes out of scope!
rc_clone
}
fn ocaml_finalize(ocaml_val: ocaml::Value) {
let ptr: *const Self = ocaml_val.nativeint_val() as *const Self;
let _ = unsafe { Rc::from_raw(ptr) };
}
}
/*!
# FFI-functions for generating serde_json objects
This module defines functions for generating opaque serde_json::Value objects in Ocaml
which can then be deserialized with Serde on the Rust side. Signatures of the provided functions:
```Ocaml
type serde_json = nativeint (* This stores pointers, so treat this as an opaque type! *)
external build_null: unit -> serde_json = "rs_build_serde_null"
external build_bool: bool -> serde_json = "rs_build_serde_bool"
external build_number: int -> serde_json = "rs_build_serde_number"
external build_string: string -> serde_json = "rs_build_serde_string"
external build_array: serde_json list -> serde_json = "rs_build_serde_array_from_list"
external build_object: (string * serde_json) list -> serde_json = "rs_build_serde_object"
external build_bitvector: string -> serde_json = "rs_build_serde_bitvector"
(* Convert a serde_json object to a json string (used for unit tests). *)
external to_string: serde_json -> string = "rs_convert_json_to_string"
```
*/
use super::OcamlSendable;
use ocaml::{FromValue, ToValue};
use std::iter::FromIterator;
use std::rc::Rc;
use std::str::FromStr;
use super::failwith_on_panic;
/// A builder type for serde_json::Value objects.
///
/// Hiding the recursive nature of the data type behind reference counts prevents unneccessary
/// deep copies when creating json objects from Ocaml, which would lead to a runtime quadratic in the size of the json object.
/// However, when converting to serde_json::Value, one deep copy is still necessary.
#[derive(Clone, Debug)]
pub enum JsonBuilder {
Null,
Bool(bool),
Number(isize),
PositiveNumber(u64), // currently used only for deserialization of bitvector
String(String),
Array(Vec<Rc<JsonBuilder>>),
Object(Vec<(String, Rc<JsonBuilder>)>),
}
impl OcamlSendable for JsonBuilder {}
/// Creating a serde_json::Value performing deep copy.
impl From<&JsonBuilder> for serde_json::Value {
fn from(builder: &JsonBuilder) -> serde_json::Value {
match builder {
JsonBuilder::Null => serde_json::Value::Null,
JsonBuilder::Bool(val) => serde_json::Value::Bool(*val),
JsonBuilder::Number(val) => serde_json::Value::Number(serde_json::Number::from(*val)),
JsonBuilder::PositiveNumber(val) => {
serde_json::Value::Number(serde_json::Number::from(*val))
}
JsonBuilder::String(val) => serde_json::Value::String(val.to_string()),
JsonBuilder::Array(elem_vec) => elem_vec
.iter()
.map(|rc_elem| serde_json::Value::from(&**rc_elem))
.collect(),
JsonBuilder::Object(tuple_vec) => serde_json::Value::Object(
serde_json::Map::from_iter(tuple_vec.iter().map(|(string_ref, json_builder)| {
(
string_ref.to_string(),
serde_json::Value::from(&**json_builder),
)
})),
),
}
}
}
caml!(rs_finalize_json_builder(builder_val) {
return failwith_on_panic( || {
JsonBuilder::ocaml_finalize(builder_val);
ocaml::Value::unit()
});
});
/// Build JsonBuilder::Null as Ocaml value
fn build_serde_null() -> ocaml::Value {
JsonBuilder::Null.to_ocaml()
}
caml!(rs_build_serde_null(_unit) {
return failwith_on_panic( || {
build_serde_null()
});
});
/// Build JsonBuilder::Bool as Ocaml value
fn build_serde_bool(bool_val: ocaml::Value) -> ocaml::Value {
let boolean: bool = bool::from_value(bool_val);
JsonBuilder::Bool(boolean).to_ocaml()
}
caml!(rs_build_serde_bool(bool_val) {
return failwith_on_panic( || {
build_serde_bool(bool_val)
});
});
/// Build JsonBuilder::Number as Ocaml value
fn build_serde_number(num: ocaml::Value) -> ocaml::Value {
let num: isize = ocaml::Value::isize_val(&num);
JsonBuilder::Number(num).to_ocaml()
}
caml!(rs_build_serde_number(number) {
return failwith_on_panic( || {
build_serde_number(number)
});
});
/// Build JsonBuilder::Object representing a bitvector from a string generated by `Bitvector.to_string` in Ocaml
fn build_serde_bitvector(bitvector_string_val: ocaml::Value) -> ocaml::Value {
let string = <&str>::from_value(bitvector_string_val);
let elements: Vec<&str> = string.split(':').collect();
let width = usize::from_str(&elements[1][0..(elements[1].len() - 1)])
.expect("Bitvector width parsing failed");
assert!(width > 0);
let mut num_list = Vec::new();
let mut number_slice: &str = elements[0];
if number_slice.starts_with("0x") {
number_slice = &number_slice[2..];
}
while number_slice.len() > 0 {
if number_slice.len() > 16 {
let digit = u64::from_str_radix(&number_slice[(number_slice.len() - 16)..], 16)
.expect("Bitvector value parsing failed");
num_list.push(Rc::new(JsonBuilder::PositiveNumber(digit)));
number_slice = &number_slice[..(number_slice.len() - 16)];
} else {
let digit =
u64::from_str_radix(&number_slice, 16).expect("Bitvector value parsing failed");
num_list.push(Rc::new(JsonBuilder::PositiveNumber(digit)));
number_slice = "";
};
}
while num_list.len() <= (width - 1) / 64 {
num_list.push(Rc::new(JsonBuilder::PositiveNumber(0)));
}
num_list.reverse(); // since the digits were parsed in reverse order
let mut width_list = Vec::new();
width_list.push(Rc::new(JsonBuilder::Number(width as isize)));
let result = JsonBuilder::Object(vec![
("digits".to_string(), Rc::new(JsonBuilder::Array(num_list))),
("width".to_string(), Rc::new(JsonBuilder::Array(width_list))),
]);
// TODO: remove deserialization check
let check_serde = serde_json::to_string(&serde_json::Value::from(&result)).unwrap();
let _bitv: apint::ApInt = serde_json::from_str(&check_serde)
.expect(&format!("Invalid value generated: {}", check_serde));
result.to_ocaml()
}
caml!(rs_build_serde_bitvector(bitvector_string) {
return failwith_on_panic( || {
build_serde_bitvector(bitvector_string)
});
});
/// Build JsonBuilder::String as Ocaml value
fn build_serde_string(string_val: ocaml::Value) -> ocaml::Value {
let string = String::from_value(string_val);
JsonBuilder::String(string).to_ocaml()
}
caml!(rs_build_serde_string(string_val) {
return failwith_on_panic( || {
build_serde_string(string_val)
});
});
/// Build JsonBuilder::Array as Ocaml value from an Ocaml list
fn build_serde_array_from_list(list_val: ocaml::Value) -> ocaml::Value {
let ocaml_list = ocaml::List::from(list_val);
let value_vec = ocaml_list.to_vec();
let vec = value_vec
.into_iter()
.map(|ocaml_val| unsafe { JsonBuilder::from_ocaml_rc(&ocaml_val) })
.collect();
JsonBuilder::Array(vec).to_ocaml()
}
caml!(rs_build_serde_array_from_list(list_val) {
return failwith_on_panic( || {
build_serde_array_from_list(list_val)
});
});
/// Build JsonBuilder::Object as Ocaml value from an Ocaml list of tuples
fn build_serde_object(tuple_list_val: ocaml::Value) -> ocaml::Value {
let ocaml_list = ocaml::List::from(tuple_list_val);
let pairs_vec = ocaml_list.to_vec();
let pairs = pairs_vec
.into_iter()
.map(|ocaml_tuple| {
let tuple = ocaml::Tuple::from(ocaml_tuple);
let key_ocaml = tuple
.get(0)
.expect("Error: Ocaml tuple contains no element");
let key = String::from_value(key_ocaml);
let value_ocaml: ocaml::Value = tuple
.get(1)
.expect("Error: Ocaml tuple contains not enough elements");
let data = unsafe { JsonBuilder::from_ocaml_rc(&value_ocaml) };
(key, data)
})
.collect();
JsonBuilder::Object(pairs).to_ocaml()
}
caml!(rs_build_serde_object(tuple_list_val) {
return failwith_on_panic( || {
build_serde_object(tuple_list_val)
});
});
/// Get the Json string corresponding to a JsonBuilder object and return it as an Ocaml value.
fn get_json_string(builder_val: ocaml::Value) -> ocaml::Value {
let builder = unsafe { JsonBuilder::from_ocaml(&builder_val) };
let json_string = serde_json::Value::from(builder).to_string();
ocaml::Str::from(&json_string as &str).to_value()
}
caml!(rs_convert_json_to_string(builder_val) {
return failwith_on_panic( || {
get_json_string(builder_val)
});
});
/*!
# cwe_checker_rs
Parts of the cwe_checker that are written in Rust.
*/
#[macro_use]
extern crate ocaml;
pub mod analysis;
pub mod bil;
pub mod ffi;
pub mod term;
pub mod utils;
mod prelude {
pub use apint::Width;
pub use serde::{Deserialize, Serialize};
pub use crate::bil::{BitSize, Bitvector};
pub use crate::term::Tid;
pub use anyhow::{anyhow, Error};
}
#[cfg(test)]
mod tests {
#[test]
fn it_works() {
assert_eq!(2 + 2, 4);
}
}
use crate::bil::*;
use serde::{Deserialize, Serialize};
pub mod symbol;
use symbol::ExternSymbol;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct Tid {
id: String,
pub address: String,
}
impl Tid {
pub fn new<T: ToString>(val: T) -> Tid {
Tid {
id: val.to_string(),
address: "UNKNOWN".to_string(),
}
}
}
impl std::fmt::Display for Tid {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(formatter, "{}", self.id)
}
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Term<T> {
pub tid: Tid,
pub term: T,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Def {
pub lhs: Variable,
pub rhs: Expression,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Jmp {
pub condition: Option<Expression>,
pub kind: JmpKind,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum JmpKind {
Call(Call),
Goto(Label),
Return(Label),
Interrupt { value: isize, return_addr: Tid },
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Call {
pub target: Label,
pub return_: Option<Label>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Label {
Direct(Tid),
Indirect(Expression),
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Blk {
pub defs: Vec<Term<Def>>,
pub jmps: Vec<Term<Jmp>>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Sub {
pub name: String,
pub blocks: Vec<Term<Blk>>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
pub subs: Vec<Term<Sub>>,
pub extern_symbols: Vec<ExternSymbol>,
pub entry_points: Vec<Tid>,
}
// TODO: Add deserialization from Ocaml to the FFI module for project!
// TODO: Add other CPU-architecture specific data to this struct!
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
pub program: Term<Program>,
pub cpu_architecture: String,
pub stack_pointer_register: Variable,
pub callee_saved_registers: Vec<String>,
pub parameter_registers: Vec<String>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Arg {
pub var: Variable,
pub location: Expression,
pub intent: ArgIntent,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum ArgIntent {
Input,
Output,
Both,
Unknown,
}
impl ArgIntent {
pub fn is_input(&self) -> bool {
match self {
Self::Input | Self::Both | Self::Unknown => true,
Self::Output => false,
}
}
pub fn is_output(&self) -> bool {
match self {
Self::Output | Self::Both | Self::Unknown => true,
Self::Input => false,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn term_deserialization() {
let string = "{\"term\":{\"defs\":[],\"jmps\":[]},\"tid\":{\"id\":\"@block\",\"address\":\"UNKNOWN\"}}";
let tid = Tid::new("@block".to_string());
let block_term = Term {
tid,
term: Blk {
defs: Vec::new(),
jmps: Vec::new(),
},
};
println!("{}", serde_json::to_string(&block_term).unwrap());
assert_eq!(block_term, serde_json::from_str(&string).unwrap());
}
}
use super::Arg;
use crate::bil::*;
use crate::prelude::*;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
pub tid: Tid,
pub address: String,
pub name: String,
pub calling_convention: Option<String>,
pub arguments: Vec<Arg>,
}
impl ExternSymbol {
/// Returns the return register of an extern symbol.
/// Returns an error if the function has not exactly one return argument
/// or if the return argument is not a register.
pub fn get_unique_return_register(&self) -> Result<&crate::bil::variable::Variable, Error> {
let return_args: Vec<_> = self
.arguments
.iter()
.filter(|arg| arg.intent.is_output())
.collect();
if return_args.len() != 1 {
return Err(anyhow!(
"Wrong number of return register: Got {}, expected 1",
return_args.len()
));
}
match &return_args[0].location {
Expression::Var(var) => Ok(var),
_ => Err(anyhow!("Return location is not a register"))?,
}
}
/// Returns the parameter expression of an extern symbol.
/// Returns an error if the function has not exactly one parameter argument.
pub fn get_unique_parameter(&self) -> Result<&crate::bil::Expression, Error> {
let param_args: Vec<_> = self
.arguments
.iter()
.filter(|arg| arg.intent.is_input())
.collect();
if param_args.len() != 1 {
return Err(anyhow!(
"Wrong number of return register: Got {}, expected 1",
param_args.len()
));
}
Ok(&param_args[0].location)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn extern_symbol_serialization() {
let symbol = ExternSymbol {
tid: Tid::new("Tid"),
address: "Somewhere".to_string(),
name: "extern_fn".to_string(),
calling_convention: Some("cconv".to_string()),
arguments: Vec::new(),
};
let json: String = serde_json::to_string_pretty(&symbol).unwrap();
println!("{}", json);
let _symbol: ExternSymbol = serde_json::from_str(&json).unwrap();
}
}
use crate::analysis::abstract_domain::AbstractDomain;
use crate::prelude::*;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
// TODO: This is a helper not only for abstract domains! It needs its own source file!
#[derive(Serialize, Deserialize, Debug, Hash, Clone)]
pub struct FastCmpArc<T>(pub Arc<T>);
impl<T: PartialEq + Eq> PartialEq for FastCmpArc<T> {
fn eq(&self, other: &Self) -> bool {
if Arc::ptr_eq(&self.0, &other.0) {
true
} else {
self.0.eq(&other.0)
}
}
}
impl<T: Eq> Eq for FastCmpArc<T> {}
impl<T: AbstractDomain + Clone> AbstractDomain for FastCmpArc<T> {
fn top(&self) -> Self {
FastCmpArc(Arc::new(self.0.top()))
}
fn merge(&self, other: &Self) -> Self {
if Arc::ptr_eq(&self.0, &other.0) {
self.clone()
} else {
FastCmpArc(Arc::new(self.0.merge(&other.0)))
}
}
}
impl<T: PartialOrd + Ord> PartialOrd for FastCmpArc<T> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl<T: PartialOrd + Ord> Ord for FastCmpArc<T> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
if Arc::ptr_eq(&self.0, &other.0) {
std::cmp::Ordering::Equal
} else {
self.0.cmp(&other.0)
}
}
}
impl<T> Deref for FastCmpArc<T> {
type Target = T;
fn deref(&self) -> &T {
&self.0
}
}
impl<T: Clone> DerefMut for FastCmpArc<T> {
fn deref_mut(&mut self) -> &mut T {
Arc::make_mut(&mut self.0)
}
}
use crate::prelude::*;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Default)]
pub struct CweWarning {
pub name: String,
pub version: String,
pub addresses: Vec<String>,
pub tids: Vec<String>,
pub symbols: Vec<String>,
pub other: Vec<Vec<String>>,
pub description: String,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct LogMessage {
pub text: String,
pub level: LogLevel,
pub location: Option<Tid>,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum LogLevel {
Debug,
Error,
Info,
}
impl std::fmt::Display for LogMessage {
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(ref tid) = self.location {
match self.level {
LogLevel::Debug => write!(formatter, "Debug: {}: {}", tid.address, self.text),
LogLevel::Error => write!(formatter, "Error: {}: {}", tid.address, self.text),
LogLevel::Info => write!(formatter, "Info: {}: {}", tid.address, self.text),
}
} else {
match self.level {
LogLevel::Debug => write!(formatter, "Debug: {}", self.text),
LogLevel::Error => write!(formatter, "Error: {}", self.text),
LogLevel::Info => write!(formatter, "Info: {}", self.text),
}
}
}
}
pub mod fast_cmp_arc;
pub mod log;
(lang dune 1.6) (lang dune 2.0)
(name cwe_checker) (name cwe_checker)
...@@ -67,6 +67,7 @@ The names of all available modules can be printed with the [-module-versions] co ...@@ -67,6 +67,7 @@ The names of all available modules can be printed with the [-module-versions] co
To annotate CWE-hits in IDA Pro or Ghidra, first run {i cwe_checker} and save the JSON-formatted output to a file. To annotate CWE-hits in IDA Pro or Ghidra, first run {i cwe_checker} and save the JSON-formatted output to a file.
{[bap [BINARY] --pass=cwe-checker --cwe-checker-json --cwe-checker-out=cwe_hits.json]} {[bap [BINARY] --pass=cwe-checker --cwe-checker-json --cwe-checker-out=cwe_hits.json]}
After that execute the tool-specific script to import the results:
- For IDA Pro run the [cwe_checker_to_ida.py] script located in the [cwe_checker_to_ida] folder. - For IDA Pro run the [cwe_checker_to_ida.py] script located in the [cwe_checker_to_ida] folder.
{[python3 cwe_checker_to_ida.py -i cwe_hits.json -o cwe_hits.py]} {[python3 cwe_checker_to_ida.py -i cwe_hits.json -o cwe_hits.py]}
Now open the binary file in IDA Pro and execute the generated [cwe_hits.py] script from within IDA Pro (Alt+F7). Now open the binary file in IDA Pro and execute the generated [cwe_hits.py] script from within IDA Pro (Alt+F7).
......
all:
bapbuild -pkgs yojson,unix,ppx_jane,cwe_checker_core cwe_checker_pointer_inference_debug.plugin
bapbundle install cwe_checker_pointer_inference_debug.plugin
clean:
bapbuild -clean
uninstall:
bapbundle remove cwe_checker_pointer_inference_debug.plugin
open Bap.Std
open Core_kernel
open Cwe_checker_core
include Self()
let main project =
let program = Project.program project in
let tid_map = Address_translation.generate_tid_map program in
Pointer_inference.run_and_print_debug project tid_map
module Cmdline = struct
open Config
let () = when_ready (fun ({get=(!!)}) -> Project.register_pass' main)
let () = manpage [`S "DESCRIPTION";
`P "This plugin prints verbose debug information from the pointer inference analysis of the cwe_checker to stdout."]
end
open Bap.Std
open Core_kernel
external rs_run_pointer_inference: Serde_json.t -> string = "rs_run_pointer_inference"
external rs_run_pointer_inference_and_print_debug: Serde_json.t -> unit = "rs_run_pointer_inference_and_print_debug"
type cwelist = Log_utils.CweWarning.t array [@@deriving yojson]
let run (project: Project.t) (tid_map: Bap.Std.word Bap.Std.Tid.Map.t) : unit =
let program = Project.program project in
let entry_points = Symbol_utils.get_program_entry_points program in
let entry_points = List.map entry_points ~f:(fun sub -> Term.tid sub) in
let extern_symbols = Symbol_utils.build_and_return_extern_symbols project program tid_map in
let project_serde = Serde_json.of_project project extern_symbols entry_points tid_map in
let cwe_warnings_json = Yojson.Safe.from_string @@ rs_run_pointer_inference project_serde in
match cwe_warnings_json with
| `List ((`List cwe_warnings) :: (`List log_messages) :: []) ->
List.iter cwe_warnings ~f:(fun warning -> Log_utils.collect_cwe_warning @@ Result.ok_or_failwith @@ Log_utils.CweWarning.of_yojson warning);
List.iter log_messages ~f:(fun message ->
match message with
| `String message_string ->
begin match String.lsplit2 message_string ~on:':' with
| Some("Error", msg) -> Log_utils.error @@ String.strip msg
| Some("Debug", msg) -> Log_utils.debug @@ String.strip msg
| Some("Info", msg) -> Log_utils.info @@ String.strip msg
| _ -> failwith "Malformed log-message."
end
| _ -> failwith "Log-message is not a string."
)
| _ -> failwith "Log-message-json not as expected"
let run_and_print_debug (project: Project.t) (tid_map: Bap.Std.word Bap.Std.Tid.Map.t) : unit =
let program = Project.program project in
let entry_points = Symbol_utils.get_program_entry_points program in
let entry_points = List.map entry_points ~f:(fun sub -> Term.tid sub) in
let extern_symbols = Symbol_utils.build_and_return_extern_symbols project program tid_map in
let project_serde = Serde_json.of_project project extern_symbols entry_points tid_map in
rs_run_pointer_inference_and_print_debug project_serde
(** This module manages the communication with the actual pointer inference analysis
through the foreign function interface to Rust.
*)
open Bap.Std
(** Run the pointer inference analysis and log the returned CWE warnings and log messages. *)
val run: Project.t -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
(** Run the pointer inference analysis and print the computed state of each basic block
(at the start and at the end of the basic block respectively)
as json to stdout.
Does not print log messages or CWE warnings.
The output is meant for debugging purposes.
*)
val run_and_print_debug: Project.t -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
open Bap.Std
let name = "Memory"
let version = "0.1"
let check_cwe (_program: Program.t) (project: Project.t) (tid_map: word Tid.Map.t) (_: string list list) (_: string list) =
Pointer_inference.run project tid_map
(** This module implements memory-related CWE checks.
Right now the check detects cases of
- {{: https://cwe.mitre.org/data/definitions/415.html} CWE 415: Double Free}
- {{: https://cwe.mitre.org/data/definitions/416.html} CWE 416: Use After Free}
{1 How the check works}
Via Dataflow Analysis, the check tries to keep track of all memory objects and pointers
known at specific points in the program.
It also keeps track of the status of memory object, i.e. if they have been already freed.
Access to an already freed object generates a CWE warning.
In cases where the analysis cannot reliably determine whether accessed memory has been freed or not,
a CWE warning may (or may not) be issued to the user based on the likelihood of it being a false positive.
Note that the check is still experimental.
Bugs may occur and the rate of false positive and false negative warnings is not yet known.
*)
val name: string
val version: string
val check_cwe: Bap.Std.program Bap.Std.term -> Bap.Std.project -> Bap.Std.word Bap.Std.Tid.Map.t -> string list list -> string list -> unit
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
core_kernel core_kernel
ppx_deriving_yojson.runtime) ppx_deriving_yojson.runtime)
(preprocess (pps ppx_jane ppx_deriving_yojson)) (preprocess (pps ppx_jane ppx_deriving_yojson))
(foreign_archives cwe_checker_rs)
(c_library_flags (-lpthread -lc -lm)) ; needed for linking the Rust runtime
) )
(include_subdirs unqualified) ; Include all subdirs when looking for source files (include_subdirs unqualified) ; Include all subdirs when looking for source files
(lang dune 1.6) (lang dune 2.0)
...@@ -24,7 +24,8 @@ let known_modules = [{cwe_func = Cwe_190.check_cwe; name = Cwe_190.name; version ...@@ -24,7 +24,8 @@ let known_modules = [{cwe_func = Cwe_190.check_cwe; name = Cwe_190.name; version
{cwe_func = Cwe_476.check_cwe; name = Cwe_476.name; version = Cwe_476.version; requires_pairs = false; has_parameters = true}; {cwe_func = Cwe_476.check_cwe; name = Cwe_476.name; version = Cwe_476.version; requires_pairs = false; has_parameters = true};
{cwe_func = Cwe_560.check_cwe; name = Cwe_560.name; version = Cwe_560.version; requires_pairs = false; has_parameters = false}; {cwe_func = Cwe_560.check_cwe; name = Cwe_560.name; version = Cwe_560.version; requires_pairs = false; has_parameters = false};
{cwe_func = Cwe_676.check_cwe; name = Cwe_676.name; version = Cwe_676.version; requires_pairs = false; has_parameters = false}; {cwe_func = Cwe_676.check_cwe; name = Cwe_676.name; version = Cwe_676.version; requires_pairs = false; has_parameters = false};
{cwe_func = Cwe_782.check_cwe; name = Cwe_782.name; version = Cwe_782.version; requires_pairs = false; has_parameters = false}] {cwe_func = Cwe_782.check_cwe; name = Cwe_782.name; version = Cwe_782.version; requires_pairs = false; has_parameters = false};
{cwe_func = Memory_cwes.check_cwe; name = Memory_cwes.name; version = Memory_cwes.version; requires_pairs = false; has_parameters = false}]
let cmdline_flags = [ let cmdline_flags = [
...@@ -97,7 +98,10 @@ let full_run project config = ...@@ -97,7 +98,10 @@ let full_run project config =
let program = Project.program project in let program = Project.program project in
let tid_address_map = Address_translation.generate_tid_map program in let tid_address_map = Address_translation.generate_tid_map program in
let json = Yojson.Basic.from_file config in let json = Yojson.Basic.from_file config in
List.iter known_modules ~f:(fun cwe -> execute_cwe_module cwe json program project tid_address_map) let full_run_modules = List.filter known_modules ~f:(fun cwe_module ->
cwe_module.name <> "Memory" (* TODO: Remove this when the memory check is more stable *)
) in
List.iter full_run_modules ~f:(fun cwe -> execute_cwe_module cwe json program project tid_address_map)
let build_output_path (path : string) : string = let build_output_path (path : string) : string =
......
...@@ -130,15 +130,11 @@ let parse_dyn_sym_line (line : string) : string option = ...@@ -130,15 +130,11 @@ let parse_dyn_sym_line (line : string) : string option =
done; done;
str_list := !line :: !str_list; str_list := !line :: !str_list;
match !str_list with match !str_list with
| value :: func1 :: func2 :: _ -> begin | _value :: func1 :: func2 :: _ -> begin
match ( String.strip ~drop:(fun x -> x = '0') value ) with if (String.equal func1 "DF" || String.equal func2 "DF") then (
| "" -> begin List.last !str_list
if (String.equal func1 "DF" || String.equal func2 "DF") then ( )
List.last !str_list else None
)
else None
end
| _ -> None (* The symbol has a nonzero value, so we assume that it is not an extern function symbol. *)
end end
| _ -> None | _ -> None
......
(* This module implements functionality related to parsing the JSON configuration file. *) (** This module implements functionality related to parsing the JSON configuration file.
*)
(** Returns pairs of symbols for a given CWE check. *) (** Returns pairs of symbols for a given CWE check. *)
......
...@@ -23,7 +23,7 @@ module CweWarning : sig ...@@ -23,7 +23,7 @@ module CweWarning : sig
symbols: string list; symbols: string list;
other : string list list; other : string list list;
description : string; description : string;
} } [@@deriving yojson]
end end
module CheckPath : sig module CheckPath : sig
......
(** This module allows the creation of SerdeJson objects that can be deserialized
to the corresponding data type in Rust.
Note that this is not optimized for speed, extensive usage could lead to measureable slowdown.
*)
open Core_kernel
open Bap.Std
type t
(** Build a Json Null object *)
val build_null: unit -> t
(** Build a Json boolean object *)
val build_bool: Bool.t -> t
(** Build a Json number object *)
val build_number: int -> t
(** Build a Json string object *)
val build_string: String.t -> t
(** Build a Json array object from a list of Json objects *)
val build_array: t List.t -> t
(** Build a Json object from a list of key-value-pairs *)
val build_object: (String.t * t) List.t -> t
(** Get the Json string corresponding to a Json object *)
val to_string: t -> String.t
val of_var_type: Bil.Types.typ -> t
val of_var: Var.t -> t
val of_cast_type: Bil.Types.cast -> t
val of_binop_type: Bil.Types.binop -> t
val of_unop_type: Bil.Types.unop -> t
val of_endianness: Bitvector.endian -> t
val of_bitvector: Bitvector.t -> t
val of_exp: Exp.t -> t
val of_tid: Tid.t -> word Tid.Map.t -> t
val of_def: Def.t -> word Tid.Map.t -> t
val of_jmp_label: Label.t -> word Tid.Map.t -> t
val of_call: Call.t -> word Tid.Map.t -> t
val of_jmp_kind: jmp_kind -> word Tid.Map.t -> t
val of_jmp: Jmp.t -> word Tid.Map.t -> t
val of_blk: Blk.t -> word Tid.Map.t -> t
val of_sub: Sub.t -> word Tid.Map.t -> t
val of_extern_symbol: Symbol_utils.extern_symbol -> word Tid.Map.t -> t
val of_program: Program.t -> Symbol_utils.extern_symbol List.t -> Tid.t List.t -> word Tid.Map.t -> t
val of_project: Project.t -> Symbol_utils.extern_symbol List.t -> Tid.t List.t -> word Tid.Map.t -> t
...@@ -2,12 +2,14 @@ import os ...@@ -2,12 +2,14 @@ import os
import subprocess import subprocess
def build_bap_cmd(filename, target, arch, compiler): def build_bap_cmd(filename, target, arch, compiler, check_name = None):
if check_name is None:
check_name = 'CWE%s' % target
if 'travis' in os.environ['USER']: if 'travis' in os.environ['USER']:
abs_path = os.path.abspath('test/artificial_samples/build/cwe_%s_%s_%s.out' % (filename, arch, compiler)) abs_path = os.path.abspath('test/artificial_samples/build/cwe_%s_%s_%s.out' % (filename, arch, compiler))
cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest bap /tmp/input --pass=cwe-checker --cwe-checker-partial=CWE%s --cwe-checker-config=/home/bap/cwe_checker/src/config.json' % (abs_path, target) cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest cwe_checker /tmp/input -partial=%s' % (abs_path, check_name)
else: else:
cmd = 'bap test/artificial_samples/build/cwe_%s_%s_%s.out --pass=cwe-checker --cwe-checker-partial=CWE%s --cwe-checker-config=src/config.json' % (filename, arch, compiler, target) cmd = 'cwe_checker test/artificial_samples/build/cwe_%s_%s_%s.out -partial=%s' % (filename, arch, compiler, check_name)
return cmd.split() return cmd.split()
...@@ -20,9 +22,9 @@ def build_bap_emulation_cmd(filename, target, arch, compiler): ...@@ -20,9 +22,9 @@ def build_bap_emulation_cmd(filename, target, arch, compiler):
return cmd.split() return cmd.split()
def execute_and_check_occurence(filename, target, arch, compiler, string): def execute_and_check_occurence(filename, target, arch, compiler, string, check_name = None):
occurence = 0 occurence = 0
bap_cmd = build_bap_cmd(filename, target, arch, compiler) bap_cmd = build_bap_cmd(filename, target, arch, compiler, check_name)
output = subprocess.check_output(bap_cmd) output = subprocess.check_output(bap_cmd)
for l in output.splitlines(): for l in output.splitlines():
if string in l: if string in l:
......
import unittest
import cwe_checker_testlib
class TestCwe415(unittest.TestCase):
def setUp(self):
self.target = '415'
self.string = b'Double Free'
def test_cwe415_01_x64_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x64', 'gcc', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_x64_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x64', 'clang', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_x86_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x86', 'gcc', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_x86_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x86', 'clang', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_arm_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'arm', 'gcc', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_arm_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'arm', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_aarch64_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'aarch64', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_aarch64_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'aarch64', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("Depends on proper MIPS support in BAP")
def test_cwe415_01_mips_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips', 'gcc', self.string)
self.assertEqual(res, expect_res)
def test_cwe415_01_mips_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mipsel_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mipsel', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mipsel_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mipsel', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mips64_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips64', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mips64_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips64', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mips64el_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips64el', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_mips64el_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'mips64el', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Check again when moved to BAP 2.1')
def test_cwe415_01_ppc_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_ppc64_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc64', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_ppc64_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc64', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_ppc64le_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc64le', 'gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME")
def test_cwe415_01_ppc64le_clang(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'ppc64le', 'clang', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Only finds two of three possible double free')
def test_cwe415_01_x86_mingw_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x86', 'mingw32-gcc', self.string)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Only finds two of three possible double free')
def test_cwe415_01_x64_mingw_gcc(self):
expect_res = 3
res = cwe_checker_testlib.execute_emulation_and_check_occurence(
self.target, self.target, 'x64', 'mingw32-gcc', self.string)
self.assertEqual(res, expect_res)
...@@ -8,9 +8,9 @@ class TestCheckPath(unittest.TestCase): ...@@ -8,9 +8,9 @@ class TestCheckPath(unittest.TestCase):
def setUp(self): def setUp(self):
if 'travis' in os.environ['USER']: if 'travis' in os.environ['USER']:
abs_path = os.path.abspath('test/artificial_samples/build/check_path_x64_gcc.out') abs_path = os.path.abspath('test/artificial_samples/build/check_path_x64_gcc.out')
self.cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest bap /tmp/input --pass=cwe-checker --cwe-checker-config=/home/bap/cwe_checker/src/config.json --cwe-checker-json --cwe-checker-check-path' % abs_path self.cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest cwe_checker /tmp/input -config=/home/bap/cwe_checker/src/config.json -json -check-path -no-logging' % abs_path
else: else:
self.cmd = 'bap test/artificial_samples/build/check_path_x64_gcc.out --pass=cwe-checker --cwe-checker-config=src/config.json --cwe-checker-json --cwe-checker-check-path' self.cmd = 'cwe_checker test/artificial_samples/build/check_path_x64_gcc.out -config=src/config.json -json -check-path -no-logging'
def test_check_path_01_x64_gcc(self): def test_check_path_01_x64_gcc(self):
output = subprocess.check_output(self.cmd.split()) output = subprocess.check_output(self.cmd.split())
......
import unittest
import cwe_checker_testlib
class TestCwe415(unittest.TestCase):
def setUp(self):
self.target = '415'
self.string = b'Double Free'
self.check_name = 'Memory'
def test_cwe415_01_x64_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_x64_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME: Needs pointer alignment tracking, or else SP = SP & Const loses the stack offset')
def test_cwe415_01_x86_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x86', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_x86_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x86', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_arm_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'arm', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_arm_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'arm', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_aarch64_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'aarch64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Not supported by BAP. (no recognizable code backtrace)')
def test_cwe415_01_aarch64_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'aarch64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe415_01_mips_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe415_01_mips_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe415_01_mipsel_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mipsel', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("FIXME: Check again when BAP handles the ZERO register of MIPS.")
def test_cwe415_01_mipsel_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mipsel', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("BAP does not recognize extern calls")
def test_cwe415_01_mips64_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("BAP does not recognize extern calls")
def test_cwe415_01_mips64_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("BAP does not recognize extern calls")
def test_cwe415_01_mips64el_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64el', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip("BAP does not recognize extern calls")
def test_cwe415_01_mips64el_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'mips64el', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
def test_cwe415_01_ppc_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe415_01_ppc64_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe415_01_ppc64_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe415_01_ppc64le_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64le', 'gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('Dynamic Symbol calls are mangled by BAP')
def test_cwe415_01_ppc64le_clang(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'ppc64le', 'clang', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME')
def test_cwe415_01_x86_mingw_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x86', 'mingw32-gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
@unittest.skip('FIXME')
def test_cwe415_01_x64_mingw_gcc(self):
expect_res = 2
res = cwe_checker_testlib.execute_and_check_occurence(
self.target, self.target, 'x64', 'mingw32-gcc', self.string, self.check_name)
self.assertEqual(res, expect_res)
...@@ -8,9 +8,9 @@ class TestJson(unittest.TestCase): ...@@ -8,9 +8,9 @@ class TestJson(unittest.TestCase):
def setUp(self): def setUp(self):
if 'travis' in os.environ['USER']: if 'travis' in os.environ['USER']:
abs_path = os.path.abspath('test/artificial_samples/build/cwe_190_x64_gcc.out') abs_path = os.path.abspath('test/artificial_samples/build/cwe_190_x64_gcc.out')
self.cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest bap /tmp/input --pass=cwe-checker --cwe-checker-config=/home/bap/cwe_checker/src/config.json --cwe-checker-json' % abs_path self.cmd = 'docker run --rm -v %s:/tmp/input cwe-checker:latest cwe_checker /tmp/input -config=/home/bap/cwe_checker/src/config.json -json -no-logging' % abs_path
else: else:
self.cmd = 'bap test/artificial_samples/build/cwe_190_x64_gcc.out --pass=cwe-checker --cwe-checker-config=src/config.json --cwe-checker-json' self.cmd = 'cwe_checker test/artificial_samples/build/cwe_190_x64_gcc.out -config=src/config.json -json -no-logging'
def test_can_output_json(self): def test_can_output_json(self):
output = subprocess.check_output(self.cmd.split()) output = subprocess.check_output(self.cmd.split())
......
...@@ -8,7 +8,7 @@ all: ...@@ -8,7 +8,7 @@ all:
bap tmp/arrays_arm_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv bap tmp/arrays_arm_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv
bap tmp/arrays_mips64_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv bap tmp/arrays_mips64_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv
bap tmp/arrays_powerpc_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv bap tmp/arrays_powerpc_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv
bap tmp/arrays_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=MemRegion,TypeInference,CWE476 bap tmp/arrays_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=MemRegion,TypeInference,CWE476,SerdeJson
bap tmp/c_constructs_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=CWE560,AddrTrans bap tmp/c_constructs_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=CWE560,AddrTrans
bapbundle remove unit_tests_cwe_checker.plugin bapbundle remove unit_tests_cwe_checker.plugin
......
...@@ -11,7 +11,8 @@ ...@@ -11,7 +11,8 @@
(include_subdirs unqualified) ; Include all subdirs when looking for source files (include_subdirs unqualified) ; Include all subdirs when looking for source files
(alias (rule
(name runtest) (alias runtest)
(deps unit_tests_cwe_checker.exe) (deps unit_tests_cwe_checker.exe)
(action (run %{deps} --color=always))) (action (run %{deps} --color=always))
)
...@@ -34,6 +34,7 @@ let unit_test_list = [ ...@@ -34,6 +34,7 @@ let unit_test_list = [
"CWE476", Cwe_476_test.tests; "CWE476", Cwe_476_test.tests;
"CWE560", Cwe_560_test.tests; "CWE560", Cwe_560_test.tests;
"AddrTrans", Address_translation_test.tests; "AddrTrans", Address_translation_test.tests;
"SerdeJson", Serde_json_test.tests;
] ]
...@@ -57,13 +58,14 @@ let set_example_project (project : Project.t) (tests : string list) = ...@@ -57,13 +58,14 @@ let set_example_project (project : Project.t) (tests : string list) =
List.iter tests ~f:(fun test -> List.iter tests ~f:(fun test ->
match test with match test with
| "TypeInference" -> Type_inference_test.example_project := Some(project) | "TypeInference" -> Type_inference_test.example_project := Some(project)
| "Cconv" -> begin | "Cconv" -> begin
Cconv_test.example_project := Some(project); Cconv_test.example_project := Some(project);
Cconv_test.example_arch := Some(arch); Cconv_test.example_arch := Some(arch);
check_for_cconv project arch; check_for_cconv project arch;
Cconv_test.example_bin_format := Some(get_test_bin_format project) Cconv_test.example_bin_format := Some(get_test_bin_format project)
end end
| "CWE476" -> Cwe_476_test.example_project := Some(project) | "CWE476" -> Cwe_476_test.example_project := Some(project)
| "SerdeJson" -> Serde_json_test.example_project := Some(project)
| _ -> () | _ -> ()
) )
......
open Core_kernel
open Cwe_checker_core
open Bap.Std
let example_project = ref None
let check msg x = Alcotest.(check bool) msg true x
let test_serde () =
let open Serde_json in
let serde = build_null () in
let json = to_string serde in
print_endline json;
check "serde_null" (json = "null");
let serde = build_bool true in
let json = to_string serde in
print_endline json;
check "serde_bool" (json = "true");
let serde = build_number 45 in
let json = to_string serde in
print_endline json;
check "serde_number" (json = "45");
let serde = build_string "hello" in
let json = to_string serde in
print_endline json;
check "serde_string" (json = "\"hello\"");
let serde = build_array [build_number 23; build_bool false] in
let json = to_string serde in
print_endline json;
check "serde_array" (json = "[23,false]");
let serde = build_object [("hello", build_number 23); ("world", build_bool false)] in
let json = to_string serde in
print_endline json;
check "serde_object" (json = "{\"hello\":23,\"world\":false}")
let test_type_conversions () =
let var_type = Bil.Types.Mem (`r64, `r8) in
let serde = Serde_json.of_var_type var_type in
let json = Serde_json.to_string serde in
print_endline json;
check "Var_Type" (json = "{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}");
let var = Var.create "RAX" var_type in
let serde = Serde_json.of_var var in
let json = Serde_json.to_string serde in
print_endline json;
check "Var" (json = "{\"is_temp\":false,\"name\":\"RAX\",\"type_\":{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}}");
let cast_type = Bil.Types.UNSIGNED in
let serde = Serde_json.of_cast_type cast_type in
let json = Serde_json.to_string serde in
print_endline json;
check "Cast_Type" (json = "\"UNSIGNED\"");
let unop = Bil.Types.NEG in
let serde = Serde_json.of_unop_type unop in
let json = Serde_json.to_string serde in
print_endline json;
check "Unop_Type" (json = "\"NEG\"");
let bitv = Bitvector.of_int ~width:8 234 in
let serde = Serde_json.of_bitvector bitv in
let json = Serde_json.to_string serde in
print_endline json;
check "Bitvector" (json = "{\"digits\":[234],\"width\":[8]}");
let exp = Bil.binop Bil.PLUS (Bil.int bitv) (Bil.int bitv) in
let serde = Serde_json.of_exp exp in
let json = Serde_json.to_string serde in
print_endline json;
check "Expression" (json = "{\"BinOp\":{\"lhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}},\"op\":\"PLUS\",\"rhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}}}}");
let tid = Tid.for_name "block" in
let term = Blk.create ~tid () in
let tid_map = Tid.Map.empty in
let serde = Serde_json.of_blk term tid_map in
let json = Serde_json.to_string serde in
print_endline json;
check "Block_term" (json = "{\"term\":{\"defs\":[],\"jmps\":[]},\"tid\":{\"address\":\"UNKNOWN\",\"id\":\"@block\"}}";)
let test_project_conversion () =
let project = Option.value_exn !example_project in
let program = Project.program project in
let tid_map = Address_translation.generate_tid_map program in
let extern_symbols = Symbol_utils.build_and_return_extern_symbols project program tid_map in
let entry_points = [] in
let serde = Serde_json.of_program program extern_symbols entry_points tid_map in
let _json = Serde_json.to_string serde in
(* TODO: The unit test for pointer inference should be moved to another file *)
Pointer_inference.run project tid_map;
Log_utils.emit_json "bin" "";
Log_utils.emit_native "";
check "Project" true
let tests = [
"Serde Json Conversions", `Quick, test_serde;
"Type Conversions", `Quick, test_type_conversions;
"Project conversion", `Quick, test_project_conversion;
]
open Bap.Std
open Core_kernel
val example_project: Project.t option ref
val tests: unit Alcotest.test_case list
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment