Unverified Commit 88c11cc7 by Enkelmann Committed by GitHub

improve documentation (#155)

parent 5434967e
......@@ -27,7 +27,7 @@ compile_test_files:
codestyle-check:
cargo fmt -- --check
cargo clippy -- -D clippy::all
cargo clippy -- -D clippy::all -D missing_docs
clean:
cargo clean
......
//! This crate defines the command line interface for the cwe_checker.
//! General documentation about the cwe_checker is contained in the [`cwe_checker_lib`] crate.
extern crate cwe_checker_lib; // Needed for the docstring-link to work
use cwe_checker_lib::analysis::graph;
use cwe_checker_lib::utils::binary::RuntimeMemoryImage;
use cwe_checker_lib::utils::log::print_all_messages;
......@@ -22,7 +27,8 @@ struct CmdlineArgs {
#[structopt(long, short, validator(check_file_existence))]
config: Option<String>,
/// Write the results to a file.
/// Write the results to a file instead of stdout.
/// This only affects CWE warnings. Log messages are still printed to stdout.
#[structopt(long, short)]
out: Option<String>,
......@@ -36,7 +42,7 @@ struct CmdlineArgs {
#[structopt(long, short)]
json: bool,
/// Do not print log messages. This prevents polluting STDOUT for json output.
/// Do not print log messages. This prevents polluting stdout for json output.
#[structopt(long, short)]
quiet: bool,
......
......@@ -7,7 +7,9 @@ use crate::prelude::*;
/// As values it can only assume a known bitvector or *Top(bytesize)*.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum BitvectorDomain {
/// The `Top` value of the domain, representing the case that nothing is known about the actual value.
/// The contained `ByteSize` records the size of the unknown value.
Top(ByteSize),
/// The exact value of the bitvector is known.
Value(Bitvector),
}
......
......@@ -10,8 +10,13 @@ use std::fmt::Display;
/// Both non-pointer values and offsets of pointers are represented by the same abstract domain `T`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub enum DataDomain<T: RegisterDomain> {
/// The `Top` element of the domain.
/// Describes a value for which nothing is known except for its size, which is stored as a `ByteSize`.
Top(ByteSize),
/// The value is a pointer to an abstract memory object.
/// The offset of the pointer is represented by the abstract domain `T`.
Pointer(PointerDomain<T>),
/// The value is a non-pointer value or a pointer to global memory.
/// The latter can happen if pointers to global memory are described by their absolute value.
/// The value itself is represented by the abstract domain `T`.
Value(T),
}
......
......@@ -57,7 +57,12 @@ impl std::fmt::Display for AbstractIdentifier {
/// It is also impossible to accidentally describe circular references.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractLocation {
/// The location is given by a register with the given name and byte size.
/// The `String` holds the register name and the `ByteSize` holds its size.
Register(String, ByteSize),
/// The location is in memory.
/// One needs to follow the pointer in the register with the given name (as `String`)
/// and then follow the abstract memory location inside the pointed-to memory object
/// to find the actual memory location.
Pointer(String, AbstractMemoryLocation),
}
......@@ -93,13 +98,20 @@ impl AbstractLocation {
/// The offset and size variables are given in bytes.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation {
/// A location inside the current memory object.
Location {
/// The offset (in bytes) with respect to the zero offset of the memory object where the value can be found.
offset: isize,
/// The size in bytes of the value that the memory location points to.
size: usize,
},
/// A pointer which needs to be followed to get to the actual memory location.
Pointer {
/// The offset (in bytes) inside the current memory object where the pointer can be found.
offset: isize,
/// The size in bytes of the pointer.
size: usize,
/// The memory location inside the target of the pointer that this memory location points to.
/// Boxed because the type is recursive.
target: Box<AbstractMemoryLocation>,
},
}
......
......@@ -57,7 +57,7 @@ impl<T: AbstractDomain + SizedDomain + HasTop + std::fmt::Debug> HasTop for MemR
}
impl<T: AbstractDomain + SizedDomain + HasTop + std::fmt::Debug> MemRegion<T> {
// Create a new, empty memory region.
/// Create a new, empty memory region.
pub fn new(address_bytesize: ByteSize) -> Self {
MemRegion(Arc::new(MemRegionData::new(address_bytesize)))
}
......
......@@ -23,12 +23,14 @@ pub use interval::*;
/// The main trait describing an abstract domain.
///
/// Each abstract domain is partially ordered and has a maximal element (which can be generated by `top()`).
/// Each abstract domain is partially ordered.
/// Abstract domains of the same type can be merged.
pub trait AbstractDomain: Sized + Eq + Clone {
/// Return an upper bound (with respect to the partial order on the domain) for the two inputs `self` and `other`.
fn merge(&self, other: &Self) -> Self;
/// Returns whether the element represents the top element or not.
/// Returns whether the element represents the top element (i.e. maximal with respect to the partial order) or not.
/// If a domain has no maximal element, this function should always return false.
fn is_top(&self) -> bool;
}
......
......@@ -28,6 +28,7 @@ use std::marker::PhantomData;
/// All edge transition functions can return `None` to indicate that no information flows through the edge.
/// For example, this can be used to indicate edges that can never be taken.
pub trait Context<'a> {
/// The type of the values that are assigned to nodes during the fixpoint computation.
type Value: PartialEq + Eq + Clone;
/// Get a reference to the graph that the fixpoint is computed on.
......@@ -241,6 +242,7 @@ impl<'a, T: Context<'a>> GeneralizedContext<'a, T> {
}
}
/// Get the inner context object.
/// Returns a shared reference to the wrapped context of type `T`.
pub fn get_context(&self) -> &T {
&self.context
}
......
......@@ -27,6 +27,7 @@ use std::marker::PhantomData;
/// All edge transition functions can return `None` to indicate that no information flows through the edge.
/// For example, this can be used to indicate edges that can never be taken.
pub trait Context<'a> {
/// The type of the values that are assigned to nodes during the fixpoint computation.
type Value: PartialEq + Eq + Clone;
/// Get a reference to the graph that the fixpoint is computed on.
......@@ -97,6 +98,7 @@ impl<'a, T: Context<'a>> GeneralizedContext<'a, T> {
}
}
/// Get the inner context object.
/// Returns a shared reference to the wrapped context of type `T`.
pub fn get_context(&self) -> &T {
&self.context
}
......
......@@ -65,14 +65,25 @@ pub type Graph<'a> = DiGraph<Node<'a>, Edge<'a>>;
/// to allow unambiguous node identification.
#[derive(Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Node<'a> {
/// A node corresponding to the start of a basic block,
/// i.e. to the point in time just before the execution of the block.
/// Holds a reference to the block term and to a subroutine term.
BlkStart(&'a Term<Blk>, &'a Term<Sub>),
/// A node corresponding to the end of the basic block,
/// i.e. to the point in time just after the execution of all `Def` instructions in the block
/// but before execution of the jump instructions at the end of the block.
/// Holds a reference to the block term and to a subroutine term.
BlkEnd(&'a Term<Blk>, &'a Term<Sub>),
/// An artificial node. See the module-level documentation for more information.
CallReturn {
/// The block containing the callsite of the call.
call: (&'a Term<Blk>, &'a Term<Sub>),
/// The block that the called function returns to.
return_: (&'a Term<Blk>, &'a Term<Sub>),
},
/// An artificial node. See the module-level documentation for more information.
CallSource {
/// The block containing the callsite of the call.
source: (&'a Term<Blk>, &'a Term<Sub>),
/// The block containing the target of the call, i.e. the first block of the target function.
target: (&'a Term<Blk>, &'a Term<Sub>),
},
}
......@@ -124,13 +135,29 @@ impl<'a> std::fmt::Display for Node<'a> {
/// In this case the other jump reference points to the untaken conditional jump.
#[derive(Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Edge<'a> {
/// An edge between the `BlkStart` and `BlkEnd` nodes of a basic block.
Block,
/// An edge corresponding to an intraprocedural jump instruction.
/// The first reference is the jump instruction itself.
/// If the jump is only taken if a previous conditional jump is not taken,
/// then a reference to the untaken conditional jump is also added to the jump label
/// (as the second, optional reference).
Jump(&'a Term<Jmp>, Option<&'a Term<Jmp>>),
/// An edge corresponding to a function call instruction.
/// Only generated for calls to functions inside the binary.
/// See the module-level documentation for more information.
Call(&'a Term<Jmp>),
/// An edge corresponding to a call to a function not contained in the binary,
/// i.e. the target is located in a shared object loaded by the binary.
/// The edge goes directly from the callsite to the return-to-site inside the caller.
ExternCallStub(&'a Term<Jmp>),
/// An artificial edge. See the module-level documentation for more information.
CRCallStub,
/// An artificial edge. See the module-level documentation for more information.
CRReturnStub,
/// An artificial edge to combine intra- and interprocedural data flows at the callsite of calls.
/// See the module-level documentation for more information.
CallCombine(&'a Term<Jmp>),
/// An artificial edge to combine intra- and interprocedural data flows at the return-to-site of calls.
/// See the module-level documentation for more information.
ReturnCombine(&'a Term<Jmp>),
}
......
//! Types and functions shared between the implementations
//! of forward and backward interprocedural fixpoint computations.
use crate::prelude::*;
/// NodeValue that can either be a single abstract value or a
/// composition of the abstract value computed following an interprocedural
/// call in the graph and of the abstract value when the call is not taken.
/// The CallFlowCombinator then allows for a merge of the values computed
/// over both paths.
/// composition of the abstract value computed following an interprocedural call in the graph
/// and of the abstract value before or after the call (depending on the direction of the fixpoint analysis).
/// The CallFlowCombinator then allows for a merge of the values computed over both paths.
///
/// The call_stub value will either be transferred from the callsite to the return site
/// in a forward analysis or the other way around in a backward analysis.
......@@ -14,14 +16,22 @@ use crate::prelude::*;
/// to the callsite in a backward analysis.
#[derive(PartialEq, Eq, Serialize, Deserialize)]
pub enum NodeValue<T: PartialEq + Eq> {
/// A single abstract value.
Value(T),
/// The value saved at artificial combinator nodes.
/// Each of the two contained values is optional, i.e. it may be `None`.
CallFlowCombinator {
/// The value flowing through the intraprocedural edge of the corresponding call.
call_stub: Option<T>,
/// The value flowing through the interprocedural edge of the corresponding call,
/// i.e. either between callsite and start of the called function
/// or between end of the called function and the return-to site of the call.
interprocedural_flow: Option<T>,
},
}
impl<T: PartialEq + Eq> NodeValue<T> {
/// Unwraps the contained value for non-combinator nodes.
/// Panics if given a combinator value of an artificial node.
pub fn unwrap_value(&self) -> &T {
match self {
NodeValue::Value(value) => value,
......
//! Modules necessary for graph-based and fixpoint-based analyses,
//! as well as analyses depending on these modules.
pub mod backward_interprocedural_fixpoint;
pub mod fixpoint;
pub mod forward_interprocedural_fixpoint;
......
//! The pointer inference analysis.
//! A fixpoint algorithm analyzing all memory accesses in a program.
//!
//! The goal of the pointer inference analysis is to keep track of all memory objects and pointers
//! that the program knows about at specific program points during execution.
......@@ -40,6 +40,7 @@ pub use state::State;
/// The version number of the analysis.
const VERSION: &str = "0.1";
/// The name and version number of the "Memory" CWE check.
pub static CWE_MODULE: crate::CweModule = crate::CweModule {
name: "Memory",
version: VERSION,
......@@ -54,18 +55,19 @@ pub type Data = DataDomain<BitvectorDomain>;
pub struct Config {
/// Names of extern functions that are `malloc`-like,
/// i.e. the unique return value is a pointer to a newly allocated chunk of memory or a NULL pointer.
allocation_symbols: Vec<String>,
pub allocation_symbols: Vec<String>,
/// Names of extern functions that are `free`-like,
/// i.e. the memory chunk that the unique parameter of the function points to gets deallocated.
/// Note that the analysis currently does not detect mismatching allocation-deallocation pairs,
/// i.e. it cannot distinguish between memory allocated by `malloc` and memory allocated by `new`.
deallocation_symbols: Vec<String>,
pub deallocation_symbols: Vec<String>,
}
/// A wrapper struct for the pointer inference computation object.
pub struct PointerInference<'a> {
/// The computation object wrapped by this struct.
/// Its context is a `GeneralizedContext` wrapping the pointer inference `Context`.
computation: Computation<GeneralizedContext<'a, Context<'a>>>,
/// The sending end of a channel for `LogThreadMsg` messages.
log_collector: crossbeam_channel::Sender<LogThreadMsg>,
/// The log messages and CWE warnings that have been generated during the pointer inference analysis.
pub collected_logs: (Vec<LogMessage>, Vec<CweWarning>),
}
......@@ -166,7 +168,7 @@ impl<'a> PointerInference<'a> {
/// Generate a compacted json representation of the results.
/// Note that this output cannot be used for serialization/deserialization,
/// but is only intended for user output.
/// but is only intended for user output and debugging.
pub fn generate_compact_json(&self) -> serde_json::Value {
let graph = self.computation.get_graph();
let mut json_nodes = serde_json::Map::new();
......@@ -179,18 +181,26 @@ impl<'a> PointerInference<'a> {
serde_json::Value::Object(json_nodes)
}
/// Write the compacted json representation of the results to stdout in pretty-printed form.
/// The output is meant for users and for debugging only;
/// it is not suitable for serialization/deserialization.
pub fn print_compact_json(&self) {
    let compact_json = self.generate_compact_json();
    println!("{:#}", compact_json);
}
/// Get the underlying graph of the computation.
/// The graph is borrowed from the wrapped computation object.
pub fn get_graph(&self) -> &Graph {
self.computation.get_graph()
}
/// Return a reference to the context object of the computation.
/// The computation holds the context inside a generalized context wrapper,
/// so the wrapper is fetched first and then unwrapped.
pub fn get_context(&self) -> &Context {
    let generalized_context = self.computation.get_context();
    generalized_context.get_context()
}
/// Get the value associated to a node in the computed fixpoint
/// (or intermediate state of the algorithm if the fixpoint has not been reached yet).
/// The node is identified by its `NodeIndex`.
/// Returns `None` if no value is associated to the Node.
pub fn get_node_value(&self, node_id: NodeIndex) -> Option<&NodeValue<State>> {
self.computation.get_node_value(node_id)
}
......
//! This module contains the definition of the abstract memory object type.
use super::Data;
use crate::abstract_domain::*;
use crate::prelude::*;
......@@ -287,14 +289,18 @@ fn same_or_none<T: Eq + Clone>(left: &Option<T>, right: &Option<T>) -> Option<T>
/// The type of a memory object.
/// An object is either a stack or a heap object.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord)]
pub enum ObjectType {
/// A stack object, i.e. the stack frame of a function.
Stack,
/// A memory object located on the heap.
Heap,
}
/// An object is either alive or dangling (because the memory was freed or a function return invalidated the stack frame).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord)]
pub enum ObjectState {
/// The object is alive.
Alive,
/// The object is dangling, i.e. the memory has been freed already
/// or the stack frame has been invalidated by a function return.
Dangling,
}
......
//! The implemented CWE checks.
//! See their module descriptions for detailed information about each check.
//!
//! Currently the `Memory` check is not contained in this module
//! but directly incorporated into the [`pointer_inference`](crate::analysis::pointer_inference) module.
//! See there for detailed information about this check.
pub mod cwe_190;
pub mod cwe_215;
pub mod cwe_243;
......
......@@ -34,6 +34,7 @@ use crate::utils::log::{CweWarning, LogMessage};
use crate::utils::symbol_utils::{get_callsites, get_symbol_map};
use crate::CweModule;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE190",
version: "0.1",
......
......@@ -22,6 +22,7 @@ use crate::prelude::*;
use crate::utils::log::{CweWarning, LogMessage};
use crate::CweModule;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE215",
version: "0.2",
......
......@@ -33,6 +33,7 @@ use crate::utils::log::{CweWarning, LogMessage};
use crate::utils::symbol_utils::find_symbol;
use crate::CweModule;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE243",
version: "0.2",
......
......@@ -25,6 +25,7 @@ use crate::utils::log::{CweWarning, LogMessage};
use crate::utils::symbol_utils::find_symbol;
use crate::CweModule;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE332",
version: "0.1",
......
......@@ -31,6 +31,7 @@ use crate::CweModule;
use petgraph::visit::EdgeRef;
use std::collections::HashMap;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE367",
version: "0.1",
......
......@@ -38,6 +38,7 @@ use crate::utils::symbol_utils::{find_symbol, get_calls_to_symbols};
use crate::CweModule;
use std::collections::HashMap;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE426",
version: "0.1",
......
......@@ -29,6 +29,7 @@ use crate::utils::log::{CweWarning, LogMessage};
use crate::utils::symbol_utils::{get_callsites, get_symbol_map};
use crate::CweModule;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE467",
version: "0.2",
......
......@@ -50,12 +50,13 @@ use std::collections::HashMap;
mod state;
use state::*;
pub mod taint;
mod taint;
pub use taint::*;
mod context;
use context::*;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE476",
version: "0.3",
......
......@@ -31,13 +31,16 @@ use crate::utils::log::{CweWarning, LogMessage};
use crate::utils::symbol_utils::{get_callsites, get_symbol_map};
use crate::CweModule;
/// The module name and version,
/// together with the function (`check_cwe`) registered as the `run` entry of the module.
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE560",
version: "0.2",
run: check_cwe,
};
/// An upper bound for the value of a presumably correct umask argument.
pub static UPPER_BOUND_CORRECT_UMASK_ARG_VALUE: u64 = 0o177;
/// An upper bound for the value of a chmod-style argument.
pub static UPPER_BOUND_CORRECT_CHMOD_ARG_VALUE: u64 = 0o777;
/// Compute the parameter value of umask out of the basic block right before the umask call.
......
/*!
This module implements a check for CWE-676: Use of Potentially Dangerous Function.
Potentially dangerous functions like memcpy can lead to security issues like buffer overflows.
Potentially dangerous functions like memcpy can lead to security issues like buffer overflows.
See <https://cwe.mitre.org/data/definitions/676.html> for a detailed description.
How the check works:
* Calls to dangerous functions are flagged. The list of functions that are considered
dangerous can be configured in config.json. The default list is taken from
dangerous can be configured in config.json. The default list is based on
<https://github.com/01org/safestringlib/wiki/SDL-List-of-Banned-Functions>.
False Positives
......@@ -32,6 +32,7 @@ use serde::{Deserialize, Serialize};
const VERSION: &str = "0.1";
/// The module name and version
pub static CWE_MODULE: crate::CweModule = crate::CweModule {
name: "CWE676",
version: VERSION,
......@@ -106,6 +107,8 @@ pub fn resolve_symbols<'a>(
.collect()
}
/// Iterate through all function calls inside the program and flag calls to those functions
/// that are marked as unsafe via the configuration file.
pub fn check_cwe(
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
......
......@@ -61,6 +61,7 @@ use state::*;
mod context;
use context::*;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE78",
version: "0.1",
......
......@@ -28,6 +28,7 @@ use crate::{
const VERSION: &str = "0.1";
/// The module name and version
pub static CWE_MODULE: crate::CweModule = crate::CweModule {
name: "CWE782",
version: VERSION,
......@@ -66,6 +67,7 @@ pub fn generate_cwe_warning(calls: &[(&str, &Tid, &str)]) -> Vec<CweWarning> {
cwe_warnings
}
/// Iterate through all calls of the program and flag calls to `ioctl()` as CWE warnings.
pub fn check_cwe(
analysis_results: &AnalysisResults,
_cwe_params: &serde_json::Value,
......
......@@ -9,20 +9,33 @@ pub type Bitvector = apint::ApInt;
/// A trait to extend the bitvector type with useful helper functions
/// that are not contained in the [`apint`] crate.
/// See the implementation of the trait on the [`Bitvector`] type for more information.
pub trait BitvectorExtended: Sized {
/// Perform a cast operation of the given `kind` on the bitvector.
/// The target size of the cast is given by `width`.
/// Returns an error for non-implemented cast operations (currently all float-related casts).
fn cast(&self, kind: CastOpType, width: ByteSize) -> Result<Self, Error>;

/// Extract a subpiece from the given bitvector.
/// The subpiece starts at the byte `low_byte` (counted from the least significant byte)
/// and has the given `size`.
fn subpiece(&self, low_byte: ByteSize, size: ByteSize) -> Self;

/// Perform a unary operation on the given bitvector.
/// Returns an error for non-implemented operations (currently all float-related operations).
fn un_op(&self, op: UnOpType) -> Result<Self, Error>;

/// Perform a binary operation on the given bitvectors,
/// with `self` as the left hand side and `rhs` as the right hand side.
/// Returns an error for non-implemented operations (currently all float-related operations).
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Result<Self, Error>;

/// Returns `true` if adding `self` to `rhs` would result in a signed integer overflow or underflow.
fn signed_add_overflow_check(&self, rhs: &Self) -> bool;

/// Returns `true` if subtracting `rhs` from `self` would result in a signed integer overflow or underflow.
fn signed_sub_overflow_check(&self, rhs: &Self) -> bool;

/// Return the result of multiplying `self` with `rhs`
/// and a flag that is set to `true` if the multiplication resulted in a signed integer overflow or underflow.
///
/// Returns an error for bitvectors larger than 8 bytes,
/// since multiplication for them is not yet implemented in the [`apint`] crate.
fn signed_mult_with_overflow_flag(&self, rhs: &Self) -> Result<(Self, bool), Error>;
}
......@@ -42,7 +55,7 @@ impl BitvectorExtended for Bitvector {
}
}
/// Extract a subpiece of the given bitvector.
/// Extract a subpiece from the given bitvector.
fn subpiece(&self, low_byte: ByteSize, size: ByteSize) -> Self {
self.clone()
.into_checked_lshr(low_byte.as_bit_length())
......
......@@ -32,26 +32,45 @@ pub enum Expression {
/// Note that most (but not all) operations require the left hand side (`lhs`)
/// and right hand side (`rhs`) to be of equal size.
BinOp {
/// The opcode/type of the operation
op: BinOpType,
/// The left hand side expression
lhs: Box<Expression>,
/// The right hand side expression
rhs: Box<Expression>,
},
/// A unary operation
UnOp { op: UnOpType, arg: Box<Expression> },
UnOp {
/// The opcode/type of the operation
op: UnOpType,
/// The argument expression
arg: Box<Expression>,
},
/// A cast operation for type cast between integer and floating point types of different byte lengths.
Cast {
/// The opcode/type of the cast operation
op: CastOpType,
/// The byte size of the result value of the expression
size: ByteSize,
/// The argument of the expression
arg: Box<Expression>,
},
/// An unknown value but with known size.
/// This may be generated for e.g. unsupported assembly instructions.
/// Note that computation of an unknown value is still required to be side-effect-free!
Unknown { description: String, size: ByteSize },
Unknown {
/// A description of the operation
description: String,
/// The byte size of the result of the unknown expression
size: ByteSize,
},
/// Extracting a sub-bitvector from the argument expression.
Subpiece {
/// The lowest byte (i.e. least significant byte if interpreted as integer) of the sub-bitvector to extract.
low_byte: ByteSize,
/// The size of the resulting sub-bitvector
size: ByteSize,
/// The argument from which to extract the sub-bitvector.
arg: Box<Expression>,
},
}
......@@ -368,7 +387,9 @@ impl Expression {
}
}
/// The type/mnemonic of a binary operation
/// The type/mnemonic of a binary operation.
/// See the Ghidra P-Code documentation for more information.
#[allow(missing_docs)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum BinOpType {
Piece,
......@@ -408,6 +429,8 @@ pub enum BinOpType {
}
/// The type/mnemonic of a typecast.
/// See the Ghidra P-Code documentation for more information.
#[allow(missing_docs)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum CastOpType {
IntZExt,
......@@ -419,6 +442,8 @@ pub enum CastOpType {
}
/// The type/mnemonic of a unary operation.
/// See the Ghidra P-Code documentation for more information.
#[allow(missing_docs)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum UnOpType {
IntNegate,
......
......@@ -3,7 +3,7 @@ use crate::prelude::*;
use crate::utils::log::LogMessage;
use std::collections::HashSet;
pub mod builder;
mod builder;
/// A term identifier consisting of an ID string (which is required to be unique)
/// and an address to indicate where the term is located.
......@@ -66,20 +66,30 @@ pub struct Term<T> {
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Def {
/// A memory load into the register given by `var`.
///
/// The size of `var` also determines the number of bytes read from memory.
/// The size of `address` is required to match the pointer size of the corresponding CPU architecture.
Load { var: Variable, address: Expression },
Load {
/// The target register of the memory load.
/// The size of `var` also determines the number of bytes read from memory.
var: Variable,
/// The expression computing the address to read from.
/// The size of `address` is required to match the pointer size of the corresponding CPU architecture.
address: Expression,
},
/// A memory store operation.
///
/// The size of `value` determines the number of bytes written.
/// The size of `address` is required to match the pointer size of the corresponding CPU architecture.
Store {
/// The expression computing the address that is written to.
/// The size of `address` is required to match the pointer size of the corresponding CPU architecture.
address: Expression,
/// The expression computing the value that is written to memory.
/// The size of `value` also determines the number of bytes written.
value: Expression,
},
/// A register assignment, assigning the result of the expression `value` to the register `var`.
Assign { var: Variable, value: Expression },
Assign {
/// The register that is written to.
var: Variable,
/// The expression computing the value that is assigned to the register.
value: Expression,
},
}
impl Term<Def> {
......@@ -119,9 +129,6 @@ impl Term<Def> {
///
/// `Jmp` instructions carry some semantic information with them, like whether a jump is intra- or interprocedural.
/// Note that this semantic information may not always be correct.
///
/// The targets (and return targets) of jumps are, if known, either basic blocks (`Blk`) or subroutines (`Sub`)
/// depending of the type of the jump.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Jmp {
/// A direct intraprocedural jump to the targeted `Blk` term identifier.
......@@ -129,19 +136,31 @@ pub enum Jmp {
/// An indirect intraprocedural jump to the address that the given expression evaluates to.
BranchInd(Expression),
/// A direct intraprocedural jump that is only taken if the condition evaluates to true (i.e. not zero).
CBranch { target: Tid, condition: Expression },
CBranch {
/// The term ID of the target block of the jump.
target: Tid,
/// The jump is only taken if this expression evaluates to `true` (i.e. not zero).
condition: Expression,
},
/// A direct interprocedural jump representing a subroutine call.
///
/// Note that this is syntactically equivalent to a `Jmp::Branch`.
/// If the `return_` is `None`, then the called function does not return to its caller.
Call { target: Tid, return_: Option<Tid> },
Call {
/// The term ID of the target subroutine (`Sub`) or extern symbol of the call.
target: Tid,
/// The term ID of the block that the called function returns to.
/// May be `None` if it is assumed that the called function never returns.
return_: Option<Tid>,
},
/// An indirect interprocedural jump to the address the `target` expression evaluates to
/// and representing a subroutine call.
///
/// Note that this is syntactically equivalent to a `Jmp::BranchInd`.
/// If the `return_` is `None`, then the called function is believed to not return to its caller.
CallInd {
/// An expression computing the target address of the call.
target: Expression,
/// The term ID of the block that the called function returns to.
/// May be `None` if it is assumed that the called function never returns.
return_: Option<Tid>,
},
/// An indirect interprocedural jump indicating a return from a subroutine.
......@@ -154,11 +173,11 @@ pub enum Jmp {
/// E.g. syscalls and other interrupts are mapped to `CallOther`.
/// Assembly instructions that the disassembler does not support are also mapped to `CallOther`.
/// One can use the `description` field to match for and handle known side effects (e.g. syscalls).
///
/// The `return_` field indicates the `Blk` term identifier
/// where the disassembler assumes that execution will continue after handling of the side effect.
CallOther {
/// A description of the side effect.
description: String,
/// The block term identifier of the block
/// where the disassembler assumes that execution will continue after handling of the side effect.
return_: Option<Tid>,
},
}
......@@ -234,8 +253,12 @@ impl Term<Jmp> {
/// the block structure needs to be updated accordingly.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Blk {
/// The `Def` instructions of the basic block in order of execution.
pub defs: Vec<Term<Def>>,
/// The `Jmp` instructions of the basic block.
pub jmps: Vec<Term<Jmp>>,
/// If the basic block contains an indirect jump,
/// this field contains possible jump target addresses for the jump.
/// The addresses are stored as strings.
pub indirect_jmp_targets: Vec<String>,
}
......@@ -290,17 +313,24 @@ pub struct Sub {
/// A parameter or return argument of a function.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Arg {
/// The argument is passed in a register
/// The argument is passed in the given register
Register(Variable),
/// The argument is passed on the stack.
/// It is positioned at the given offset (in bytes) relative to the stack pointer on function entry
/// and has the given size.
Stack { offset: i64, size: ByteSize },
Stack {
/// The position of the argument on the stack
/// given as offset relative to the stack pointer on function entry.
offset: i64,
/// The size in bytes of the argument.
size: ByteSize,
},
}
/// An extern symbol represents a function that is dynamically linked from another binary.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
/// The term ID of the extern symbol.
pub tid: Tid,
/// Addresses of possibly multiple locations of the same extern symbol
pub addresses: Vec<String>,
......
//! This module contains the implementations of various builder functions
//! for different terms.
#[cfg(test)]
use crate::intermediate_representation::{Expression, Variable};
......
......@@ -11,8 +11,11 @@ use crate::prelude::*;
/// (but never more than one function).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
pub struct Variable {
/// The name of the variable. Equals the register name if the variable is a physical register.
pub name: String,
/// The size (in bytes) of the variable.
pub size: ByteSize,
/// Distinguishes physical registers from temporary variables:
/// Set to `false` for physical registers and to `true` for temporary (virtual) variables.
pub is_temp: bool,
}
......
/*!
# cwe_checker_rs
The main library of the cwe_checker containing all CWE checks and analysis modules.
Parts of the cwe_checker that are written in Rust.
# What is the cwe_checker
The cwe_checker is a tool for finding common bug classes on binaries using static analysis.
These bug classes are formally known as [Common Weakness Enumerations](https://cwe.mitre.org/) (CWEs).
Its main goal is to help analysts quickly find vulnerable code paths.
Currently its main focus is on ELF binaries, which are commonly found on Linux and Unix operating systems.
The cwe_checker uses [Ghidra](https://ghidra-sre.org/) to disassemble binaries into one common intermediate representation
and implements its own analyses on this IR.
Hence, the analyses can be run on all CPU architectures that Ghidra can disassemble,
which makes the *cwe_checker* a valuable tool for firmware analysis.
# Usage
If the cwe_checker is installed locally, just run
```sh
cwe_checker BINARY
```
If you want to use the official docker image, you have to mount the input binary into the docker container, e.g.
```sh
docker run --rm -v $(pwd)/BINARY:/input fkiecad/cwe_checker /input
```
One can modify the behaviour of the cwe_checker through the command line.
Use the `--help` command line option for more information.
One can also provide a custom configuration file to modify the behaviour of each check
through the `--config` command line option.
Start by taking a look at the standard configuration file located at `src/config.json`
and read the [check-specific documentation](crate::checkers) for more details about each field in the configuration file.
# Integration into other tools
### Integration into Ghidra
To import the results of the cwe_checker as bookmarks and end-of-line comments into Ghidra,
one can use the Ghidra script located at `ghidra_plugin/cwe_checker_ghidra_plugin.py`.
Detailed usage instructions are contained in the file.
### Integration into FACT
[FACT](https://github.com/fkie-cad/FACT_core) already contains a ready-to-use cwe_checker plugin,
which lets you run the cwe_checker and view its result through the FACT user interface.
# Further documentation
You can find out more information about each check, including known false positives and false negatives,
by reading the check-specific module documentation in the [`checkers`] module.
*/
use crate::analysis::graph::Graph;
......@@ -34,8 +79,12 @@ pub type CweModuleFn =
/// General information about a single CWE analysis module,
/// together with the function that has to be called to run the analysis.
///
/// Both `name` and `version` are `&'static str`,
/// i.e. the strings have to be valid for the whole runtime of the program.
pub struct CweModule {
/// The name of the CWE check.
pub name: &'static str,
/// The version number of the CWE check.
/// Should be incremented whenever significant changes are made to the check.
pub version: &'static str,
/// The function that executes the check and returns the CWE warnings found during the check.
/// Its exact signature is given by the `CweModuleFn` type alias.
pub run: CweModuleFn,
}
......
......@@ -7,12 +7,19 @@ use crate::intermediate_representation::UnOpType as IrUnOpType;
use crate::intermediate_representation::Variable as IrVariable;
use crate::prelude::*;
/// A variable representing a varnode in Ghidra P-Code.
///
/// The fields `name`, `value` and `address` are optional,
/// each of them describing a different kind of varnode (register, constant or implicit memory load).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Variable {
/// The name of the register if the varnode represents a register.
pub name: Option<String>,
/// The value of the varnode if it represents a constant.
/// NOTE(review): the value is stored as a string; its exact encoding is not visible here.
pub value: Option<String>,
/// If the varnode represents an implicit `LOAD` from memory,
/// the (necessarily constant) address of the `LOAD`.
pub address: Option<String>,
/// The size (in bytes) of the varnode.
pub size: ByteSize,
/// A flag set to `true` for virtual/temporary registers.
pub is_virtual: bool,
}
......@@ -110,11 +117,21 @@ impl Variable {
}
}
/// A P-Code expression.
///
/// P-Code itself does not divide instructions into expressions, definitions and jumps,
/// like in the internally used IR.
/// This type roughly corresponds to P-Code instructions without side effects
/// (except for assigning to the output register).
///
/// An expression has up to three input varnodes.
/// Inputs that do not exist for a given instruction are `None`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Expression {
/// The instruction mnemonic.
pub mnemonic: ExpressionType,
/// The first input varnode (if it exists).
pub input0: Option<Variable>,
/// The second input varnode (if it exists).
pub input1: Option<Variable>,
/// The third input varnode (if it exists).
pub input2: Option<Variable>,
}
......@@ -152,6 +169,8 @@ impl From<Expression> for IrExpression {
}
}
/// Expression Opcodes as parsed from Ghidra
#[allow(missing_docs)]
#[allow(non_camel_case_types)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum ExpressionType {
......@@ -316,11 +335,16 @@ impl From<ExpressionType> for IrCastOpType {
}
}
/// Properties of a register with respect to its base register.
///
/// Both `lsb` and `size` are given as [`ByteSize`] values.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct RegisterProperties {
/// The register name.
pub register: String,
/// The name of the base register.
pub base_register: String,
/// The least significant byte of the register when viewed as a sub-register of the base register.
/// NOTE(review): presumably a byte offset into the base register - confirm.
pub lsb: ByteSize,
/// The size (in bytes) of the register.
pub size: ByteSize,
}
......
//! Types to describe Ghidra P-Code
//! and functions to translate it to the internally used intermediate representation.
//!
//! The types in this module are not an exact representation of P-Code,
//! as some preprocessing is already done in the P-Code-Extractor plugin.
//!
//! The contents of this module are only used for the initial translation of P-Code into the internally used IR.
//! For everything else the [`intermediate_representation`](crate::intermediate_representation) should be used directly.
mod expressions;
pub use expressions::*;
mod term;
......
......@@ -16,23 +16,39 @@ use crate::prelude::*;
// TODO: Handle the case where an indirect tail call is represented by CALLIND plus RETURN
// TODO: Since we do not support BAP anymore, this module should be refactored
// to remove BAP-specific artifacts like the jump label type.
/// A call instruction.
///
/// All fields are optional, since not every field is meaningful for every kind of call.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Call {
/// The target label. May be `None` for `CALLOTHER` instructions.
pub target: Option<Label>,
/// The return label if the call is expected to return.
///
/// The field carries a trailing underscore because `return` is a reserved keyword in Rust.
/// The `serde` attribute maps it to the key `return` in the serialized representation.
#[serde(rename = "return")]
pub return_: Option<Label>,
/// A description of the instruction for `CALLOTHER` instructions.
pub call_string: Option<String>,
}
/// A jump instruction.
///
/// Apart from the mnemonic all fields are optional.
/// Which of them are set depends on the kind of jump, as described for each field.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Jmp {
/// The mnemonic of the jump.
pub mnemonic: JmpType,
/// The target label for intraprocedural jumps.
pub goto: Option<Label>,
/// The call struct for interprocedural jumps.
pub call: Option<Call>,
/// If the jump is a conditional jump,
/// the varnode that has to evaluate to `true` for the jump to be taken.
pub condition: Option<Variable>,
/// A list of potential jump targets for indirect jumps.
/// The targets are given as strings.
pub target_hints: Option<Vec<String>>,
}
/// A jump type mnemonic.
#[allow(missing_docs)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum JmpType {
BRANCH,
......@@ -111,15 +127,21 @@ impl From<Jmp> for IrJmp {
}
}
/// A jump label for distinguishing between direct and indirect jumps.
///
/// A direct jump names its target by term identifier,
/// while an indirect jump only names the varnode that holds the target address.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Label {
/// The term identifier of the target of a direct jump.
Direct(Tid),
/// The varnode holding the target address of an indirect jump.
Indirect(Variable),
}
/// An assignment instruction, assigning the result of an expression to a varnode.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Def {
/// The target varnode whose value gets overwritten.
/// The field is optional, i.e. it is `None` for instructions without a target varnode.
pub lhs: Option<Variable>,
/// The expression that determines the value to be written.
pub rhs: Expression,
}
......@@ -183,9 +205,12 @@ impl Def {
}
}
/// A basic block.
///
/// A block consists of a list of `Def` instructions followed by a list of jump instructions.
/// Each instruction is wrapped into a `Term`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Blk {
/// The `Def` instructions of the block in chronological order.
pub defs: Vec<Term<Def>>,
/// The jump instructions at the end of the basic block.
pub jmps: Vec<Term<Jmp>>,
}
......@@ -267,22 +292,33 @@ impl Blk {
}
}
/// An argument (parameter or return value) of an extern symbol.
///
/// The argument is described either by a register (`var`) or by a stack location (`location`).
/// NOTE(review): the type itself does not enforce that exactly one of the two fields is set.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Arg {
/// The register containing the argument if it is passed in a register.
pub var: Option<Variable>,
/// The expression computing the location of the argument if it is passed on the stack.
pub location: Option<Expression>,
/// The intent (input or output) of the argument.
pub intent: ArgIntent,
}
/// The intent (input or output) of a function argument.
///
/// NOTE(review): the variant names are written in upper case,
/// presumably to match the spelling used in the serialized data - confirm.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum ArgIntent {
/// The argument is an input parameter.
INPUT,
/// The argument is a return value.
OUTPUT,
}
/// A subfunction.
///
/// Can be converted to the corresponding type of the internally used IR
/// through the `From<Sub>` implementation for `IrSub`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Sub {
/// The name of the function.
pub name: String,
/// The basic blocks of the function.
/// The first block of the array is also the entry point into the function.
pub blocks: Vec<Term<Blk>>,
}
......@@ -303,13 +339,21 @@ impl From<Sub> for IrSub {
}
}
/// An extern symbol, i.e. a function not contained in the binary but loaded from a shared library.
///
/// Can be converted to the corresponding type of the internally used IR
/// through the `From<ExternSymbol>` implementation for `IrExternSymbol`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
/// The term identifier of the extern symbol.
pub tid: Tid,
/// The addresses to call the extern symbol.
/// May be more than one, since we also identify thunk functions calling the extern symbol with the symbol itself.
pub addresses: Vec<String>,
/// The name of the extern symbol.
pub name: String,
/// The calling convention used (as reported by Ghidra, i.e. this may not be correct).
/// `None` if no calling convention is given for the symbol.
pub calling_convention: Option<String>,
/// The input and output arguments of the function.
pub arguments: Vec<Arg>,
/// If the function is assumed to never return to the caller, this flag is set to `true`.
pub no_return: bool,
}
......@@ -359,11 +403,19 @@ impl From<ExternSymbol> for IrExternSymbol {
}
}
/// The program struct containing all information about the binary
/// except for CPU-architecture-related information.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
/// The subfunctions contained in the binary.
pub subs: Vec<Term<Sub>>,
/// The extern symbols referenced by the binary.
pub extern_symbols: Vec<ExternSymbol>,
/// The term identifiers of entry points into the binary.
pub entry_points: Vec<Tid>,
/// The base address of the memory image of the binary in RAM as reported by Ghidra.
/// The address is stored as a string.
///
/// Note that Ghidra may add an offset to the image base address as reported by the binary itself.
pub image_base: String,
}
......@@ -400,13 +452,19 @@ impl Program {
}
}
/// A struct describing a calling convention.
///
/// Only the `name` field is public, the register lists are private to this module.
/// The struct can be converted to the corresponding type of the internally used IR
/// through the `From<CallingConvention>` implementation for `IrCallingConvention`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct CallingConvention {
/// The name of the calling convention.
///
/// In the serialized representation this field uses the key `calling_convention` instead of `name`.
#[serde(rename = "calling_convention")]
pub name: String,
/// Possible parameter registers, given by their names.
parameter_register: Vec<String>,
/// Possible return registers, given by their names.
return_register: Vec<String>,
/// Callee-saved registers, given by their names.
unaffected_register: Vec<String>,
/// Registers that may be overwritten by the call, i.e. caller-saved registers.
killed_by_call_register: Vec<String>,
}
......@@ -421,12 +479,18 @@ impl From<CallingConvention> for IrCallingConvention {
}
}
/// The project struct describing all known information about the binary.
///
/// It combines the binary-specific `Program` with information about the CPU architecture,
/// i.e. the stack pointer register, the register properties and the known calling conventions.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
/// The program struct containing all binary-specific information.
pub program: Term<Program>,
/// The CPU-architecture that the binary uses.
pub cpu_architecture: String,
/// The stack pointer register of the CPU-architecture.
pub stack_pointer_register: Variable,
/// Information about all CPU-architecture-specific registers.
pub register_properties: Vec<RegisterProperties>,
/// Information about known calling conventions for the given CPU architecture.
pub register_calling_convention: Vec<CallingConvention>,
}
......
//! Helper functions for common tasks utilizing the control flow graph of the binary.
use crate::analysis::graph::*;
use crate::intermediate_representation::Jmp;
use crate::prelude::*;
......
//! Structs and functions for generating log messages and CWE warnings.
use crate::prelude::*;
use std::thread::JoinHandle;
/// A CWE warning message.
///
/// The struct derives `Default`, so a warning with all fields empty can be created via `CweWarning::default()`.
/// It also derives `PartialOrd` and `Ord`, which compare the fields in their order of declaration,
/// i.e. warnings are ordered by `name` first.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Default)]
pub struct CweWarning {
/// A short name of the CWE check, e.g. `CWE190`.
pub name: String,
/// The version number of the check.
pub version: String,
/// Addresses in the binary associated with the CWE warning.
/// The first address usually denotes the program point where the CWE warning was generated.
pub addresses: Vec<String>,
/// Term IDs associated to the CWE warning.
/// May be more exact than the addresses, e.g. for `Def` terms.
pub tids: Vec<String>,
/// Symbol names (usually of extern symbols) associated to the CWE warning.
pub symbols: Vec<String>,
/// Other useful information. Content depends on the check that generated the CWE warning.
pub other: Vec<Vec<String>>,
/// A short description of the warning that is presented to the user.
/// Should contain all essential information necessary to understand the warning,
/// including the address in the binary for which the warning was generated.
pub description: String,
}
......
//! This module contains various utility modules and helper functions.
pub mod binary;
pub mod graph_utils;
pub mod log;
......
//! Helper functions for common tasks utilizing extern symbols,
//! e.g. searching for calls to a specific extern symbol.
use std::collections::HashMap;
use crate::intermediate_representation::*;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment