Unverified Commit d926369b by Melvin Klimke Committed by GitHub

add check for CWE 78: OS Command Injection (#130)

parent 9a414bab
...@@ -149,7 +149,8 @@ fn run_with_ghidra(args: CmdlineArgs) { ...@@ -149,7 +149,8 @@ fn run_with_ghidra(args: CmdlineArgs) {
let mut analysis_results = AnalysisResults::new(&binary, &runtime_memory_image, &project); let mut analysis_results = AnalysisResults::new(&binary, &runtime_memory_image, &project);
let modules_depending_on_pointer_inference = vec!["CWE243", "CWE367", "CWE476", "Memory"]; let modules_depending_on_pointer_inference =
vec!["CWE78", "CWE243", "CWE367", "CWE476", "Memory"];
let pointer_inference_results = if modules let pointer_inference_results = if modules
.iter() .iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name)) .any(|module| modules_depending_on_pointer_inference.contains(&module.name))
......
...@@ -200,6 +200,11 @@ impl<T: AbstractDomain + SizedDomain + HasTop + std::fmt::Debug> MemRegionData<T ...@@ -200,6 +200,11 @@ impl<T: AbstractDomain + SizedDomain + HasTop + std::fmt::Debug> MemRegionData<T
self.values.values() self.values.values()
} }
/// Get the map of all elements including their offset into the memory region.
pub fn entry_map(&self) -> &BTreeMap<i64, T> {
&self.values
}
/// Get an iterator over all values in the memory region for in-place manipulation. /// Get an iterator over all values in the memory region for in-place manipulation.
/// Note that one can changes values to *Top* using the iterator. /// Note that one can changes values to *Top* using the iterator.
/// These values should be removed from the memory region using `clear_top_values()`. /// These values should be removed from the memory region using `clear_top_values()`.
......
...@@ -88,6 +88,7 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con ...@@ -88,6 +88,7 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
&self, &self,
target_value: Option<&u64>, target_value: Option<&u64>,
return_value: Option<&u64>, return_value: Option<&u64>,
_caller_sub: &Term<Sub>,
_call: &Term<Jmp>, _call: &Term<Jmp>,
_return_: &Term<Jmp>, _return_: &Term<Jmp>,
) -> Option<u64> { ) -> Option<u64> {
...@@ -105,7 +106,11 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con ...@@ -105,7 +106,11 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
} }
/// Simply copy the value /// Simply copy the value
fn split_return_stub(&self, combined_value: &u64) -> Option<u64> { fn split_return_stub(
&self,
combined_value: &u64,
_returned_from_sub: &Term<Sub>,
) -> Option<u64> {
Some(*combined_value) Some(*combined_value)
} }
......
...@@ -59,6 +59,7 @@ pub trait Context<'a> { ...@@ -59,6 +59,7 @@ pub trait Context<'a> {
&self, &self,
target_value: Option<&Self::Value>, target_value: Option<&Self::Value>,
return_value: Option<&Self::Value>, return_value: Option<&Self::Value>,
caller_sub: &Term<Sub>,
call: &Term<Jmp>, call: &Term<Jmp>,
return_: &Term<Jmp>, return_: &Term<Jmp>,
) -> Option<Self::Value>; ) -> Option<Self::Value>;
...@@ -71,7 +72,11 @@ pub trait Context<'a> { ...@@ -71,7 +72,11 @@ pub trait Context<'a> {
/// Transition function for return stub split. /// Transition function for return stub split.
/// Has access to the value at the ReturnCombine node and /// Has access to the value at the ReturnCombine node and
/// decides which data is transferred along the Return Stub Edge. /// decides which data is transferred along the Return Stub Edge.
fn split_return_stub(&self, combined_value: &Self::Value) -> Option<Self::Value>; fn split_return_stub(
&self,
combined_value: &Self::Value,
returned_from_sub: &Term<Sub>,
) -> Option<Self::Value>;
/// Transition function for calls to functions not contained in the binary. /// Transition function for calls to functions not contained in the binary.
/// The corresponding edge goes from the callsite to the returned-to block. /// The corresponding edge goes from the callsite to the returned-to block.
...@@ -165,10 +170,16 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> { ...@@ -165,10 +170,16 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> {
}), }),
// The user has the ability to split the node value at the BlkStart return node // The user has the ability to split the node value at the BlkStart return node
// to only send specific data along the ReturnStub Edge to the last BlkEnd node called subroutine // to only send specific data along the ReturnStub Edge to the last BlkEnd node called subroutine
Edge::CRReturnStub => self Edge::CRReturnStub => {
.context // The subroutine term from which the program returns
.split_return_stub(node_value.unwrap_value()) let returned_from_sub = match graph.node_weight(end_node) {
.map(NodeValue::Value), Some(Node::BlkEnd { 0: _, 1: sub_term }) => sub_term,
_ => panic!("Malformed Control flow graph"),
};
self.context
.split_return_stub(node_value.unwrap_value(), returned_from_sub)
.map(NodeValue::Value)
}
// The CallCombine Edge merges the values coming in from the CallStub Edge and Call Edge // The CallCombine Edge merges the values coming in from the CallStub Edge and Call Edge
// It also gives the user access to the call and return term. // It also gives the user access to the call and return term.
...@@ -178,17 +189,18 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> { ...@@ -178,17 +189,18 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> {
call_stub, call_stub,
interprocedural_flow, interprocedural_flow,
} => { } => {
let call_block = match graph.node_weight(start_node) { let (call_block, caller_sub) = match graph.node_weight(start_node) {
Some(Node::CallSource { Some(Node::CallSource {
source: (call_block, ..), source: (call_block, call_sub),
target: _, target: _,
}) => call_block, }) => (call_block, call_sub),
_ => panic!("Malformed Control flow graph"), _ => panic!("Malformed Control flow graph"),
}; };
let call_term = &call_block.term.jmps[0]; let call_term = &call_block.term.jmps[0];
match self.context.update_callsite( match self.context.update_callsite(
interprocedural_flow.as_ref(), interprocedural_flow.as_ref(),
call_stub.as_ref(), call_stub.as_ref(),
caller_sub,
call_term, call_term,
return_term, return_term,
) { ) {
......
...@@ -17,7 +17,7 @@ pub struct State { ...@@ -17,7 +17,7 @@ pub struct State {
/// The list of all known memory objects. /// The list of all known memory objects.
pub memory: AbstractObjectList, pub memory: AbstractObjectList,
/// The abstract identifier of the current stack frame. /// The abstract identifier of the current stack frame.
/// It points to the to the base of the stack frame, i.e. only negative offsets point into the current stack frame. /// It points to the base of the stack frame, i.e. only negative offsets point into the current stack frame.
pub stack_id: AbstractIdentifier, pub stack_id: AbstractIdentifier,
/// All known IDs of caller stack frames. /// All known IDs of caller stack frames.
/// Note that these IDs are named after the callsite, /// Note that these IDs are named after the callsite,
......
...@@ -8,4 +8,5 @@ pub mod cwe_467; ...@@ -8,4 +8,5 @@ pub mod cwe_467;
pub mod cwe_476; pub mod cwe_476;
pub mod cwe_560; pub mod cwe_560;
pub mod cwe_676; pub mod cwe_676;
pub mod cwe_78;
pub mod cwe_782; pub mod cwe_782;
...@@ -50,8 +50,8 @@ use std::collections::HashMap; ...@@ -50,8 +50,8 @@ use std::collections::HashMap;
mod state; mod state;
use state::*; use state::*;
mod taint; pub mod taint;
use taint::*; pub use taint::*;
mod context; mod context;
use context::*; use context::*;
......
//! This module implements a check for CWE-78: Improper Neutralization of Special Elements used in an OS Command ('OS Command Injection').
//!
//! The software constructs all or part of an OS command using externally-influenced input from an upstream component,
//! but it does not neutralize or incorrectly neutralizes special elements that could modify the intended OS command
//! when it is sent to a downstream component.
//!
//! See <https://cwe.mitre.org/data/definitions/78.html> for a detailed description.
//!
//! ## How the check works
//!
//! Using backward dataflow analysis we search for an executation path from a system call parameter (string) to an user input
//! to identify possible command injections.
//!
//! To find relevant string related functions, such as sprintf, it is assumed that the first input parameter points
//! to the memory position that will be used as the return location. (e.g. char *strcat(char *dest, const char *src)
//! where 'char *dest' will contain the return value)
//!
//! For instance:
//! ...
//! MOV RAX, qword ptr [RBP + local_10]
//! MOV RDI, RAX // RDI is the first input parameter for the strcat call and it points to [RBP + local_10]
//! CALL strcat
//! MOV RAX, qword ptr [RBP + local_10] // In the backwards analysis [RBP + local_10] will be tainted and it contains the return value
//! ...
//!
//! ### Symbols configurable in config.json
//!
//! The symbols are the functions which
//! 1. make system calls (e.g. system)
//! 2. manipulate strings (e.g. sprintf, strcat, memcpy, etc.)
//! 3. take user input (e.g. scanf)
//!
//! ## False Positives
//!
//! - The input comes from the user but proper sanitization was not detected by the analysis even though it exists.
//! - The input comes from the user but the format string's input format could not be distinguished as non-string input.
//!
//! ## False Negatives
//!
//! - Missing Taints due to lost track of pointer targets
//! - Non tracked function parameters cause incomplete taints that could miss possible dangerous inputs
use std::collections::HashMap;
use crate::{
analysis::{
backward_interprocedural_fixpoint::{create_computation, Context as _},
graph::{self, Edge, Node},
interprocedural_fixpoint_generic::NodeValue,
},
intermediate_representation::{Jmp, Project, Sub},
prelude::*,
utils::log::{CweWarning, LogMessage},
AnalysisResults, CweModule,
};
use petgraph::{graph::NodeIndex, visit::EdgeRef};
mod state;
use state::*;
mod context;
use context::*;
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE78",
version: "0.1",
run: check_cwe,
};
/// The configuration struct
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Config {
/// The names of the system call symbols
system_symbols: Vec<String>,
/// The names of the string manipulating symbols
string_symbols: Vec<String>,
/// The name of the user input symbols
user_input_symbols: Vec<String>,
}
/// This check searches for system calls and sets their parameters as taint source if available.
/// Then the fixpoint computation is executed and its result may generate cwe warnings if
/// the parameters can be tracked back to user inputs
pub fn check_cwe(
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let pointer_inference_results = analysis_results.pointer_inference.unwrap();
let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let system_symbols =
crate::utils::symbol_utils::get_symbol_map(project, &config.system_symbols[..]);
let string_symbols =
crate::utils::symbol_utils::get_symbol_map(project, &config.string_symbols[..]);
let user_input_symbols =
crate::utils::symbol_utils::get_symbol_map(project, &config.user_input_symbols[..]);
let general_context = Context::new(
project,
analysis_results.runtime_memory_image,
&pointer_inference_results,
string_symbols,
user_input_symbols,
cwe_sender,
);
let entry_sub_to_entry_node_map = get_entry_sub_to_entry_node_map(project, &general_context);
for edge in general_context.get_pi_graph().edge_references() {
if let Edge::ExternCallStub(jmp) = edge.weight() {
if let Jmp::Call { target, .. } = &jmp.term {
if let Some(symbol) = system_symbols.get(target) {
let node = edge.source();
let current_sub = match general_context.get_pi_graph()[node] {
Node::BlkEnd(_blk, sub) => sub,
_ => panic!(),
};
let mut context = general_context.clone();
context.set_taint_source(jmp, &symbol.name, current_sub);
let pi_state_at_taint_source =
match pointer_inference_results.get_node_value(node) {
Some(NodeValue::Value(val)) => Some(val.clone()),
_ => None,
};
let mut computation = create_computation(context.clone(), None);
computation.set_node_value(
node,
NodeValue::Value(State::new(
symbol,
&project.stack_pointer_register,
pi_state_at_taint_source.as_ref(),
current_sub,
)),
);
computation.compute_with_max_steps(100);
for (sub_name, node_index) in entry_sub_to_entry_node_map.iter() {
if let Some(node_weight) = computation.get_node_value(*node_index) {
let state = node_weight.unwrap_value();
if !state.is_empty() {
context.generate_cwe_warning(sub_name);
}
}
}
}
}
}
}
let mut cwe_warnings = HashMap::new();
for cwe in cwe_receiver.try_iter() {
match &cwe.addresses[..] {
[taint_source_address, ..] => cwe_warnings.insert(taint_source_address.clone(), cwe),
_ => panic!(),
};
}
let cwe_warnings = cwe_warnings.into_iter().map(|(_, cwe)| cwe).collect();
(Vec::new(), cwe_warnings)
}
/// Returns a map from subroutine names to their corresponding start node index
fn get_entry_sub_to_entry_node_map(
project: &Project,
context: &Context,
) -> HashMap<String, NodeIndex> {
let mut entry_sub_to_entry_blocks_map = HashMap::new();
let subs: HashMap<Tid, &Term<Sub>> = project
.program
.term
.subs
.iter()
.map(|sub| (sub.tid.clone(), sub))
.collect();
for sub_tid in project.program.term.entry_points.iter() {
if let Some(sub) = subs.get(sub_tid) {
if let Some(entry_block) = sub.term.blocks.get(0) {
entry_sub_to_entry_blocks_map.insert(
(sub_tid.clone(), sub.term.name.clone()),
entry_block.tid.clone(),
);
}
}
}
let mut tid_to_graph_indices_map = HashMap::new();
for node in context.get_graph().node_indices() {
if let graph::Node::BlkStart(block, sub) = context.get_graph()[node] {
tid_to_graph_indices_map.insert((block.tid.clone(), sub.tid.clone()), node);
}
}
entry_sub_to_entry_blocks_map
.into_iter()
.filter_map(|((sub_tid, name), block_tid)| {
if let Some(start_node_index) = tid_to_graph_indices_map.get(&(block_tid, sub_tid)) {
Some((name, *start_node_index))
} else {
None
}
})
.collect()
}
#[cfg(test)]
use apint::ApInt;
#[cfg(test)]
use super::{CastOpType, Variable};
use super::{BinOpType, Expression}; use super::{BinOpType, Expression};
use crate::prelude::*; use crate::prelude::*;
/// ## Helper functions for building expressions /// ## Helper functions for building expressions
impl Expression { impl Expression {
/// Shortcut for creating a constant expression from an i64 value
#[cfg(test)]
pub fn const_from_i64(value: i64) -> Expression {
Expression::Const(Bitvector::from_i64(value))
}
/// Shortcut for creating a constant expression from an apint value (e.g. copy of global address)
#[cfg(test)]
pub fn const_from_apint(value: ApInt) -> Expression {
Expression::Const(value)
}
/// Shortcut for creating a variable expression
#[cfg(test)]
pub fn var(name: &str) -> Expression {
Expression::Var(Variable {
name: name.into(),
size: ByteSize::new(8),
is_temp: false,
})
}
/// Shortcut for creating a cast expression
#[cfg(test)]
pub fn cast(self, op: CastOpType) -> Expression {
Expression::Cast {
op,
size: ByteSize::new(8),
arg: Box::new(self),
}
}
/// Shortcut for creating a subpiece expression
#[cfg(test)]
pub fn subpiece(self, low_byte: ByteSize, size: ByteSize) -> Expression {
Expression::Subpiece {
low_byte,
size,
arg: Box::new(self),
}
}
/// Shortcut for creating an `IntAdd`-expression /// Shortcut for creating an `IntAdd`-expression
pub fn plus(self, rhs: Expression) -> Expression { pub fn plus(self, rhs: Expression) -> Expression {
Expression::BinOp { Expression::BinOp {
......
...@@ -3,6 +3,8 @@ use crate::prelude::*; ...@@ -3,6 +3,8 @@ use crate::prelude::*;
use crate::utils::log::LogMessage; use crate::utils::log::LogMessage;
use std::collections::HashSet; use std::collections::HashSet;
pub mod builder;
/// A term identifier consisting of an ID string (which is required to be unique) /// A term identifier consisting of an ID string (which is required to be unique)
/// and an address to indicate where the term is located. /// and an address to indicate where the term is located.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
......
#[cfg(test)]
use crate::intermediate_representation::{Expression, Variable};
#[cfg(test)]
use super::{Def, Jmp, Term, Tid};
/// ## Helper functions for building defs
#[cfg(test)]
impl Def {
/// Shortcut for creating a assign def
pub fn assign(tid: &str, var: Variable, value: Expression) -> Term<Def> {
Term {
tid: Tid::new(tid),
term: Def::Assign { var, value },
}
}
/// Shortcut for creating a load def
pub fn load(tid: &str, var: Variable, address: Expression) -> Term<Def> {
Term {
tid: Tid::new(tid),
term: Def::Load { var, address },
}
}
/// Shortcut for creating a store def
pub fn store(tid: &str, address: Expression, value: Expression) -> Term<Def> {
Term {
tid: Tid::new(tid),
term: Def::Store { address, value },
}
}
}
/// ## Helper functions for building jmps
#[cfg(test)]
impl Jmp {
/// Shortcut for creating a call
pub fn call(tid: &str, target_tid: &str, return_tid: Option<&str>) -> Term<Jmp> {
let return_tid = return_tid.map(|tid_name| Tid::new(tid_name));
Term {
tid: Tid::new(tid),
term: Jmp::Call {
target: Tid::new(target_tid),
return_: return_tid,
},
}
}
/// Shortcut for creating a branch
pub fn branch(tid: &str, target_tid: &str) -> Term<Jmp> {
Term {
tid: Tid::new(tid),
term: Jmp::Branch(Tid::new(target_tid)),
}
}
}
...@@ -55,6 +55,7 @@ impl std::fmt::Display for CweModule { ...@@ -55,6 +55,7 @@ impl std::fmt::Display for CweModule {
/// Get a list of all known analysis modules. /// Get a list of all known analysis modules.
pub fn get_modules() -> Vec<&'static CweModule> { pub fn get_modules() -> Vec<&'static CweModule> {
vec![ vec![
&crate::checkers::cwe_78::CWE_MODULE,
&crate::checkers::cwe_190::CWE_MODULE, &crate::checkers::cwe_190::CWE_MODULE,
&crate::checkers::cwe_215::CWE_MODULE, &crate::checkers::cwe_215::CWE_MODULE,
&crate::checkers::cwe_243::CWE_MODULE, &crate::checkers::cwe_243::CWE_MODULE,
......
{ {
"CWE78": {
"system_symbols": [
"system",
"execl"
],
"string_symbols": [
"sprintf",
"snprintf",
"strcat",
"strncat"
],
"user_input_symbols": [
"scanf",
"__isoc99_scanf"
]
},
"CWE190": { "CWE190": {
"symbols": [ "symbols": [
"xmalloc", "xmalloc",
......
#include <string.h>
#include <stdlib.h>
int constant_system() {
system("ls");
}
int main(int argc, char **argv) {
char *dest = "usr/bin/cat ";
strcat(dest, argv[1]);
system(dest);
constant_system();
return 0;
}
...@@ -170,6 +170,38 @@ mod tests { ...@@ -170,6 +170,38 @@ mod tests {
#[test] #[test]
#[ignore] #[ignore]
fn cwe_78() {
let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_78", "CWE78");
// Ghidra does not recognize all extern function calls in the disassembly step for MIPS.
// Needs own control flow graph analysis to be fixed.
mark_architecture_skipped(&mut tests, "mips64");
mark_architecture_skipped(&mut tests, "mips64el");
mark_architecture_skipped(&mut tests, "mips");
mark_architecture_skipped(&mut tests, "mipsel");
mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_skipped(&mut tests, "x86", "clang"); // Return value detection insufficient for x86
mark_compiler_skipped(&mut tests, "mingw32-gcc"); // Pointer Inference returns insufficient results for PE
for test_case in tests {
let num_expected_occurences = 1;
if let Err(error) = test_case.run_test("[CWE78]", num_expected_occurences) {
error_log.push((test_case.get_filepath(), error));
}
}
if !error_log.is_empty() {
print_errors(error_log);
panic!();
}
}
#[test]
#[ignore]
fn cwe_190() { fn cwe_190() {
let mut error_log = Vec::new(); let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_190", "CWE190"); let mut tests = all_test_cases("cwe_190", "CWE190");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment