Unverified Commit 46af37e8 by Melvin Klimke Committed by GitHub

CWE 134: Externally provided format string (#187)

parent 9eafce76
...@@ -84,6 +84,7 @@ Source code documentation can be built with `make documentation`. For the stable ...@@ -84,6 +84,7 @@ Source code documentation can be built with `make documentation`. For the stable
So far the following analyses are implemented: So far the following analyses are implemented:
- [CWE-78](https://cwe.mitre.org/data/definitions/78.html): OS Command Injection (currently disabled on standard runs) - [CWE-78](https://cwe.mitre.org/data/definitions/78.html): OS Command Injection (currently disabled on standard runs)
- [CWE-134](https://cwe.mitre.org/data/definitions/134.html): Use of Externally-Controlled Format String
- [CWE-119](https://cwe.mitre.org/data/definitions/119.html) and its variants [CWE-125](https://cwe.mitre.org/data/definitions/125.html) and [CWE-787](https://cwe.mitre.org/data/definitions/787.html): Buffer Overflow - [CWE-119](https://cwe.mitre.org/data/definitions/119.html) and its variants [CWE-125](https://cwe.mitre.org/data/definitions/125.html) and [CWE-787](https://cwe.mitre.org/data/definitions/787.html): Buffer Overflow
- [CWE-190](https://cwe.mitre.org/data/definitions/190.html): Integer Overflow or Wraparound - [CWE-190](https://cwe.mitre.org/data/definitions/190.html): Integer Overflow or Wraparound
- [CWE-215](https://cwe.mitre.org/data/definitions/215.html): Information Exposure Through Debug Information - [CWE-215](https://cwe.mitre.org/data/definitions/215.html): Information Exposure Through Debug Information
......
...@@ -143,7 +143,7 @@ fn run_with_ghidra(args: CmdlineArgs) { ...@@ -143,7 +143,7 @@ fn run_with_ghidra(args: CmdlineArgs) {
&project, &project,
); );
let modules_depending_on_pointer_inference = vec!["CWE78", "CWE476", "Memory"]; let modules_depending_on_pointer_inference = vec!["CWE78", "CWE134", "CWE476", "Memory"];
let pointer_inference_results = if modules let pointer_inference_results = if modules
.iter() .iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name)) .any(|module| modules_depending_on_pointer_inference.contains(&module.name))
......
...@@ -25,6 +25,26 @@ ...@@ -25,6 +25,26 @@
"__isoc99_sscanf": 1 "__isoc99_sscanf": 1
} }
}, },
"CWE134": {
"format_string_symbols": [
"sprintf",
"snprintf",
"printf",
"scanf",
"__isoc99_scanf",
"sscanf",
"__isoc99_sscanf"
],
"format_string_index": {
"sprintf": 1,
"snprintf": 2,
"printf": 0,
"scanf": 0,
"__isoc99_scanf": 0,
"sscanf": 1,
"__isoc99_sscanf": 1
}
},
"CWE190": { "CWE190": {
"symbols": [ "symbols": [
"xmalloc", "xmalloc",
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
//! but directly incorporated into the [`pointer_inference`](crate::analysis::pointer_inference) module. //! but directly incorporated into the [`pointer_inference`](crate::analysis::pointer_inference) module.
//! See there for detailed information about this check. //! See there for detailed information about this check.
pub mod cwe_134;
pub mod cwe_190; pub mod cwe_190;
pub mod cwe_215; pub mod cwe_215;
pub mod cwe_243; pub mod cwe_243;
......
//! This module implements a check for CWE-134: Use of Externally-Controlled Format String.
//!
//! The software uses a function that accepts a format string as an argument,
//! but the format string originates from an external source.
//!
//! See <https://cwe.mitre.org/data/definitions/134.html> for a detailed description.
//!
//! ## How the check works
//!
//! Using forward dataflow analysis we search for calls to external symbols that take a format string
//! as an input parameter (e.g. sprintf). Then we check where the format string parameter points to,
//! and if the target is not part of the global read-only memory of the binary, a CWE warning is generated.
//!
//! ### Symbols configurable in config.json
//!
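//! - The symbols that take a format string parameter (`format_string_symbols`).
//! - The index of the format string parameter for each of these symbols (`format_string_index`).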
//!
//! ## False Positives
//!
//! - The input was externally provided on purpose and originates from a trusted source.
//! - A pointer target could be lost but the format string was not externally provided.
use std::collections::HashMap;
use petgraph::graph::NodeIndex;
use petgraph::visit::EdgeRef;
use crate::abstract_domain::TryToBitvec;
use crate::analysis::graph::Edge;
use crate::analysis::interprocedural_fixpoint_generic::NodeValue;
use crate::analysis::pointer_inference::PointerInference;
use crate::intermediate_representation::ExternSymbol;
use crate::intermediate_representation::Jmp;
use crate::intermediate_representation::Variable;
use crate::prelude::*;
use crate::utils::binary::RuntimeMemoryImage;
use crate::utils::log::CweWarning;
use crate::utils::log::LogMessage;
use crate::CweModule;
/// The module descriptor of this check:
/// its name (`CWE134`), its version and the entry point [`check_cwe`] that runs the check.
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE134",
version: "0.1",
run: check_cwe,
};
/// The configuration struct of the check.
/// It is deserialized from the `CWE134` parameters passed to [`check_cwe`].
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct Config {
/// The names of the external symbols that take a format string parameter.
format_string_symbols: Vec<String>,
/// Maps the name of each symbol to the (zero-based) index of its format string parameter
/// in the parameter list of the symbol.
format_string_index: HashMap<String, usize>,
}
/// The categorization of the location of a format string based on the kind of memory holding it.
///
/// Only `GlobalWriteable` and `NonGlobal` lead to a CWE warning in [`check_cwe`].
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub enum StringLocation {
/// Global read only memory:
/// the parameter evaluates to an address in global memory that is not writeable.
GlobalReadable,
/// Global read and write memory:
/// the parameter evaluates to an address in global memory that is writeable.
GlobalWriteable,
/// Non Global memory:
/// the parameter could not be evaluated to an address inside global memory.
NonGlobal,
/// Unknown memory:
/// no statement about the location of the string could be made.
Unknown,
}
/// This check searches for calls to external symbols that take a format string as an input parameter.
/// It then checks whether the format string parameter points to global read-only memory.
/// If the string is located in writeable global memory or outside of global memory,
/// a CWE warning is generated for the call site.
///
/// The returned list of log messages is always empty.
///
/// # Panics
///
/// Panics if `cwe_params` cannot be deserialized into a [`Config`]
/// or if the analysis results do not contain pointer inference results.
pub fn check_cwe(
    analysis_results: &AnalysisResults,
    cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
    let project = analysis_results.project;
    let config: Config = serde_json::from_value(cwe_params.clone())
        .expect("Invalid configuration for the CWE134 check.");
    let format_string_symbols =
        crate::utils::symbol_utils::get_symbol_map(project, &config.format_string_symbols[..]);
    let pointer_inference_results = analysis_results
        .pointer_inference
        .expect("The CWE134 check requires the results of the pointer inference analysis.");
    let mut cwe_warnings = Vec::new();
    for edge in pointer_inference_results.get_graph().edge_references() {
        // Only calls to external symbols are of interest.
        let jmp = match edge.weight() {
            Edge::ExternCallStub(jmp) => jmp,
            _ => continue,
        };
        let target = match &jmp.term {
            Jmp::Call { target, .. } => target,
            _ => continue,
        };
        // Skip calls to symbols that are not configured as format string symbols.
        let symbol = match format_string_symbols.get(target) {
            Some(symbol) => symbol,
            None => continue,
        };
        // The index map is only read, so it is borrowed from the config instead of cloned.
        let location = locate_format_string(
            &edge.source(),
            symbol,
            &config.format_string_index,
            pointer_inference_results,
            analysis_results.runtime_memory_image,
            &project.stack_pointer_register,
        );
        if matches!(
            location,
            StringLocation::GlobalWriteable | StringLocation::NonGlobal
        ) {
            cwe_warnings.push(generate_cwe_warning(&jmp.tid, symbol, &location));
        }
    }
    (Vec::new(), cwe_warnings)
}
/// Returns a [`StringLocation`] based on the kind of memory holding the format string
/// that is passed to `symbol` at the call site whose source node is `node`.
///
/// - `GlobalReadable` / `GlobalWriteable`: the format string parameter evaluates to an absolute
///   address in global memory that is not writeable / writeable.
/// - `NonGlobal`: a pointer inference state exists for the node, but the parameter could not be
///   evaluated to an absolute address inside global memory.
/// - `Unknown`: no assumption about the string location can be made. This is the case if there is
///   no pointer inference state for the node, if `format_string_index` contains no entry for the
///   symbol, or if the symbol has no parameter at the configured index.
fn locate_format_string(
    node: &NodeIndex,
    symbol: &ExternSymbol,
    format_string_index: &HashMap<String, usize>,
    pointer_inference_results: &PointerInference,
    runtime_memory_image: &RuntimeMemoryImage,
    stack_pointer: &Variable,
) -> StringLocation {
    let pi_state = match pointer_inference_results.get_node_value(*node) {
        Some(NodeValue::Value(state)) => state,
        _ => return StringLocation::Unknown,
    };
    // The index comes from the configuration and the parameter list from the analyzed binary.
    // Neither is guaranteed to be consistent with the other, so a missing entry must not panic.
    let format_string_parameter = match format_string_index
        .get(&symbol.name)
        .and_then(|index| symbol.parameters.get(*index))
    {
        Some(parameter) => parameter,
        None => return StringLocation::Unknown,
    };
    if let Ok(address) = pi_state.eval_parameter_arg(
        format_string_parameter,
        stack_pointer,
        runtime_memory_image,
    ) {
        if let Ok(address_vector) = address.try_to_bitvec() {
            if runtime_memory_image.is_global_memory_address(&address_vector) {
                // NOTE(review): presumably this cannot fail for an address that was just
                // confirmed to be a global memory address - confirm.
                let is_writeable = runtime_memory_image
                    .is_address_writeable(&address_vector)
                    .expect("Could not determine whether a global address is writeable.");
                return if is_writeable {
                    StringLocation::GlobalWriteable
                } else {
                    StringLocation::GlobalReadable
                };
            }
        }
    }
    StringLocation::NonGlobal
}
/// Build the CWE warning for a call to `called_symbol` at `callsite`
/// whose format string was found at the given `location`.
///
/// Only `GlobalWriteable` and `NonGlobal` are valid locations for a warning.
/// The function panics for every other location.
fn generate_cwe_warning(
    callsite: &Tid,
    called_symbol: &ExternSymbol,
    location: &StringLocation,
) -> CweWarning {
    let symbol_name = &called_symbol.name;
    let call_address = &callsite.address;
    let description = match location {
        StringLocation::GlobalWriteable => format!(
            "(Externally Controlled Format String) Potential externally controlled format string in global memory for call to {} at {}",
            symbol_name, call_address
        ),
        StringLocation::NonGlobal => format!(
            "(Externally Controlled Format String) Potential externally controlled format string for call to {} at {}",
            symbol_name, call_address
        ),
        // These locations never lead to a warning, so reaching this arm is a bug in the caller.
        StringLocation::GlobalReadable | StringLocation::Unknown => {
            panic!("Invalid String Location.")
        }
    };
    CweWarning::new(CWE_MODULE.name, CWE_MODULE.version, description)
        .tids(vec![callsite.to_string()])
        .addresses(vec![call_address.clone()])
        .symbols(vec![symbol_name.clone()])
}
#[cfg(test)]
pub mod tests {
    use std::collections::HashSet;

    use crate::analysis::pointer_inference::PointerInference as PointerInferenceComputation;
    use crate::intermediate_representation::{Blk, Def, Expression, Jmp, Project, Sub};

    use super::*;

    /// Create a project with a single function `func` as its entry point.
    /// The first block of the function sets up `RDI` and `RSI`
    /// and then calls `sprintf`, returning to the (empty) second block.
    fn mock_project() -> Project {
        let destination_def = Def::assign(
            "def2",
            Variable::mock("RDI", 8_u64),
            Expression::var("RBP").plus_const(8),
        );
        let format_string_def = Def::assign(
            "def3",
            Variable::mock("RSI", 8_u64),
            Expression::Const(Bitvector::from_str_radix(16, "3002").unwrap()),
        );
        let sprintf_call = Jmp::call("call_string", "sprintf", Some("block2"));

        let mut calling_block = Blk::mock_with_tid("block1");
        calling_block
            .term
            .defs
            .extend(vec![destination_def, format_string_def]);
        calling_block.term.jmps.push(sprintf_call);
        let return_block = Blk::mock_with_tid("block2");

        let mut sub = Sub::mock("func");
        sub.term.blocks.extend(vec![calling_block, return_block]);

        let mut project = Project::mock_empty();
        project.program.term.subs.push(sub);
        project.program.term.entry_points.push(Tid::new("func"));
        project
    }

    #[test]
    fn test_locate_format_string() {
        let project = mock_project();
        let runtime_memory_image = RuntimeMemoryImage::mock();
        let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
        let mut pi_results =
            PointerInferenceComputation::mock(&project, &runtime_memory_image, &graph);
        pi_results.compute();

        let sprintf_symbol = ExternSymbol::mock_string();
        let stack_pointer = Variable::mock("RSP", ByteSize::new(8));
        let mut format_string_index: HashMap<String, usize> = HashMap::new();
        format_string_index.insert("sprintf".to_string(), 1);

        // Get the BlkEnd node with the function call.
        let call_node: NodeIndex = graph.node_indices().nth(1).unwrap();

        assert_eq!(
            locate_format_string(
                &call_node,
                &sprintf_symbol,
                &format_string_index,
                &pi_results,
                &runtime_memory_image,
                &stack_pointer
            ),
            StringLocation::GlobalReadable
        );
    }
}
...@@ -9,18 +9,6 @@ use crate::{ ...@@ -9,18 +9,6 @@ use crate::{
intermediate_representation::{Expression, Variable}, intermediate_representation::{Expression, Variable},
}; };
// TODO: change actual mock function for blocks to receive a TID parameter and then remove this function
/// Create an empty block (no definitions, no jumps, no indirect jump targets) with the given TID.
fn mock_block(tid: &str) -> Term<Blk> {
    let empty_block = Blk {
        defs: Vec::new(),
        jmps: Vec::new(),
        indirect_jmp_targets: Vec::new(),
    };
    Term {
        tid: Tid::new(tid),
        term: empty_block,
    }
}
pub fn bv(value: i64) -> ValueDomain { pub fn bv(value: i64) -> ValueDomain {
ValueDomain::from(Bitvector::from_i64(value)) ValueDomain::from(Bitvector::from_i64(value))
} }
...@@ -87,8 +75,8 @@ impl Setup { ...@@ -87,8 +75,8 @@ impl Setup {
}; };
let mut project = Project::mock_empty(); let mut project = Project::mock_empty();
let mut sub = Sub::mock("func"); let mut sub = Sub::mock("func");
let mut block1 = mock_block("block1"); let mut block1 = Blk::mock_with_tid("block1");
let block2 = mock_block("block2"); let block2 = Blk::mock_with_tid("block2");
let def1 = Def::assign( let def1 = Def::assign(
"def1", "def1",
Variable::mock("RBP", 8 as u64), Variable::mock("RBP", 8 as u64),
......
...@@ -751,6 +751,17 @@ mod tests { ...@@ -751,6 +751,17 @@ mod tests {
}, },
} }
} }
/// Create an empty block (no definitions, no jumps, no indirect jump targets) with the given TID.
pub fn mock_with_tid(tid: &str) -> Term<Blk> {
    let empty_block = Blk {
        defs: Vec::new(),
        jmps: Vec::new(),
        indirect_jmp_targets: Vec::new(),
    };
    Term {
        tid: Tid::new(tid),
        term: empty_block,
    }
}
} }
impl Sub { impl Sub {
......
...@@ -99,6 +99,7 @@ impl std::fmt::Display for CweModule { ...@@ -99,6 +99,7 @@ impl std::fmt::Display for CweModule {
pub fn get_modules() -> Vec<&'static CweModule> { pub fn get_modules() -> Vec<&'static CweModule> {
vec![ vec![
&crate::checkers::cwe_78::CWE_MODULE, &crate::checkers::cwe_78::CWE_MODULE,
&crate::checkers::cwe_134::CWE_MODULE,
&crate::checkers::cwe_190::CWE_MODULE, &crate::checkers::cwe_190::CWE_MODULE,
&crate::checkers::cwe_215::CWE_MODULE, &crate::checkers::cwe_215::CWE_MODULE,
&crate::checkers::cwe_243::CWE_MODULE, &crate::checkers::cwe_243::CWE_MODULE,
......
#include <stdio.h>
/*
 * Passes the first command line argument directly as the format string of snprintf.
 * NOTE(review): this looks like the deliberately vulnerable sample for the CWE-134
 * (externally controlled format string) check - do not "fix" the call without confirming.
 */
int main(int argc, char **argv){
char buf[128];
/* argv[1] is used as the format string itself, not as an argument to a constant format string. */
snprintf(buf,128,argv[1]);
return 0;
}
\ No newline at end of file
...@@ -256,6 +256,29 @@ mod tests { ...@@ -256,6 +256,29 @@ mod tests {
#[test] #[test]
#[ignore] #[ignore]
fn cwe_134() {
    // Runs the CWE134 check on every `cwe_134` test case that is not skipped
    // and expects exactly one `[CWE134]` occurrence per case.
    let mut tests = all_test_cases("cwe_134", "CWE134");

    mark_architecture_skipped(&mut tests, "ppc64"); // TODO: Check reason for failure!
    mark_skipped(&mut tests, "ppc64le", "clang"); // TODO: Check reason for failure!
    mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!

    let expected_occurrences = 1;
    let mut failures = Vec::new();
    for case in tests {
        match case.run_test("[CWE134]", expected_occurrences) {
            Ok(_) => (),
            Err(message) => failures.push((case.get_filepath(), message)),
        }
    }

    if failures.is_empty() {
        return;
    }
    print_errors(failures);
    panic!();
}
#[test]
#[ignore]
fn cwe_190() { fn cwe_190() {
let mut error_log = Vec::new(); let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_190", "CWE190"); let mut tests = all_test_cases("cwe_190", "CWE190");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment