Unverified Commit 5f9d78fb by Melvin Klimke Committed by GitHub

Dyn sym refactor (#71)

Added a simple check that prints an error message when no dynamic symbol calls could be resolved.
parent 8e575abc
......@@ -490,7 +490,7 @@ let update_state_jmp state jmp ~sub_tid ~project =
let func_name = match String.lsplit2 (Tid.name tid) ~on:'@' with
| Some(_left, right) -> right
| None -> Tid.name tid in
if String.Set.mem (Cconv.parse_dyn_syms project) func_name then
if String.Set.mem (Symbol_utils.parse_dyn_syms project) func_name then
begin if List.exists (malloc_like_function_list ()) ~f:(fun elem -> elem = func_name) then
update_state_malloc_call state tid jmp ~project
......@@ -32,7 +32,7 @@ let check_cwe prog proj tid_map symbol_names _ =
match symbol_names with
| hd::[] ->
let symbols = Symbol_utils.build_symbols hd prog in
let calls = call_finder#run prog [] in
let calls = get_calls prog in
let relevant_calls = filter_calls_to_symbols calls symbols in
check_calls relevant_calls prog proj tid_map symbols check_multiplication_before_symbol
| _ -> failwith "[CWE190] symbol_names not as expected"
......@@ -17,7 +17,7 @@ let check_input_is_pointer_size proj _prog _sub blk jmp tid_map symbols =
if get_pointer_size (Project.arch proj) = (Word.to_int_exn w) then
let address = Address_translation.translate_tid_to_assembler_address_string (Term.tid blk) tid_map in
let tid = Address_translation.tid_to_string @@ Term.tid blk in
let tid = Address_translation.tid_to_string @@ Term.tid blk in
let symbol = Symbol_utils.get_symbol_name_from_jmp jmp symbols in
let description = sprintf
"(Use of sizeof on a Pointer Type) sizeof on pointer at %s (%s)."
......@@ -35,7 +35,7 @@ let check_cwe prog proj tid_map symbol_names _ =
match symbol_names with
| hd::[] ->
let symbols = Symbol_utils.build_symbols hd prog in
let calls = call_finder#run prog [] in
let calls = get_calls prog in
let relevant_calls = filter_calls_to_symbols calls symbols in
check_calls relevant_calls prog proj tid_map symbols check_input_is_pointer_size
| _ -> failwith "[CWE467] symbol_names not as expected"
......@@ -465,7 +465,7 @@ let check_cwe (_prog: Program.t) (project: Project.t) (tid_map: Word.t Tid.Map.t
| "max_steps" :: num :: [] -> int_of_string num
| _ -> failwith "[CWE476] parameters not as expected" in
let malloc_like_functions = List.map symbols ~f:(fun symb -> "@" ^ symb) in
let extern_functions = Cconv.parse_dyn_syms project in
let extern_functions = Symbol_utils.parse_dyn_syms project in
(* run the pointer inference analysis. TODO: This should be done somewhere else as this analysis will be needed in more than one check! *)
let project = Type_inference.compute_pointer_register project in
let subfunctions = Term.enum sub_t (Project.program project) in
......@@ -12,6 +12,7 @@ type cwe_module = {
has_parameters : bool;
let known_modules = [{cwe_func = Cwe_190.check_cwe; name = Cwe_190.name; version = Cwe_190.version; requires_pairs = false; has_parameters = false};
{cwe_func = Cwe_215.check_cwe; name = Cwe_215.name; version = Cwe_215.version; requires_pairs = false; has_parameters = false};
{cwe_func = Cwe_243.check_cwe; name = Cwe_243.name; version = Cwe_243.version; requires_pairs = true; has_parameters = false};
......@@ -36,6 +37,7 @@ let cmdline_flags = [
("check-path", "Checks if there is a path from an input function to a CWE hit.");
let cmdline_params = [
("config", "Path to configuration file.");
("out", "Path to output file.");
......@@ -43,16 +45,20 @@ let cmdline_params = [
("api", "C header file for additional subroutine information.")
(** Renders the name and version of every entry of [known_modules] as a
    ["name": "version"] pair and joins the pairs with [", "]. *)
let build_version_sexp () =
  let render_entry cwe = Format.sprintf "\"%s\": \"%s\"" cwe.name cwe.version in
  String.concat ~sep:", " (List.map known_modules ~f:render_entry)
(** Emits one info log message that lists the versions of all known modules. *)
let print_module_versions () =
  let versions = build_version_sexp () in
  let message = sprintf "[cwe_checker] module_versions: {%s}" versions in
  Log_utils.info message
(** Prints the [version] string, followed by a newline, to standard output. *)
let print_version () =
print_endline version
let print_help_message ((): unit) : unit =
let flags = cmdline_flags in
let params = cmdline_params in
......@@ -62,7 +68,8 @@ let print_help_message ((): unit) : unit =
List.iter ~f:(fun x -> Printf.printf " -%s: %s\n" (fst x) (snd x)) params
let execute_cwe_module cwe json program project tid_address_map =
let execute_cwe_module (cwe : cwe_module) (json : Yojson.Basic.t) (project : Project.t) (program : program term) (tid_address_map : word Tid.Map.t) : unit =
let parameters = match cwe.has_parameters with
| false -> []
| true -> Json_utils.get_parameter_list_from_json json cwe.name in
......@@ -73,6 +80,7 @@ let execute_cwe_module cwe json program project tid_address_map =
let symbols = Json_utils.get_symbols_from_json json cwe.name in
cwe.cwe_func program project tid_address_map [symbols] parameters
let check_valid_module_list (modules : string list) : unit =
let known_module_names = List.map ~f:(fun x -> x.name) known_modules in
match List.find modules ~f:(fun module_name -> not (Stdlib.List.mem module_name known_module_names) ) with
......@@ -80,28 +88,22 @@ let check_valid_module_list (modules : string list) : unit =
failwith ("[cwe_checker] Unknown CWE module " ^ module_name ^ ". Known modules: " ^ String.concat (List.map ~f:(fun x -> x ^ " ") known_module_names));
| None -> ()
let partial_run project config modules =
let program = Project.program project in
let tid_address_map = Address_translation.generate_tid_map program in
let json = Yojson.Basic.from_file config in
let partial_run (json : Yojson.Basic.t) (project : Project.t) (program : program term) (tid_address_map : word Tid.Map.t) (modules : string list) : unit =
let () = check_valid_module_list modules in
Log_utils.info (sprintf "[cwe_checker] Just running the following analyses: %s." (String.concat (List.map ~f:(fun x -> x ^ " ") modules)));
List.iter modules ~f:(fun cwe ->
let cwe_mod = match List.find known_modules ~f:(fun x -> x.name = cwe) with
| Some(module_) -> module_
| None -> failwith "[cwe_checker] Unknown CWE module" in
let program = Project.program project in
execute_cwe_module cwe_mod json program project tid_address_map
execute_cwe_module cwe_mod json project program tid_address_map
let full_run project config =
let program = Project.program project in
let tid_address_map = Address_translation.generate_tid_map program in
let json = Yojson.Basic.from_file config in
let full_run_modules = List.filter known_modules ~f:(fun cwe_module ->
cwe_module.name <> "Memory" (* TODO: Remove this when the memory check is more stable *)
) in
List.iter full_run_modules ~f:(fun cwe -> execute_cwe_module cwe json program project tid_address_map)
(** Executes every module of [known_modules] except the one named "Memory",
    in list order, with the given configuration [json], [project], [program]
    and [tid_address_map]. *)
let full_run (json : Yojson.Basic.t) (project : Project.t) (program : program term) (tid_address_map : word Tid.Map.t) : unit =
  (* TODO: Remove this when the memory check is more stable *)
  let is_enabled (cwe_mod : cwe_module) = cwe_mod.name <> "Memory" in
  let run_module (cwe_mod : cwe_module) =
    execute_cwe_module cwe_mod json project program tid_address_map in
  known_modules
  |> List.filter ~f:is_enabled
  |> List.iter ~f:run_module
let build_output_path (path : string) : string =
......@@ -119,7 +121,7 @@ let build_output_path (path : string) : string =
| _ -> path (* file does not exist. We generate a new file with this name. *)
let main flags params project =
let main (flags : Bool.t String.Map.t) (params : String.t String.Map.t) (project : Project.t) =
let config = String.Map.find_exn params "config" in
let module_versions = String.Map.find_exn flags "module-versions" in
let partial_update = String.Map.find_exn params "partial" in
......@@ -152,15 +154,18 @@ let main flags params project =
Log_utils.error "[cwe_checker] Configuration file not found. Aborting..."
let prog = Project.program project in
let tid_address_map = Address_translation.generate_tid_map prog in
let json = Yojson.Basic.from_file config in
let () = match Symbol_utils.check_if_symbols_resolved project prog tid_address_map with
| false -> Log_utils.error "BAP is not able to resolve external symbols."
| true -> () in
if partial_update = "" then
full_run project config
full_run json project prog tid_address_map
partial_run project config (String.split partial_update ~on: ',');
partial_run json project prog tid_address_map (String.split partial_update ~on: ',');
if check_path then
let prog = Project.program project in
let tid_address_map = Address_translation.generate_tid_map prog in
let json = Yojson.Basic.from_file config in
let check_path_sources = Json_utils.get_symbols_from_json json "check_path" in
let check_path_sinks = Log_utils.get_cwe_warnings () in
Check_path.check_path prog tid_address_map check_path_sources check_path_sinks
......@@ -30,6 +30,17 @@ val check_valid_module_list: string list -> unit
(** prints the help message *)
val print_help_message: unit -> unit
(** Executes one CWE module *)
val execute_cwe_module: cwe_module -> Yojson.Basic.t -> Bap.Std.project -> Bap.Std.program Bap.Std.term -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
(** Only runs checks on CWE module specified by user. *)
val partial_run: Yojson.Basic.t -> Bap.Std.project -> Bap.Std.program Bap.Std.term -> Bap.Std.word Bap.Std.Tid.Map.t -> string list -> unit
(** Runs checks on all supported CWE modules. *)
val full_run: Yojson.Basic.t -> Bap.Std.project -> Bap.Std.program Bap.Std.term -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
val build_output_path: string -> string
(** The main function drives the execution of the cwe_checker plugin in BAP.
The command line arguments are passed as maps from their names to their values
......@@ -2,8 +2,6 @@
open Bap.Std
open Core_kernel
let dyn_syms = ref None
let callee_saved_registers = ref None
let bin_format = ref ""
......@@ -32,22 +30,6 @@ let get_supported_architectures (() : unit) : string list =
| _ -> !supported_architectures
let call_objdump (proj : Project.t) ~flag:(flag : string) ~err:(err : string) : string list =
match Project.get proj filename with
| None -> failwith "[cwe_checker] Project has no file name."
| Some(fname) -> begin
let cmd = Format.sprintf ("objdump %s %s") flag fname in
let in_chan = Unix.open_process_in cmd in
let lines = In_channel.input_lines in_chan in
let () = In_channel.close in_chan in
Unix.Unix_error (e,fm,argm) ->
failwith (Format.sprintf "%s %s %s %s" err (Unix.error_message e) fm argm)
let infer_bin_format_from_symbols (project : Project.t) : string =
match Option.is_some (Symtab.find_by_name (Project.symbols project) "__GetPEImageBase") with
| true -> "pe"
......@@ -57,7 +39,7 @@ let infer_bin_format_from_symbols (project : Project.t) : string =
let extract_bin_format (project : Project.t) : string =
match !bin_format with
| "" -> begin
let header = call_objdump project ~flag:"-f" ~err:"[cwe_checker] Parsing of file header failed:" in
let header = Support_functions.call_objdump project ~flag:"-f" ~err:"[cwe_checker] Parsing of file header failed:" in
let arch = Project.arch project in
match header with
| _::line::_ -> begin
......@@ -117,39 +99,3 @@ let is_parameter_register (var: Var.t) (project: Project.t) : Bool.t =
(** Returns [true] iff the name of [var] equals one of the register names that
    [get_register_list] yields for the ["return"] category of [project]. *)
let is_return_register (var: Var.t) (project: Project.t) : Bool.t =
  let return_registers = get_register_list project "return" in
  let var_name = Var.name var in
  List.exists return_registers ~f:(String.equal var_name)
(** Parse a line from the dyn-syms output table of objdump. Return the name of a symbol if the symbol is an extern function name. *)
let parse_dyn_sym_line (line : string) : string option =
let line = ref (String.strip line) in
let str_list = ref [] in
while Option.is_some (String.rsplit2 !line ~on:' ') do
let (left, right) = Option.value_exn (String.rsplit2 !line ~on:' ') in
line := String.strip left;
str_list := right :: !str_list;
str_list := !line :: !str_list;
match !str_list with
| _value :: func1 :: func2 :: _ -> begin
if (String.equal func1 "DF" || String.equal func2 "DF") then (
List.last !str_list
else None
| _ -> None
let parse_dyn_syms (project : Project.t) : String.Set.t =
match !dyn_syms with
| Some(symbol_set) -> symbol_set
| None -> begin
let lines = call_objdump project ~flag:"--dynamic-syms" ~err:"[cwe_checker] Parsing of dynamic symbols failed:" in
match lines with
| _ :: _ :: _ :: _ :: tail -> (* The first four lines are not part of the table *)
let symbol_set = String.Set.of_list (List.filter_map tail ~f:parse_dyn_sym_line) in
dyn_syms := Some(symbol_set);
| _ ->
dyn_syms := Some(String.Set.empty);
......@@ -22,20 +22,10 @@ val is_parameter_register: Var.t -> Project.t -> Bool.t
val is_return_register: Var.t -> Project.t -> Bool.t
(** Returns a list of those function names that are extern symbols.
TODO: Since we do not do name demangling here, check whether bap name demangling
yields different function names for the symbols. *)
val parse_dyn_syms: Project.t -> String.Set.t
(** Returns a string list of supported architectures from the registers.json. *)
val get_supported_architectures : unit -> string list
(** Calls objdump with customisable flag and error message. Returns output lines as string list. *)
val call_objdump : Project.t -> flag:string -> err:string -> string list
(** Infers the binary format using the file's symbol table. *)
val infer_bin_format_from_symbols : Project.t -> string
open Core_kernel
open Bap.Std
(** Runs [objdump <flag> <file>] on the file of the project and reads the
    output of the process line by line.

    @param flag the command line flag handed to objdump.
    @param err prefix of the failure message used when a [Unix.Unix_error] occurs.
    Fails with "[cwe_checker] Project has no file name." if the project carries no file name. *)
let call_objdump (proj : Project.t) ~flag:(flag : string) ~err:(err : string) : string list =
match Project.get proj filename with
| None -> failwith "[cwe_checker] Project has no file name."
| Some(fname) -> begin
(* NOTE(review): [flag] and [fname] are inserted into the command string without quoting;
   presumably file names containing spaces or shell metacharacters change the command - confirm. *)
let cmd = Format.sprintf ("objdump %s %s") flag fname in
let in_chan = Unix.open_process_in cmd in
let lines = In_channel.input_lines in_chan in
(* NOTE(review): the channel is closed with In_channel.close; Unix.close_process_in would
   also wait for the objdump process - confirm whether that is needed here. *)
let () = In_channel.close in_chan in
(* A Unix error is reported as a failure: [err], the error message, and the failing function and argument. *)
Unix.Unix_error (e,fm,argm) ->
failwith (Format.sprintf "%s %s %s %s" err (Unix.error_message e) fm argm)
(** Calls objdump with customisable flag and error message. Returns output lines as string list. *)
val call_objdump : Bap.Std.Project.t -> flag:string -> err:string -> string list
......@@ -18,8 +18,61 @@ type extern_symbol =
let extern_symbol_blacklist = [
let extern_symbols = ref []
let dyn_syms = ref None
let found_calls = ref []
let call_finder_run = ref false
(** Parse a line from the dyn-syms output table of objdump. Return the name of a symbol if the symbol is an extern function name.

    The line is split at spaces into fields. The last field is returned as the symbol name
    only if the first field (the symbol value) consists solely of ['0'] characters and the
    second or third field equals ["DF"]. In every other case [None] is returned. *)
let parse_dyn_sym_line (line : string) : string option =
let line = ref (String.strip line) in
let str_list = ref [] in
(* Repeatedly cut off the text after the last space and prepend it to [str_list],
   so that the fields end up in their original left-to-right order. *)
while Option.is_some (String.rsplit2 !line ~on:' ') do
let (left, right) = Option.value_exn (String.rsplit2 !line ~on:' ') in
line := String.strip left;
str_list := right :: !str_list;
(* What is left of the line contains no space anymore and becomes the first field. *)
str_list := !line :: !str_list;
match !str_list with
| value :: func1 :: func2 :: _ -> begin
(* Stripping every '0' leaves the empty string exactly when the value has no other character. *)
match ( String.strip ~drop:(fun x -> x = '0') value ) with
| "" -> begin
if (String.equal func1 "DF" || String.equal func2 "DF") then (
List.last !str_list
else None
| _ -> None (* The symbol has a nonzero value, so we assume that it is not an extern function symbol. *)
| _ -> None
(** Returns the set of symbol names that [parse_dyn_sym_line] extracts from the output of
    [objdump --dynamic-syms] for the project. The set is stored in [dyn_syms] after the
    first computation and reused by later calls. *)
let parse_dyn_syms (project : Project.t) : String.Set.t =
match !dyn_syms with
(* A previously computed set is returned without calling objdump again. *)
| Some(symbol_set) -> symbol_set
| None -> begin
let lines = Support_functions.call_objdump project ~flag:"--dynamic-syms" ~err:"[cwe_checker] Parsing of dynamic symbols failed:" in
match lines with
| _ :: _ :: _ :: _ :: tail -> (* The first four lines are not part of the table *)
let symbol_set = String.Set.of_list (List.filter_map tail ~f:parse_dyn_sym_line) in
dyn_syms := Some(symbol_set);
| _ ->
(* Fewer than four output lines: remember the empty set. *)
dyn_syms := Some(String.Set.empty);
let get_project_calling_convention (project : Project.t) : string option =
Project.get project Bap_abi.name
......@@ -40,7 +93,7 @@ let build_extern_symbols (project : Project.t) (program : program term) (parsed_
let build_and_return_extern_symbols (project : Project.t) (program : program term) (tid_map : word Tid.Map.t) : extern_symbol list =
let parsed_symbols = Cconv.parse_dyn_syms project in
let parsed_symbols = parse_dyn_syms project in
if String.Set.is_empty parsed_symbols then []
else begin
match !extern_symbols with
......@@ -169,6 +222,29 @@ let call_finder : (tid * tid) list Term.visitor = object
(** Returns the list of (callsite, destination) pairs found by [call_finder].
    The visitor is run only while [call_finder_run] is still [false]; its result is stored in [found_calls]. *)
let get_calls (program : program term) : (tid * tid) list =
match !call_finder_run with
(* Cached branch: the stored list is returned regardless of the [program] passed to this call. *)
| true -> !found_calls
| false -> begin
(* Set the flag so that later calls take the cached branch. *)
call_finder_run := true;
found_calls := call_finder#run program [];
(** Checks whether at least one extern symbol of the project is the destination of a call found in the program.
    Returns [false] if no extern symbol remains after removing the blacklisted names, or if none of
    the remaining symbols is a call destination. Returns [true] otherwise. *)
let check_if_symbols_resolved (project : Project.t) (program : program term) (tid_map : word Tid.Map.t) : bool =
let extern = build_and_return_extern_symbols project program tid_map in
(* Drop the symbols whose names are listed in [extern_symbol_blacklist]. *)
let extern = List.filter extern ~f:(fun ext_sym -> not (Stdlib.List.mem ext_sym.name extern_symbol_blacklist)) in
match List.is_empty extern with
| true -> false
| false -> begin
(* Only the destination tid of each call is needed. *)
let calls = List.map (get_calls program) ~f:(fun call -> match call with (_, dst) -> dst) in
let not_resolved = List.filter extern ~f:(fun ext_sym -> not (Stdlib.List.mem ext_sym.tid calls)) in
(* [not_resolved] is a sublist of [extern], so the lengths differ exactly when at least one symbol is a call destination. *)
List.length extern <> List.length not_resolved
let transform_call_to_concrete_call ((src_tid, dst_tid) : tid * tid) (symbols : symbol list) : concrete_call =
match (get_symbol dst_tid symbols) with
| Some symbol -> {call_site = src_tid; symbol_address = dst_tid; name = symbol.name}
......@@ -23,6 +23,13 @@ type extern_symbol = {
; args : (Bap.Std.Var.t * Bap.Std.Exp.t * Bap.Std.intent option) list;
(** Returns a list of those function names that are extern symbols.
TODO: Since we do not do name demangling here, check whether bap name demangling
yields different function names for the symbols. *)
val parse_dyn_syms: Bap.Std.Project.t -> String.Set.t
(** Parses a single line of the dynamic symbol table printed by objdump.
    Returns the symbol name if the line describes an extern function symbol and [None] otherwise. *)
val parse_dyn_sym_line : string -> string option
(** Returns the calling convention for the whole project inferred by Bap. *)
val get_project_calling_convention : Bap.Std.Project.t -> string option
......@@ -62,6 +69,13 @@ val calls_callsite_symbol : Bap.Std.Jmp.t -> symbol -> bool
(** This function finds all (direct) calls in a program. It returns a list of tuples of (callsite, address).*)
val call_finder : (Bap.Std.tid * Bap.Std.tid) list Bap.Std.Term.visitor
(** Returns the calls of the program as a list of tuples of (callsite, address).
    The first invocation runs the call_finder and stores its result in a global variable.
    Later invocations return the stored list without inspecting the given program again. *)
val get_calls : Bap.Std.program Bap.Std.term -> (Bap.Std.tid * Bap.Std.tid) list
(** Checks whether extern symbols have been resolved by Bap.
    Returns [true] if at least one non-blacklisted extern symbol is the destination of a call found in the program.
    Returns [false] if not a single symbol has been resolved; reporting an error is left to the caller. *)
val check_if_symbols_resolved : Bap.Std.Project.t -> Bap.Std.program Bap.Std.term -> Bap.Std.word Bap.Std.Tid.Map.t -> bool
(** Transform a call (e.g. found with call_finder) to concrete_call with the symbol resolved.*)
val transform_call_to_concrete_call :
Bap.Std.tid * Bap.Std.tid -> symbol list -> concrete_call
......@@ -2,6 +2,8 @@ all:
bapbundle remove unit_tests_cwe_checker.plugin
bapbuild -r -Is analysis,checkers,utils unit_tests_cwe_checker.plugin -pkgs alcotest,yojson,unix,ppx_jane,cwe_checker_core
bapbundle install unit_tests_cwe_checker.plugin
bap tmp/no_symbols_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Symbols
bap tmp/arrays_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=DynSyms
bap tmp/arrays_x86_64-w64-mingw32_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv
bap tmp/arrays_i686-w64-mingw32_gcc.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv
bap tmp/arrays_x86_clang.out --pass=unit-tests-cwe-checker --unit-tests-cwe-checker-tests=Cconv
......@@ -175,7 +175,7 @@ let test_malloc_call_return_reg () =
let state = update_block_analysis block fn_start_state ~sub_tid ~project in
(* test whether the return register is marked as a pointer register. This fails if the example project is not a x64 binary. *)
let state_reg_list = Map.to_alist state.TypeInfo.reg in
let () = String.Set.iter (Cconv.parse_dyn_syms project) ~f:(fun elem -> print_endline elem) in
let () = String.Set.iter (Symbol_utils.parse_dyn_syms project) ~f:(fun elem -> print_endline elem) in
let () = check "malloc_return_register_marked" (match List.find state_reg_list ~f:(fun (var, _register_info) -> Var.name var = "RAX") with
| Some((_var, register_info)) -> (* TODO: test whether the target is set correctly. *)
begin match register_info with
......@@ -2,3 +2,4 @@
./compile_testfile.sh testfiles/arrays.c
./compile_testfile.sh testfiles/c_constructs.c
./compile_testfile.sh testfiles/no_symbols.c
int main () {
return 0;
......@@ -34,6 +34,8 @@ let unit_test_list = [
"CWE476", Cwe_476_test.tests;
"CWE560", Cwe_560_test.tests;
"AddrTrans", Address_translation_test.tests;
"Symbols", Symbol_utils_test.tests;
"DynSyms", Parse_dyn_syms_test.tests;
"SerdeJson", Serde_json_test.tests;
......@@ -65,6 +67,8 @@ let set_example_project (project : Project.t) (tests : string list) =
Cconv_test.example_bin_format := Some(get_test_bin_format project)
| "CWE476" -> Cwe_476_test.example_project := Some(project)
| "Symbols" -> Symbol_utils_test.example_project := Some(project)
| "DynSyms" -> Parse_dyn_syms_test.example_project := Some(project)
| "SerdeJson" -> Serde_json_test.example_project := Some(project)
| _ -> ()
open Core_kernel
open Cwe_checker_core
open Symbol_utils
let check msg x = Alcotest.(check bool) msg true x
let example_project = ref None
(** Checks that [parse_dyn_syms] contains "free", "__libc_start_main" and "malloc"
    for the example project and does not contain "realloc". *)
let test_parse_dyn_syms () =
(* This test assumes that the example project is the arrays_x64.out binary from the artificial samples. *)
let project = Option.value_exn !example_project in
let () = check "free_as_dyn_sym" (String.Set.mem (parse_dyn_syms project) "free") in
let () = check "__libc_start_main_as_dyn_sym" (String.Set.mem (parse_dyn_syms project) "__libc_start_main") in
let () = check "malloc_as_dyn_sym" (String.Set.mem (parse_dyn_syms project) "malloc") in
let () = check "realloc_not_a_dyn_sym" (false = String.Set.mem (parse_dyn_syms project) "realloc") in
let tests = [
"Parse Dynamic Symbols", `Quick, test_parse_dyn_syms;
open Bap.Std
val example_project: Project.t option ref
val tests: unit Alcotest.test_case list
open Core_kernel
open Cwe_checker_core
open Bap.Std
open Symbol_utils
let check msg x = Alcotest.(check bool) msg true x
let example_project = ref None
(** Checks that [check_if_symbols_resolved] returns [false] for the example project.
    NOTE(review): presumably the example project is the no_symbols test binary - confirm against the test Makefile. *)
let test_check_if_symbols_resolved () =
let project = Option.value_exn !example_project in
let program = Project.program project in
(* The tid map is generated the same way the plugin does before running the check. *)
let tid_address_map = Address_translation.generate_tid_map program in
let () = check "no_symbols" (check_if_symbols_resolved project program tid_address_map = false) in
let tests = [
"Check if Symbols Resolved", `Quick, test_check_if_symbols_resolved;
open Bap.Std
val example_project: Project.t option ref
val tests: unit Alcotest.test_case list
