Unverified Commit 4e30a70d by Melvin Klimke Committed by GitHub

Symbol checking (#58)

Added a new symbol structure enabling more precise handling of extern symbols.
parent 5c607b11
......@@ -6,6 +6,7 @@ exception NoOutputFileException of string
exception NoModulesException of string
exception NoConfigException of string
exception NoBinaryPathException of string
exception NoApiFileException of string
let rec get_difference (set_a : 'a list) (set_b: 'a list) : 'a list =
......@@ -48,39 +49,36 @@ let rec remove_element (flags : string list) (element: string): string list =
| false -> head::remove_element tail element
let check_config (input : string list) : unit =
match find_prefix input "-config" with
| None -> Cwe_checker_core.Log_utils.info "Using standard configuration..."
| Some c ->
match Stdlib.List.nth_opt (String.split c ~on:'=') 1 with
| None | Some ""-> raise (NoConfigException "No config file provided. If -config flag set please provide a config file.")
| Some f -> if (Sys.file_exists f) then () else raise (InvalidPathException "Path to config file not valid")
let raise_no_content_exception (param : string) : unit =
match param with
| "-config" -> raise (NoConfigException "No config file provided. If -config flag set please provide a config file.")
| "-out" -> raise (NoOutputFileException "No output file provided. If -out flag is set please provide an out file.")
| "-partial" -> raise (NoModulesException "No modules provided. If -partial flag is set, please provide the corresponding modules.")
| "-api" -> raise (NoApiFileException "No header file provided. If -api flag is set, please provide a valid header file.")
| _ -> failwith "Invalid param."
let check_output_path (input : string list) : unit =
match find_prefix input "-out" with
| Some param -> begin
try
match Stdlib.List.nth (String.split param ~on:'=') 1 with
| "" -> raise (NoOutputFileException "No output file provided. If -out flag is set please provide an out file.")
| _ -> ()
with
| _ -> raise (NoOutputFileException "No output file provided. If -out flag is set please provide an out file.")
let check_content (input : string) (param : string) : unit =
match Stdlib.List.nth_opt (String.split input ~on:'=') 1 with
| None | Some "" -> raise_no_content_exception param
| Some content -> begin
match param with
| "-partial" -> check_valid_module_list (String.split_on_chars content ~on:[','])
| "-config" | "-api" -> if (Sys.file_exists content) then () else raise (InvalidPathException "Path to config file not valid")
| _ -> ()
end
| None -> ()
let setup_flags (flags : string list) : string =
String.concat ~sep:" " (List.map ~f:(fun pre -> "--cwe-checker" ^ pre) flags)
let check_partial (input : string list) : unit =
match find_prefix input "-partial" with
| None -> ()
| Some p ->
match Stdlib.List.nth_opt (String.split p ~on:'=') 1 with
| None | Some "" -> raise (NoModulesException "No modules provided. If -partial flag is set please provide the corresponding modules.")
| Some modules -> check_valid_module_list (String.split_on_chars modules ~on:[','])
let check_params (params : string list) (input : string list) : unit =
List.iter params ~f:(fun param ->
match find_prefix input param with
| None -> begin
match (String.equal param "-config") with
| true -> Cwe_checker_core.Log_utils.info "Using standard configuration..."
| false -> ()
end
| Some p -> check_content p param
)
let validate_user_input (input : string list) : unit =
......@@ -133,7 +131,7 @@ let rec check_for_binary_path (args : string list) : string =
)
let process_input () : string * string list =
let process_input (() : unit) : string * string list =
match get_user_input () with
| [] -> raise (NoBinaryPathException ("No binary path was provided. If you need help, please call the cwe_checker with the --help or -h flag"))
| input -> (
......@@ -142,22 +140,34 @@ let process_input () : string * string list =
if check_for_module_versions input then exit 0;
check_for_no_logging input;
let binary_path = check_for_binary_path input in
let split_flags = List.partition_tf input ~f:(fun x -> (String.is_prefix x ~prefix:"-config") || (String.is_prefix x ~prefix:"-out") || (String.is_prefix x ~prefix:"-partial")) in
let split_flags = List.partition_tf input ~f:(fun x -> (String.is_prefix x ~prefix:"-config") || (String.is_prefix x ~prefix:"-out")
|| (String.is_prefix x ~prefix:"-partial") || (String.is_prefix x ~prefix:"-api")) in
let flags = remove_element (snd split_flags) binary_path in
let params = fst split_flags in
check_partial params; check_config params; check_output_path params;
(binary_path, params @ process_flags flags)
let input_params = fst split_flags in
let params = List.map cmdline_params ~f:(fun param -> match param with | (p, _) -> "-" ^ p) in
check_params params input_params;
(binary_path, input_params @ process_flags flags)
)
let setup_command (bin_path : string) (args : string list) : string =
let bare_command = "bap " ^ bin_path ^ " --pass=cwe-checker " in
let command_args = String.concat ~sep:" " (List.map args ~f:(fun arg ->
match (String.is_prefix arg ~prefix:"-api") with
| true -> "--api-path=" ^ (Stdlib.List.nth (String.split arg ~on:'=') 1)
| false -> "--cwe-checker" ^ arg)) in
bare_command ^ command_args
let main () : int =
match Array.length Sys.argv with
| 1 -> print_help_message (); 0
| _ ->
let args = process_input () in
match snd args with
| [] -> Sys.command ("bap " ^ fst args ^ " --pass=cwe-checker ")
| _ -> Sys.command ("bap " ^ fst args ^ " --pass=cwe-checker " ^ setup_flags (snd args))
let (bin_path, args) = process_input () in
match args with
| [] -> Sys.command ("bap " ^ bin_path ^ " --pass=cwe-checker ")
| _ -> Sys.command (setup_command bin_path args)
let _ = exit (main ())
......@@ -31,7 +31,7 @@ let get_call_dests_of_sub sub =
end
| _ -> []
let rec check dests symbols =
let rec check dests (symbols : symbol list) =
match dests with
| [] -> (List.length symbols) = 0
| hd :: tl ->
......
......@@ -4,6 +4,8 @@
(libraries
yojson
bap
bap-api
bap-abi
core_kernel
ppx_deriving_yojson.runtime)
(preprocess (pps ppx_jane ppx_deriving_yojson))
......
......@@ -39,6 +39,7 @@ let cmdline_params = [
("config", "Path to configuration file.");
("out", "Path to output file.");
("partial", "Comma separated list of modules to apply on binary, e.g. 'CWE332,CWE476,CWE782'");
("api", "C header file for additional subroutine information.")
]
let build_version_sexp () =
......
open Core_kernel
open Bap.Std
let translate_tid_to_assembler_address_string tid tid_map =
let translate_tid_to_assembler_address_string (tid : tid) (tid_map : word Tid.Map.t) : string =
match Tid.Map.find tid_map tid with
| Some asm_addr -> Word.to_string asm_addr
| _ -> "UNKNOWN"
let generate_tid_map prog =
let generate_tid_map (prog : program term) : word Tid.Map.t =
(object
inherit [addr Tid.Map.t] Term.visitor
method! enter_term _ t addrs = match Term.get_attr t address with
| None -> addrs
| Some addr -> Map.add_exn addrs ~key:(Term.tid t) ~data:addr
end)#run prog Tid.Map.empty
end)#run prog Tid.Map.empty
let tid_to_string tid = Bap.Std.Tid.name tid
let tid_to_string tid = Bap.Std.Tid.name tid
(** This module helps to translate between IR addresses and addresses found in the actual assembler code.
(** This module helps to translate between IR addresses and addresses found in the actual assembler code.
At first, a mapping between the two addressing schemes has to be computed with the function generate_tid_map.
Call this function once at start up.Then, we can translate IR addresses (Bap.Std.tid) to addresses
Call this function once at start up.Then, we can translate IR addresses (Bap.Std.tid) to addresses
in assembler code (represented as string). *)
(** Generates a map that maps from TIDs to real addresses of the assembly code. *)
......@@ -12,5 +12,6 @@ val generate_tid_map :
val translate_tid_to_assembler_address_string :
Bap.Std.tid -> Bap.Std.word Bap.Std.Tid.Map.t -> string
val tid_to_string :
Bap.Std.tid -> string
......@@ -93,7 +93,7 @@ let is_return_register (var: Var.t) (project: Project.t) : Bool.t =
let ret_register = get_return_register_list project in
Option.is_some (List.find ret_register ~f:(String.equal (Var.name var)))
(** Parse a line from the dyn-syms output table of readelf. Return the name of a symbol if the symbol is an extern function name. *)
(** Parse a line from the dyn-syms output table of objdump. Return the name of a symbol if the symbol is an extern function name. *)
let parse_dyn_sym_line (line : string) : string option =
let line = ref (String.strip line) in
let str_list = ref [] in
......@@ -116,7 +116,7 @@ let parse_dyn_sym_line (line : string) : string option =
end
| _ -> None
let parse_dyn_syms project =
let parse_dyn_syms (project : Project.t) : String.Set.t =
match !dyn_syms with
| Some(symbol_set) -> symbol_set
| None ->
......
......@@ -8,18 +8,74 @@ type symbol =
}
let find_symbol program name =
type extern_symbol =
{
tid : tid;
address : string;
name : string;
cconv : string option;
args : (Var.t * Exp.t * intent option) list;
}
let extern_symbols = ref []
let get_project_calling_convention (project : Project.t) : string option =
Project.get project Bap_abi.name
let build_extern_symbols (project : Project.t) (program : program term) (parsed_symbols : string list) (tid_map : word Tid.Map.t) : unit =
let calling_convention = get_project_calling_convention project in
extern_symbols := List.append !extern_symbols (Seq.to_list (Seq.filter_map (Term.enum sub_t program) ~f:(fun s ->
let sub_name = Sub.name s in
let sub_tid = Term.tid s in
match (Stdlib.List.mem sub_name parsed_symbols) with
| true -> begin
let addr = Address_translation.translate_tid_to_assembler_address_string sub_tid tid_map in
let args = Seq.to_list (Seq.map (Term.enum arg_t s) ~f:(fun a -> (Arg.lhs a, Arg.rhs a, Arg.intent a))) in
Some({tid=sub_tid; address=addr; name=sub_name; cconv=calling_convention; args=args;})
end
| false -> None)))
let build_and_return_extern_symbols (project : Project.t) (program : program term) (tid_map : word Tid.Map.t) : extern_symbol list =
let parsed_symbols = Cconv.parse_dyn_syms project in
if String.Set.is_empty parsed_symbols then []
else begin
match !extern_symbols with
| [] -> build_extern_symbols project program (String.Set.to_list parsed_symbols) tid_map; !extern_symbols
| _ -> !extern_symbols
end
let add_as_extern_symbol (project : Project.t) (program : program term) (symbol : string) (tid_map : word Tid.Map.t) : unit =
Seq.iter (Term.enum sub_t program) ~f:(fun s ->
match String.equal (Sub.name s) symbol with
| true -> begin
let sub_tid = Term.tid s in
let args = Seq.to_list (Seq.map (Term.enum arg_t s) ~f:(fun a -> (Arg.lhs a, Arg.rhs a, Arg.intent a))) in
let addr = Address_translation.translate_tid_to_assembler_address_string sub_tid tid_map in
extern_symbols := List.append !extern_symbols [{tid=sub_tid; address=addr; name=(Sub.name s); cconv=(get_project_calling_convention project); args=args}]
end
| false -> ()
)
let find_symbol (program : program term) (name : string) : tid option =
Term.enum sub_t program |>
Seq.find_map ~f:(fun s -> Option.some_if (Sub.name s = name) (Term.tid s))
Seq.find_map ~f:(fun s -> Option.some_if (Sub.name s = name) (Term.tid s))
let build_symbols symbol_names prog =
let build_symbols (symbol_names : string list) (prog : program term) : symbol list =
List.map symbol_names ~f:(fun symbol -> let symbol_address = find_symbol prog symbol in
{address = symbol_address; name = symbol;})
|> List.filter ~f:(fun symbol -> match symbol.address with
| Some _ -> true
| _ -> false)
let get_symbol_of_string prog name =
let get_symbol_of_string (prog : program term) (name : string) : symbol option =
let symbol_address = find_symbol prog name in
match symbol_address with
| Some _ -> Some ({
......@@ -28,13 +84,15 @@ let get_symbol_of_string prog name =
})
| None -> None
let get_symbol tid symbols =
let get_symbol (tid : tid) (symbols : symbol list) : symbol option =
List.find symbols ~f:(
fun symbol -> match symbol.address with
| Some address -> tid = address
| None -> false)
let get_symbol_name_from_jmp jmp symbols =
let get_symbol_name_from_jmp (jmp : Jmp.t) (symbols : symbol list) : string =
match Jmp.kind jmp with
| Goto _ | Ret _ | Int (_,_) -> assert(false)
| Call destination -> begin
......@@ -50,7 +108,8 @@ let get_symbol_name_from_jmp jmp symbols =
| _ -> assert(false)
end
let get_direct_callsites_of_sub sub =
let get_direct_callsites_of_sub (sub : sub term) : jmp term Sequence.t =
Term.enum blk_t sub |>
Seq.concat_map ~f:(fun blk ->
Term.enum jmp_t blk |> Seq.filter_map ~f:(fun j ->
......@@ -61,7 +120,8 @@ Term.enum blk_t sub |>
| _ -> None
end))
let sub_calls_symbol prog sub symbol_name =
let sub_calls_symbol (prog : program term) (sub : sub term) (symbol_name : string) : bool =
let symbol_struct = find_symbol prog symbol_name in
match symbol_struct with
| Some s -> begin
......@@ -74,7 +134,8 @@ let sub_calls_symbol prog sub symbol_name =
end
| _ -> false
let calls_callsite_symbol jmp symbol =
let calls_callsite_symbol (jmp : Jmp.t) (symbol : symbol) : bool =
match Jmp.kind jmp with
| Goto _ | Ret _ | Int (_,_) -> false
| Call dst -> begin
......@@ -95,7 +156,8 @@ type concrete_call =
name : string;
}
let call_finder = object
let call_finder : (tid * tid) list Term.visitor = object
inherit [(tid * tid) list] Term.visitor
method! enter_jmp jmp tid_list = match Jmp.kind jmp with
| Goto _ | Ret _ | Int (_,_) -> tid_list
......@@ -107,12 +169,13 @@ let call_finder = object
end
let transform_call_to_concrete_call (src_tid, dst_tid) symbols =
let transform_call_to_concrete_call ((src_tid, dst_tid) : tid * tid) (symbols : symbol list) : concrete_call =
match (get_symbol dst_tid symbols) with
| Some symbol -> {call_site = src_tid; symbol_address = dst_tid; name = symbol.name}
| None -> assert(false)
let filter_calls_to_symbols calls symbols =
let filter_calls_to_symbols (calls : (tid * tid) list) (symbols : symbol list) : concrete_call list =
List.filter calls ~f:(
fun (_, dst) -> List.exists symbols ~f:(
fun symbol -> match symbol.address with
......@@ -120,7 +183,8 @@ let filter_calls_to_symbols calls symbols =
| None -> false))
|> List.map ~f:(fun call -> transform_call_to_concrete_call call symbols)
let is_interesting_callsite jmp relevant_calls =
let is_interesting_callsite (jmp : Jmp.t) (relevant_calls : concrete_call list): bool =
match Jmp.kind jmp with
| Goto _ | Ret _ | Int (_,_) -> false
| Call dst -> match Call.target dst with
......@@ -128,7 +192,7 @@ let is_interesting_callsite jmp relevant_calls =
| _ -> false
let check_calls relevant_calls prog proj tid_map symbols check_func =
let check_calls (relevant_calls : concrete_call list) (prog : program term) (proj : 'a) (tid_map : 'b) (symbols : 'c) (check_func) : unit =
Seq.iter (Term.enum sub_t prog)
~f:(fun sub ->
begin
......@@ -138,7 +202,8 @@ let check_calls relevant_calls prog proj tid_map symbols check_func =
check_func proj prog sub blk jmp tid_map symbols))
end)
let get_symbol_call_count_of_sub symbol_name sub prog =
let get_symbol_call_count_of_sub (symbol_name : string) (sub : Sub.t) (prog : Program.t) : int =
match find_symbol prog symbol_name with
| Some s -> begin
Seq.to_list (get_direct_callsites_of_sub sub)
......@@ -152,7 +217,8 @@ let get_symbol_call_count_of_sub symbol_name sub prog =
end
| _ -> 0
let extract_direct_call_tid_from_block block =
let extract_direct_call_tid_from_block (block : blk term) : tid option =
let jmp_instructions = Term.enum jmp_t block in
Seq.fold jmp_instructions ~init:None ~f:(fun already_found instr ->
match already_found with
......@@ -165,7 +231,8 @@ let extract_direct_call_tid_from_block block =
Some(tid)
| _ -> None)
let get_program_entry_points (program: Program.t) : Sub.t List.t =
let get_program_entry_points (program : program term) : sub term List.t =
let subfunctions = Term.enum sub_t program in
let entry_points = Seq.filter subfunctions ~f:(fun subfn -> Term.has_attr subfn Sub.entry_point) in
match Seq.find subfunctions ~f:(fun subfn -> "main" = Sub.name subfn) with
......@@ -176,16 +243,19 @@ let get_program_entry_points (program: Program.t) : Sub.t List.t =
main_fn :: (Seq.to_list entry_points)
| None -> Seq.to_list entry_points
let stack_register project =
let stack_register (project : Project.t) : Var.t =
let arch = Project.arch project in
let module Target = (val target_of_arch arch) in
Target.CPU.sp
let flag_register_list project =
let flag_register_list (project : Project.t) : Var.t list =
let arch = Project.arch project in
let module Target = (val target_of_arch arch) in
Target.CPU.zf :: Target.CPU.cf :: Target.CPU.vf :: Target.CPU.nf :: []
let arch_pointer_size_in_bytes project : int =
let arch_pointer_size_in_bytes (project : Project.t) : int =
let arch = Project.arch project in
Size.in_bytes (Arch.addr_size arch)
......@@ -14,6 +14,28 @@ type symbol = {
; name : string;
}
(** This type represents an external symbol. *)
type extern_symbol = {
tid : Bap.Std.tid
; address : string
; name : string
; cconv : string option
; args : (Bap.Std.Var.t * Bap.Std.Exp.t * Bap.Std.intent option) list;
}
(** Returns the calling convention for the whole project inferred by Bap. *)
val get_project_calling_convention : Bap.Std.Project.t -> string option
(** Checks whether the external symbols have already been built. If not, it calls the symbol builder. *)
val build_and_return_extern_symbols : Bap.Std.Project.t -> Bap.Std.program Bap.Std.term -> Bap.Std.word Bap.Std.Tid.Map.t -> extern_symbol list
(** Builds a list of function symbols type from external function names given by objdump. *)
val build_extern_symbols : Bap.Std.Project.t -> Bap.Std.program Bap.Std.term -> string list -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
(** Adds an analysed internal symbol to the list of external symbols. *)
val add_as_extern_symbol : Bap.Std.Project.t -> Bap.Std.program Bap.Std.term -> string -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
(** Finds a symbol string in a program and returns its IR address (tid). *)
val find_symbol : Bap.Std.program Bap.Std.term -> string -> Bap.Std.tid option
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment