Unverified Commit 4e30a70d by Melvin Klimke Committed by GitHub

Symbol checking (#58)

Added a new symbol structure enabling more precise handling of extern symbols.
parent 5c607b11
......@@ -6,6 +6,7 @@ exception NoOutputFileException of string
exception NoModulesException of string
exception NoConfigException of string
exception NoBinaryPathException of string
exception NoApiFileException of string
let rec get_difference (set_a : 'a list) (set_b: 'a list) : 'a list =
......@@ -48,39 +49,36 @@ let rec remove_element (flags : string list) (element: string): string list =
| false -> head::remove_element tail element
let check_config (input : string list) : unit =
match find_prefix input "-config" with
| None -> Cwe_checker_core.Log_utils.info "Using standard configuration..."
| Some c ->
match Stdlib.List.nth_opt (String.split c ~on:'=') 1 with
| None | Some ""-> raise (NoConfigException "No config file provided. If -config flag set please provide a config file.")
| Some f -> if (Sys.file_exists f) then () else raise (InvalidPathException "Path to config file not valid")
let raise_no_content_exception (param : string) : unit =
match param with
| "-config" -> raise (NoConfigException "No config file provided. If -config flag set please provide a config file.")
| "-out" -> raise (NoOutputFileException "No output file provided. If -out flag is set please provide an out file.")
| "-partial" -> raise (NoModulesException "No modules provided. If -partial flag is set, please provide the corresponding modules.")
| "-api" -> raise (NoApiFileException "No header file provided. If -api flag is set, please provide a valid header file.")
| _ -> failwith "Invalid param."
let check_output_path (input : string list) : unit =
match find_prefix input "-out" with
| Some param -> begin
try
match Stdlib.List.nth (String.split param ~on:'=') 1 with
| "" -> raise (NoOutputFileException "No output file provided. If -out flag is set please provide an out file.")
let check_content (input : string) (param : string) : unit =
match Stdlib.List.nth_opt (String.split input ~on:'=') 1 with
| None | Some "" -> raise_no_content_exception param
| Some content -> begin
match param with
| "-partial" -> check_valid_module_list (String.split_on_chars content ~on:[','])
| "-config" | "-api" -> if (Sys.file_exists content) then () else raise (InvalidPathException "Path to config file not valid")
| _ -> ()
with
| _ -> raise (NoOutputFileException "No output file provided. If -out flag is set please provide an out file.")
end
| None -> ()
let setup_flags (flags : string list) : string =
String.concat ~sep:" " (List.map ~f:(fun pre -> "--cwe-checker" ^ pre) flags)
let check_partial (input : string list) : unit =
match find_prefix input "-partial" with
| None -> ()
| Some p ->
match Stdlib.List.nth_opt (String.split p ~on:'=') 1 with
| None | Some "" -> raise (NoModulesException "No modules provided. If -partial flag is set please provide the corresponding modules.")
| Some modules -> check_valid_module_list (String.split_on_chars modules ~on:[','])
let check_params (params : string list) (input : string list) : unit =
List.iter params ~f:(fun param ->
match find_prefix input param with
| None -> begin
match (String.equal param "-config") with
| true -> Cwe_checker_core.Log_utils.info "Using standard configuration..."
| false -> ()
end
| Some p -> check_content p param
)
let validate_user_input (input : string list) : unit =
......@@ -133,7 +131,7 @@ let rec check_for_binary_path (args : string list) : string =
)
let process_input () : string * string list =
let process_input (() : unit) : string * string list =
match get_user_input () with
| [] -> raise (NoBinaryPathException ("No binary path was provided. If you need help, please call the cwe_checker with the --help or -h flag"))
| input -> (
......@@ -142,22 +140,34 @@ let process_input () : string * string list =
if check_for_module_versions input then exit 0;
check_for_no_logging input;
let binary_path = check_for_binary_path input in
let split_flags = List.partition_tf input ~f:(fun x -> (String.is_prefix x ~prefix:"-config") || (String.is_prefix x ~prefix:"-out") || (String.is_prefix x ~prefix:"-partial")) in
let split_flags = List.partition_tf input ~f:(fun x -> (String.is_prefix x ~prefix:"-config") || (String.is_prefix x ~prefix:"-out")
|| (String.is_prefix x ~prefix:"-partial") || (String.is_prefix x ~prefix:"-api")) in
let flags = remove_element (snd split_flags) binary_path in
let params = fst split_flags in
check_partial params; check_config params; check_output_path params;
(binary_path, params @ process_flags flags)
let input_params = fst split_flags in
let params = List.map cmdline_params ~f:(fun param -> match param with | (p, _) -> "-" ^ p) in
check_params params input_params;
(binary_path, input_params @ process_flags flags)
)
let setup_command (bin_path : string) (args : string list) : string =
let bare_command = "bap " ^ bin_path ^ " --pass=cwe-checker " in
let command_args = String.concat ~sep:" " (List.map args ~f:(fun arg ->
match (String.is_prefix arg ~prefix:"-api") with
| true -> "--api-path=" ^ (Stdlib.List.nth (String.split arg ~on:'=') 1)
| false -> "--cwe-checker" ^ arg)) in
bare_command ^ command_args
let main () : int =
match Array.length Sys.argv with
| 1 -> print_help_message (); 0
| _ ->
let args = process_input () in
match snd args with
| [] -> Sys.command ("bap " ^ fst args ^ " --pass=cwe-checker ")
| _ -> Sys.command ("bap " ^ fst args ^ " --pass=cwe-checker " ^ setup_flags (snd args))
let (bin_path, args) = process_input () in
match args with
| [] -> Sys.command ("bap " ^ bin_path ^ " --pass=cwe-checker ")
| _ -> Sys.command (setup_command bin_path args)
let _ = exit (main ())
......@@ -31,7 +31,7 @@ let get_call_dests_of_sub sub =
end
| _ -> []
let rec check dests symbols =
let rec check dests (symbols : symbol list) =
match dests with
| [] -> (List.length symbols) = 0
| hd :: tl ->
......
......@@ -4,6 +4,8 @@
(libraries
yojson
bap
bap-api
bap-abi
core_kernel
ppx_deriving_yojson.runtime)
(preprocess (pps ppx_jane ppx_deriving_yojson))
......
......@@ -39,6 +39,7 @@ let cmdline_params = [
("config", "Path to configuration file.");
("out", "Path to output file.");
("partial", "Comma separated list of modules to apply on binary, e.g. 'CWE332,CWE476,CWE782'");
("api", "C header file for additional subroutine information.")
]
let build_version_sexp () =
......
open Core_kernel
open Bap.Std
let translate_tid_to_assembler_address_string tid tid_map =
let translate_tid_to_assembler_address_string (tid : tid) (tid_map : word Tid.Map.t) : string =
match Tid.Map.find tid_map tid with
| Some asm_addr -> Word.to_string asm_addr
| _ -> "UNKNOWN"
let generate_tid_map prog =
let generate_tid_map (prog : program term) : word Tid.Map.t =
(object
inherit [addr Tid.Map.t] Term.visitor
method! enter_term _ t addrs = match Term.get_attr t address with
......@@ -14,4 +16,5 @@ let generate_tid_map prog =
| Some addr -> Map.add_exn addrs ~key:(Term.tid t) ~data:addr
end)#run prog Tid.Map.empty
let tid_to_string tid = Bap.Std.Tid.name tid
let tid_to_string tid = Bap.Std.Tid.name tid
......@@ -12,5 +12,6 @@ val generate_tid_map :
val translate_tid_to_assembler_address_string :
Bap.Std.tid -> Bap.Std.word Bap.Std.Tid.Map.t -> string
val tid_to_string :
Bap.Std.tid -> string
......@@ -93,7 +93,7 @@ let is_return_register (var: Var.t) (project: Project.t) : Bool.t =
let ret_register = get_return_register_list project in
Option.is_some (List.find ret_register ~f:(String.equal (Var.name var)))
(** Parse a line from the dyn-syms output table of readelf. Return the name of a symbol if the symbol is an extern function name. *)
(** Parse a line from the dyn-syms output table of objdump. Return the name of a symbol if the symbol is an extern function name. *)
let parse_dyn_sym_line (line : string) : string option =
let line = ref (String.strip line) in
let str_list = ref [] in
......@@ -116,7 +116,7 @@ let parse_dyn_sym_line (line : string) : string option =
end
| _ -> None
let parse_dyn_syms project =
let parse_dyn_syms (project : Project.t) : String.Set.t =
match !dyn_syms with
| Some(symbol_set) -> symbol_set
| None ->
......
......@@ -8,18 +8,74 @@ type symbol =
}
let find_symbol program name =
type extern_symbol =
{
tid : tid;
address : string;
name : string;
cconv : string option;
args : (Var.t * Exp.t * intent option) list;
}
let extern_symbols = ref []
let get_project_calling_convention (project : Project.t) : string option =
Project.get project Bap_abi.name
let build_extern_symbols (project : Project.t) (program : program term) (parsed_symbols : string list) (tid_map : word Tid.Map.t) : unit =
let calling_convention = get_project_calling_convention project in
extern_symbols := List.append !extern_symbols (Seq.to_list (Seq.filter_map (Term.enum sub_t program) ~f:(fun s ->
let sub_name = Sub.name s in
let sub_tid = Term.tid s in
match (Stdlib.List.mem sub_name parsed_symbols) with
| true -> begin
let addr = Address_translation.translate_tid_to_assembler_address_string sub_tid tid_map in
let args = Seq.to_list (Seq.map (Term.enum arg_t s) ~f:(fun a -> (Arg.lhs a, Arg.rhs a, Arg.intent a))) in
Some({tid=sub_tid; address=addr; name=sub_name; cconv=calling_convention; args=args;})
end
| false -> None)))
let build_and_return_extern_symbols (project : Project.t) (program : program term) (tid_map : word Tid.Map.t) : extern_symbol list =
let parsed_symbols = Cconv.parse_dyn_syms project in
if String.Set.is_empty parsed_symbols then []
else begin
match !extern_symbols with
| [] -> build_extern_symbols project program (String.Set.to_list parsed_symbols) tid_map; !extern_symbols
| _ -> !extern_symbols
end
let add_as_extern_symbol (project : Project.t) (program : program term) (symbol : string) (tid_map : word Tid.Map.t) : unit =
Seq.iter (Term.enum sub_t program) ~f:(fun s ->
match String.equal (Sub.name s) symbol with
| true -> begin
let sub_tid = Term.tid s in
let args = Seq.to_list (Seq.map (Term.enum arg_t s) ~f:(fun a -> (Arg.lhs a, Arg.rhs a, Arg.intent a))) in
let addr = Address_translation.translate_tid_to_assembler_address_string sub_tid tid_map in
extern_symbols := List.append !extern_symbols [{tid=sub_tid; address=addr; name=(Sub.name s); cconv=(get_project_calling_convention project); args=args}]
end
| false -> ()
)
let find_symbol (program : program term) (name : string) : tid option =
Term.enum sub_t program |>
Seq.find_map ~f:(fun s -> Option.some_if (Sub.name s = name) (Term.tid s))
let build_symbols symbol_names prog =
let build_symbols (symbol_names : string list) (prog : program term) : symbol list =
List.map symbol_names ~f:(fun symbol -> let symbol_address = find_symbol prog symbol in
{address = symbol_address; name = symbol;})
|> List.filter ~f:(fun symbol -> match symbol.address with
| Some _ -> true
| _ -> false)
let get_symbol_of_string prog name =
let get_symbol_of_string (prog : program term) (name : string) : symbol option =
let symbol_address = find_symbol prog name in
match symbol_address with
| Some _ -> Some ({
......@@ -28,13 +84,15 @@ let get_symbol_of_string prog name =
})
| None -> None
let get_symbol tid symbols =
let get_symbol (tid : tid) (symbols : symbol list) : symbol option =
List.find symbols ~f:(
fun symbol -> match symbol.address with
| Some address -> tid = address
| None -> false)
let get_symbol_name_from_jmp jmp symbols =
let get_symbol_name_from_jmp (jmp : Jmp.t) (symbols : symbol list) : string =
match Jmp.kind jmp with
| Goto _ | Ret _ | Int (_,_) -> assert(false)
| Call destination -> begin
......@@ -50,7 +108,8 @@ let get_symbol_name_from_jmp jmp symbols =
| _ -> assert(false)
end
let get_direct_callsites_of_sub sub =
let get_direct_callsites_of_sub (sub : sub term) : jmp term Sequence.t =
Term.enum blk_t sub |>
Seq.concat_map ~f:(fun blk ->
Term.enum jmp_t blk |> Seq.filter_map ~f:(fun j ->
......@@ -61,7 +120,8 @@ Term.enum blk_t sub |>
| _ -> None
end))
let sub_calls_symbol prog sub symbol_name =
let sub_calls_symbol (prog : program term) (sub : sub term) (symbol_name : string) : bool =
let symbol_struct = find_symbol prog symbol_name in
match symbol_struct with
| Some s -> begin
......@@ -74,7 +134,8 @@ let sub_calls_symbol prog sub symbol_name =
end
| _ -> false
let calls_callsite_symbol jmp symbol =
let calls_callsite_symbol (jmp : Jmp.t) (symbol : symbol) : bool =
match Jmp.kind jmp with
| Goto _ | Ret _ | Int (_,_) -> false
| Call dst -> begin
......@@ -95,7 +156,8 @@ type concrete_call =
name : string;
}
let call_finder = object
let call_finder : (tid * tid) list Term.visitor = object
inherit [(tid * tid) list] Term.visitor
method! enter_jmp jmp tid_list = match Jmp.kind jmp with
| Goto _ | Ret _ | Int (_,_) -> tid_list
......@@ -107,12 +169,13 @@ let call_finder = object
end
let transform_call_to_concrete_call (src_tid, dst_tid) symbols =
let transform_call_to_concrete_call ((src_tid, dst_tid) : tid * tid) (symbols : symbol list) : concrete_call =
match (get_symbol dst_tid symbols) with
| Some symbol -> {call_site = src_tid; symbol_address = dst_tid; name = symbol.name}
| None -> assert(false)
let filter_calls_to_symbols calls symbols =
let filter_calls_to_symbols (calls : (tid * tid) list) (symbols : symbol list) : concrete_call list =
List.filter calls ~f:(
fun (_, dst) -> List.exists symbols ~f:(
fun symbol -> match symbol.address with
......@@ -120,7 +183,8 @@ let filter_calls_to_symbols calls symbols =
| None -> false))
|> List.map ~f:(fun call -> transform_call_to_concrete_call call symbols)
let is_interesting_callsite jmp relevant_calls =
let is_interesting_callsite (jmp : Jmp.t) (relevant_calls : concrete_call list): bool =
match Jmp.kind jmp with
| Goto _ | Ret _ | Int (_,_) -> false
| Call dst -> match Call.target dst with
......@@ -128,7 +192,7 @@ let is_interesting_callsite jmp relevant_calls =
| _ -> false
let check_calls relevant_calls prog proj tid_map symbols check_func =
let check_calls (relevant_calls : concrete_call list) (prog : program term) (proj : 'a) (tid_map : 'b) (symbols : 'c) (check_func) : unit =
Seq.iter (Term.enum sub_t prog)
~f:(fun sub ->
begin
......@@ -138,7 +202,8 @@ let check_calls relevant_calls prog proj tid_map symbols check_func =
check_func proj prog sub blk jmp tid_map symbols))
end)
let get_symbol_call_count_of_sub symbol_name sub prog =
let get_symbol_call_count_of_sub (symbol_name : string) (sub : Sub.t) (prog : Program.t) : int =
match find_symbol prog symbol_name with
| Some s -> begin
Seq.to_list (get_direct_callsites_of_sub sub)
......@@ -152,7 +217,8 @@ let get_symbol_call_count_of_sub symbol_name sub prog =
end
| _ -> 0
let extract_direct_call_tid_from_block block =
let extract_direct_call_tid_from_block (block : blk term) : tid option =
let jmp_instructions = Term.enum jmp_t block in
Seq.fold jmp_instructions ~init:None ~f:(fun already_found instr ->
match already_found with
......@@ -165,7 +231,8 @@ let extract_direct_call_tid_from_block block =
Some(tid)
| _ -> None)
let get_program_entry_points (program: Program.t) : Sub.t List.t =
let get_program_entry_points (program : program term) : sub term List.t =
let subfunctions = Term.enum sub_t program in
let entry_points = Seq.filter subfunctions ~f:(fun subfn -> Term.has_attr subfn Sub.entry_point) in
match Seq.find subfunctions ~f:(fun subfn -> "main" = Sub.name subfn) with
......@@ -176,16 +243,19 @@ let get_program_entry_points (program: Program.t) : Sub.t List.t =
main_fn :: (Seq.to_list entry_points)
| None -> Seq.to_list entry_points
let stack_register project =
let stack_register (project : Project.t) : Var.t =
let arch = Project.arch project in
let module Target = (val target_of_arch arch) in
Target.CPU.sp
let flag_register_list project =
let flag_register_list (project : Project.t) : Var.t list =
let arch = Project.arch project in
let module Target = (val target_of_arch arch) in
Target.CPU.zf :: Target.CPU.cf :: Target.CPU.vf :: Target.CPU.nf :: []
let arch_pointer_size_in_bytes project : int =
let arch_pointer_size_in_bytes (project : Project.t) : int =
let arch = Project.arch project in
Size.in_bytes (Arch.addr_size arch)
......@@ -14,6 +14,28 @@ type symbol = {
; name : string;
}
(** This type represents an external symbol. *)
type extern_symbol = {
tid : Bap.Std.tid
; address : string
; name : string
; cconv : string option
; args : (Bap.Std.Var.t * Bap.Std.Exp.t * Bap.Std.intent option) list;
}
(** Returns the calling convention for the whole project inferred by Bap. *)
val get_project_calling_convention : Bap.Std.Project.t -> string option
(** Checks whether the external symbols have already been built. If not, it calls the symbol builder. *)
val build_and_return_extern_symbols : Bap.Std.Project.t -> Bap.Std.program Bap.Std.term -> Bap.Std.word Bap.Std.Tid.Map.t -> extern_symbol list
(** Builds a list of function symbols type from external function names given by objdump. *)
val build_extern_symbols : Bap.Std.Project.t -> Bap.Std.program Bap.Std.term -> string list -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
(** Adds an analysed internal symbol to the list of external symbols. *)
val add_as_extern_symbol : Bap.Std.Project.t -> Bap.Std.program Bap.Std.term -> string -> Bap.Std.word Bap.Std.Tid.Map.t -> unit
(** Finds a symbol string in a program and returns its IR address (tid). *)
val find_symbol : Bap.Std.program Bap.Std.term -> string -> Bap.Std.tid option
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment