Commit c631bcfe by Enkelmann Committed by Thomas Barabosch

Type inference (#18)

Initial version of type inference. It is still very rudimentary at the moment, since it only tracks pointers, but it is a very solid start!
parent 2d0fdfc6
......@@ -10,7 +10,7 @@
- Improved check for CWE-476 (NULL Pointer Dereference) using data flow analysis (PR #11)
- Added cwe_checker_emulation plugin based on BAP's Primus to detect CWE-125, CWE-415, and CWE-416 (PR #15)
- Switched C build system from make to scons (PR #16)
- Added type inference pass (PR #14)
- Added type inference pass (PR #14, #18)
- Added unit tests to test suite (PR #14)
0.1 (2018-10-08)
......
......@@ -4,111 +4,234 @@ open Core_kernel
(** TODO:
interprocedural analysis
backward analysis to recognize which constants are pointers and which not.
extend to track FunctionPointer, DataPointer
extend to track PointerTargets
maybe extend to track FunctionPointer,
TODO: There are no checks yet if a value from the stack of the calling function
is accessed. Maybe this should be part of another analysis.
TODO: tracking for PointerTargets should also track if another register other
than the stack register is used to access values on the stack of the current
function.
TODO: the fixpoint analysis does not track whether a pointer could have an
unknown target as long as it has at least one known target. This should be
tracked in an extra analysis step after the fixpoint analysis finished.
*)
let name = "Type Inference"
let version = "0.1"
let version = "0.2"
(* TODO: result_option and result_map should be abstracted away into its own data type and into its own file. *)
(* Generic merge of two ('a, unit) Result.t Option.t values.
   - If one side is None, the other side is returned unchanged.
   - Two Ok values that compare equal (polymorphic =) merge to that value.
   - Every other combination (differing Ok values, or an Error on either side
     while both sides are Some) yields Some(Error(())). *)
let merge_result_option val1 val2 =
  match val1, val2 with
  | None, other
  | other, None -> other
  | Some(Ok(x)), Some(Ok(y)) ->
    if x = y then Some(Ok(x)) else Some(Error(()))
  | Some(_), Some(_) -> Some(Error(()))
(* Generic binary operation on two ('a, unit) Result.t Option.t values.
   - An Error on either side always yields Some(Error(())).
   - Two Ok values are combined with ~op.
   - Otherwise (at least one side is None and no side is an Error) the result
     is None. *)
let binop_result_option val1 val2 ~op =
  match val1, val2 with
  | Some(Error(_)), _
  | _, Some(Error(_)) -> Some(Error(()))
  | Some(Ok(lhs)), Some(Ok(rhs)) -> Some(Ok(op lhs rhs))
  | None, _
  | _, None -> None
(* Generic merge of two ('a, unit) Result.t Map.t values.
   Keys present in only one map keep their entry. For keys present in both
   maps, two Ok values are combined with ~value_merge (whose result is stored
   as the new entry); any other pairing becomes Error(()). *)
let merge_result_map val1 val2 ~value_merge =
  let merge_entry ~key:_ = function
    | `Left(entry)
    | `Right(entry) -> Some(entry)
    | `Both(Ok(lhs), Ok(rhs)) -> Some(value_merge lhs rhs)
    | `Both(_, _) -> Some(Error(()))
  in
  Map.merge val1 val2 ~f:merge_entry
(* Generic equality of two ('a, unit) Result.t Option.t values.
   Two Ok values are compared with ~value_equal; two Errors are equal; two
   Nones are equal; every mixed combination is unequal. *)
let equal_result_option val1 val2 ~value_equal =
  match val1, val2 with
  | None, None -> true
  | Some(lhs), Some(rhs) ->
    begin match lhs, rhs with
      | Ok(x), Ok(y) -> value_equal x y
      | Error(()), Error(()) -> true
      | Ok(_), Error(())
      | Error(()), Ok(_) -> false
    end
  | None, Some(_)
  | Some(_), None -> false
(* Per-target information attached to a pointer. Both fields use the
   result-option encoding handled by merge_result_option / equal_result_option. *)
module PointerTargetInfo = struct
  type t = {
    offset: (Bitvector.t, unit) Result.t Option.t;
    alignment: (int, unit) Result.t Option.t;
  } [@@deriving bin_io, compare, sexp]

  (* Field-wise merge of two target infos. *)
  let merge info1 info2 =
    let offset = merge_result_option info1.offset info2.offset in
    let alignment = merge_result_option info1.alignment info2.alignment in
    { offset; alignment }

  (* Field-wise equality; the alignment is only compared if the offsets agree. *)
  let equal info1 info2 =
    let same_offset =
      equal_result_option info1.offset info2.offset ~value_equal:Bitvector.(=) in
    same_offset
    && equal_result_option info1.alignment info2.alignment ~value_equal:Int.(=)
end (* module *)
module Register = struct
type t =
| Pointer
| Pointer of PointerTargetInfo.t Tid.Map.t
| Data
[@@deriving bin_io, compare, sexp]
let merge reg1 reg2 =
if reg1 = reg2 then Some(Ok(reg1)) else Some(Error(()))
match (reg1, reg2) with
| (Pointer(target_info1), Pointer(target_info2)) ->
Ok(Pointer(Map.merge target_info1 target_info2 ~f:(fun ~key values ->
match values with
| `Left(info)
| `Right(info) -> Some(info)
| `Both(info1, info2) -> Some(PointerTargetInfo.merge info1 info2)
)))
| (Data, Data) -> Ok(Data)
| _ -> Error(())
(* Checks whether two registers hold the same data *)
let equal reg1 reg2 =
reg1 = reg2
end
match (reg1, reg2) with
| (Pointer(targets1), Pointer(targets2)) -> Map.equal PointerTargetInfo.equal targets1 targets2
| (Data, Data) -> true
| _ -> false
(** Adds [value_res_opt] to the offset of every possible target of a pointer
    register, combining the values with [binop_result_option]. A [Data]
    register is returned unchanged. *)
let add_to_offsets register value_res_opt =
  let shift_offset (info : PointerTargetInfo.t) =
    let shifted = binop_result_option info.PointerTargetInfo.offset value_res_opt ~op:Bitvector.(+) in
    { info with PointerTargetInfo.offset = shifted } in
  match register with
  | Data -> Data
  | Pointer(targets) -> Pointer(Map.map targets ~f:shift_offset)
(** Subtracts [value_res_opt] from the offset of every possible target of a
    pointer register, combining the values with [binop_result_option]. A
    [Data] register is returned unchanged. *)
let sub_from_offsets register value_res_opt =
  let shift_offset (info : PointerTargetInfo.t) =
    let shifted = binop_result_option info.PointerTargetInfo.offset value_res_opt ~op:Bitvector.sub in
    { info with PointerTargetInfo.offset = shifted } in
  match register with
  | Data -> Data
  | Pointer(targets) -> Pointer(Map.map targets ~f:shift_offset)
(** Resets offset and alignment of every target of a pointer register to
    unknown ([None]); the set of targets itself is kept. A [Data] register is
    returned unchanged. Used as long as alignment tracking is not implemented. *)
let set_unknown_offsets register =
  let unknown_info = { PointerTargetInfo.offset = None; alignment = None } in
  match register with
  | Data -> Data
  | Pointer(targets) -> Pointer(Map.map targets ~f:(fun _ -> unknown_info))
end (* module *)
module TypeInfo = struct
type reg_state = (Register.t, unit) Result.t Var.Map.t [@@deriving bin_io, compare, sexp]
type t = {
stack: Register.t Mem_region.t;
stack_offset: (Bitvector.t, unit) Result.t Option.t; (* If we don't know the offset, this is None, if we have conflicting values for the offset, this is Some(Error()) *)
reg: reg_state;
} [@@deriving bin_io, compare, sexp]
let merge state1 state2 =
let stack = Mem_region.merge state1.stack state2.stack ~data_merge:Register.merge in
let stack_offset = match (state1.stack_offset, state2.stack_offset) with
| (Some(Ok(x)), Some(Ok(y))) when x = y -> Some(Ok(x))
| (Some(x), None)
| (None, Some(x)) -> Some(x)
| (None, None) -> None
| _ -> Some(Error(())) in
let reg = Map.merge state1.reg state2.reg ~f:(fun ~key values ->
match values with
| `Left(reg)
| `Right(reg) -> Some(reg)
| `Both(Ok(reg1), Ok(reg2)) -> Register.merge reg1 reg2
| `Both(_, _) -> Some(Error(()))
) in
let stack = Mem_region.merge state1.stack state2.stack ~data_merge:(fun x y -> Some(Register.merge x y )) in
let reg = merge_result_map state1.reg state2.reg ~value_merge:Register.merge in
{ stack = stack;
stack_offset = stack_offset;
reg = reg }
reg = reg;
}
let equal state1 state2 =
if state1.stack_offset = state2.stack_offset && (Mem_region.equal state1.stack state2.stack ~data_equal:Register.equal) then
Map.equal (fun reg1 reg2 -> reg1 = reg2) state1.reg state2.reg
else
false
let equal state1 state2 =
if Mem_region.equal state1.stack state2.stack ~data_equal:Register.equal then
Map.equal (fun reg1 reg2 -> match (reg1, reg2) with
| (Ok(register1), Ok(register2)) -> Register.equal register1 register2
| (Error(()), Error(())) -> true
| _ -> false
) state1.reg state2.reg
else
false
(** Get an empty state. *)
let empty () =
let module VarMap = Var.Map in
{ stack = Mem_region.empty ();
stack_offset = None;
reg = VarMap.empty;
reg = Var.Map.empty;
}
(** Returns a register list with only the stack pointer as pointer register and
only the flag registers as data registers. *)
let get_stack_pointer_and_flags project =
let stack_pointer = Symbol_utils.stack_register project in
let reg = Map.set Var.Map.empty ~key:stack_pointer ~data:(Ok(Register.Pointer)) in
(** add flag register as known data register *)
let add_flags state project =
let flags = Symbol_utils.flag_register_list project in
List.fold flags ~init:reg ~f:(fun state register ->
Map.set state register (Ok(Register.Data)) )
List.fold flags ~init:state ~f:(fun state register ->
{ state with reg = (Map.set state.reg ~key:register ~data:(Ok(Register.Data))) } )
(** Marks the stack register as a pointer whose only target is [sub_tid],
    replacing whatever was stored for the stack register before (so any other
    targets are dropped). [offset] and [alignment] are recorded as known values
    when given and as unknown ([None]) otherwise. *)
let set_stack_register state ?offset ?alignment ~sub_tid ~project =
  (* lift a plain optional value into the result-option encoding *)
  let as_known = function
    | Some(x) -> Some(Ok(x))
    | None -> None in
  let stack_info =
    { PointerTargetInfo.offset = as_known offset;
      alignment = as_known alignment; } in
  let stack_target_map = Map.set Tid.Map.empty ~key:sub_tid ~data:stack_info in
  let stack_register = Symbol_utils.stack_register project in
  { state with
    reg = Map.set state.reg ~key:stack_register ~data:(Ok(Register.Pointer(stack_target_map))); }
(** Builds a state from [empty ()] in which only the flag registers (as data)
    and the stack register (as pointer to [sub_tid] with unknown offset and
    alignment) are known. The stack itself is empty. *)
let only_stack_pointer_and_flags sub_tid project =
  let flags_only = add_flags (empty ()) project in
  set_stack_register flags_only ?offset:None ?alignment:None ~sub_tid ~project
(** create a new state with stack pointer as known pointer register and all flag
registers as known data registers. The stack itself is empty and the offset
is 0. (TODO for interprocedural analysis: Ensure that the return address is
marked as a pointer!) *)
let function_start_state project =
let module VarMap = Var.Map in
let reg = get_stack_pointer_and_flags project in
{ stack = Mem_region.empty ();
stack_offset = Some(Ok(Bitvector.of_int 0 ~width:(Symbol_utils.arch_pointer_size_in_bytes project * 8)));
reg = reg;
}
(* State at function entry: flag registers are known data, the stack register
   points to [sub_tid] with offset 0 (as a bitvector of pointer width) and
   unknown alignment, and the stack is empty. *)
let function_start_state sub_tid project =
  let flags_only = add_flags (empty ()) project in
  let pointer_bits = Symbol_utils.arch_pointer_size_in_bytes project * 8 in
  let zero_offset = Bitvector.of_int 0 ~width:pointer_bits in
  set_stack_register flags_only ~offset:zero_offset ?alignment:None ~sub_tid ~project
(* Drops every register entry whose variable is not physical
   (as decided by Var.is_physical). *)
let remove_virtual_registers state =
  let physical_only = Map.filter_keys state.reg ~f:Var.is_physical in
  { state with reg = physical_only }
let stack_offset_add state (value:Bitvector.t) =
match state.stack_offset with
| Some(Ok(x)) -> { state with stack_offset = Some(Ok(Bitvector.(+) x value)) }
(** Update the offsets of all possible targets of [register] by adding [value].
    Only targets with a known offset are changed; targets with an unknown or
    conflicting offset are kept as they are. If [register] is not a known
    pointer register, the state is returned unchanged. *)
(* TODO: also implement correct offset for AND and OR if the alignment is known *)
let register_offset_add state register (value:Bitvector.t) =
  let shift_target (info : PointerTargetInfo.t) =
    match info.PointerTargetInfo.offset with
    | Some(Ok(old_offset)) ->
      { info with PointerTargetInfo.offset = Some(Ok(Bitvector.(+) old_offset value)) }
    | Some(Error(_))
    | None -> info in
  match Map.find state.reg register with
  | Some(Ok(Register.Pointer(targets))) ->
    let shifted = Register.Pointer(Map.map targets ~f:shift_target) in
    { state with reg = Map.set state.reg ~key:register ~data:(Ok(shifted)) }
  | Some(Ok(Register.Data))
  | Some(Error(_))
  | None -> state
(** if the addr_exp is a (computable) stack offset, return the offset *)
let compute_stack_offset state addr_exp ~project =
(** If addr_exp is a (computable) stack offset, return the offset. In cases where addr_exp
may or may not be a stack offset (i.e. an offset of a register which may point to the stack or
to some other memory region), an offset is still returned. *)
let compute_stack_offset state addr_exp ~sub_tid ~project : Bitvector.t Option.t =
let (register, offset) = match addr_exp with
| Bil.Var(var) -> (Some(var), Bitvector.of_int 0 ~width:(Symbol_utils.arch_pointer_size_in_bytes project * 8))
| Bil.BinOp(Bil.PLUS, Bil.Var(var), Bil.Int(num)) -> (Some(var), num)
| Bil.BinOp(Bil.MINUS, Bil.Var(var), Bil.Int(num)) -> (Some(var), Bitvector.neg (Bitvector.signed num))
| _ -> (None, Bitvector.of_int 0 ~width:(Symbol_utils.arch_pointer_size_in_bytes project * 8)) in
match (register, state.stack_offset) with
| (Some(var), Some(Ok(base_offset))) when var = (Symbol_utils.stack_register project) -> Some(Bitvector.(+) base_offset offset)
match register with
| Some(var) ->
begin match Map.find state.reg var with
| Some(Ok(Pointer(targets))) ->
begin match Map.find targets sub_tid with
| Some(target_info) ->
begin match target_info.offset with
| Some(Ok(target_offset)) -> Some(Bitvector.(+) target_offset offset)
| _ -> None
end
| None -> None
end
| _ -> None
end
| _ -> None
(* Pretty printer that just prints the sexp. Needed for the creation of type_info_tag. *)
......@@ -141,12 +264,14 @@ let rec nested_exp_list exp : Exp.t list =
exp :: nested_exp
(** If exp is a load from the stack, return the corresponding element.
(** If exp is a load from the stack, return the corresponding element. If it may be
a load from the stack, but could also be a load from some other memory region,
we still assume that the type information on the stack is correct and return it.
TODO: Bil.AND and Bil.OR are ignored, because we do not track alignment yet. *)
let get_stack_elem state exp ~project =
let get_stack_elem state exp ~sub_tid ~project =
match exp with
| Bil.Load(_, addr, endian, size) -> begin (* TODO: add a test for correct endianess *)
match TypeInfo.compute_stack_offset state addr project with
match TypeInfo.compute_stack_offset state addr sub_tid project with
| Some(offset) -> begin
match Mem_region.get state.TypeInfo.stack offset with
| Some(Ok(elem, elem_size)) ->
......@@ -161,36 +286,41 @@ let get_stack_elem state exp ~project =
end
| _ -> None
(* Compute the value of an expression. Only integer literals are recognised;
   every other expression has an unknown value (None). This is a stub and will
   be replaced when we have a proper pass for value inference. *)
let value_of_exp = function
  | Bil.Int(literal) -> Some(Ok(literal))
  | _ -> None
let rec type_of_exp exp (state: TypeInfo.t) ~project =
let rec type_of_exp exp (state: TypeInfo.t) ~sub_tid ~project =
let open Register in
match exp with
| Bil.Load(_) -> (* TODO: Right now only the stack is tracked for type infos. *)
get_stack_elem state exp ~project
get_stack_elem state exp ~sub_tid ~project
| Bil.Store(_) -> None (* Stores are handled in another function. *)
| Bil.BinOp(binop, exp1, exp2) -> begin
match (binop, type_of_exp exp1 state project, type_of_exp exp2 state project) with
match (binop, type_of_exp exp1 state sub_tid project, type_of_exp exp2 state sub_tid project) with
(* pointer arithmetics *)
| (Bil.PLUS, Some(Ok(Pointer)), Some(Ok(Pointer))) -> Some(Error(()))
| (Bil.PLUS, Some(Ok(Pointer)), other)
| (Bil.PLUS, other, Some(Ok(Pointer))) -> Some(Ok(Pointer))
| (Bil.PLUS, Some(Ok(Pointer(_))), Some(Ok(Pointer(_)))) -> Some(Error(()))
| (Bil.PLUS, Some(Ok(Pointer(targets))), summand) -> Some(Ok(Register.add_to_offsets (Pointer(targets)) (value_of_exp exp2)))
| (Bil.PLUS, summand, Some(Ok(Pointer(targets)))) -> Some(Ok(Register.add_to_offsets (Pointer(targets)) (value_of_exp exp1)))
| (Bil.PLUS, Some(Ok(Data)), Some(Ok(Data))) -> Some(Ok(Data))
| (Bil.PLUS, _, _) -> None
| (Bil.MINUS, Some(Ok(Pointer)), Some(Ok(Pointer))) -> Some(Ok(Data)) (* Pointer subtraction to determine offset is CWE-469, this should be logged. *)
| (Bil.MINUS, Some(Ok(Pointer)), other) -> Some(Ok(Pointer)) (* We assume that other is not a pointer. This can only generate errors in the presence of CWE-469 *)
| (Bil.MINUS, Some(Ok(Pointer(_))), Some(Ok(Pointer(_)))) -> Some(Ok(Data)) (* Pointer subtraction to determine offset is CWE-469, this should be logged. *)
| (Bil.MINUS, Some(Ok(Pointer(targets))), other) -> Some(Ok(Register.sub_from_offsets (Pointer(targets)) (value_of_exp exp2))) (* We assume that other is not a pointer. This can only generate errors in the presence of CWE-469 *)
| (Bil.MINUS, Some(Ok(Data)), Some(Ok(Data))) -> Some(Ok(Data))
| (Bil.MINUS, _, _) -> None
(* bitwise AND and OR can be used as addition and subtraction if some alignment of the pointer is known *)
| (Bil.AND, Some(Ok(Pointer)), Some(Ok(Pointer))) -> Some(Error(())) (* TODO: This could be a pointer, but is there any case where this is used in practice? *)
| (Bil.AND, Some(Ok(Pointer)), other)
| (Bil.AND, other, Some(Ok(Pointer))) -> Some(Ok(Pointer))
| (Bil.AND, Some(Ok(Pointer(_))), Some(Ok(Pointer(_)))) -> Some(Error(())) (* TODO: This could be a pointer, but is there any case where this is used in practice? *)
| (Bil.AND, Some(Ok(Pointer(targets))), other)
| (Bil.AND, other, Some(Ok(Pointer(targets)))) -> Some(Ok(Register.set_unknown_offsets (Pointer(targets))))
| (Bil.AND, Some(Ok(Data)), Some(Ok(Data))) -> Some(Ok(Data))
| (Bil.AND, _, _) -> None
| (Bil.OR, Some(Ok(Pointer)), Some(Ok(Pointer))) -> Some(Error(())) (* TODO: This could be a pointer, but is there any case where this is used in practice? *)
| (Bil.OR, Some(Ok(Pointer)), other)
| (Bil.OR, other, Some(Ok(Pointer))) -> Some(Ok(Pointer))
| (Bil.OR, Some(Ok(Pointer(_))), Some(Ok(Pointer(_)))) -> Some(Error(())) (* TODO: This could be a pointer, but is there any case where this is used in practice? *)
| (Bil.OR, Some(Ok(Pointer(targets))), other)
| (Bil.OR, other, Some(Ok(Pointer(targets)))) -> Some(Ok(Register.set_unknown_offsets (Pointer(targets))))
| (Bil.OR, Some(Ok(Data)), Some(Ok(Data))) -> Some(Ok(Data))
| (Bil.OR, _, _) -> None
| _ -> Some(Ok(Data)) (* every other operation should not yield valid pointers *)
......@@ -200,11 +330,11 @@ let rec type_of_exp exp (state: TypeInfo.t) ~project =
| Bil.Int(_) -> None (* TODO: For non-relocateable binaries this could be a pointer to a function/global variable *)
| Bil.Cast(Bil.SIGNED, _, _) -> Some(Ok(Data))
| Bil.Cast(_, size, exp) ->
if size = (Symbol_utils.arch_pointer_size_in_bytes project * 8) then type_of_exp exp state project else Some(Ok(Data)) (* TODO: There is probably a special case when 64bit addresses are converted to 32bit addresses here, which can yield pointers *)
if size = (Symbol_utils.arch_pointer_size_in_bytes project * 8) then type_of_exp exp state sub_tid project else Some(Ok(Data)) (* TODO: There is probably a special case when 64bit addresses are converted to 32bit addresses here, which can yield pointers *)
| Bil.Let(_) -> None
| Bil.Unknown(_) -> None
| Bil.Ite(if_, then_, else_) -> begin
match (type_of_exp then_ state project, type_of_exp else_ state project) with
match (type_of_exp then_ state sub_tid project, type_of_exp else_ state sub_tid project) with
| (Some(value1), Some(value2)) -> if value1 = value2 then Some(value1) else None
| _ -> None
end
......@@ -215,30 +345,51 @@ let pointer_size_as_bitvector project =
let psize = Symbol_utils.arch_pointer_size_in_bytes project in
Bitvector.of_int psize ~width:(psize * 8)
(* If exp is a store to the stack, add the corresponding value to the stack. If the
we cannot determine the value, delete the corresponding data on the stack. *)
let set_stack_elem state exp ~project =
(* If exp is a store to the stack, add the corresponding value to the stack if possible. If
we cannot determine the value, delete the corresponding data on the stack.
Custom behaviour applies if we cannot determine the exact position of the store or if it
is unclear whether it really was a store onto the stack or to somewhere else. *)
let set_stack_elem state exp ~sub_tid ~project =
match exp with
| Bil.Store(_, addr_exp, value_exp, endian, size) ->
begin
match (TypeInfo.compute_stack_offset state addr_exp project, type_of_exp value_exp state ~project) with
| (Some(offset), Some(Ok(value))) when Size.in_bytes size = (Symbol_utils.arch_pointer_size_in_bytes project) ->
let stack = Mem_region.add state.TypeInfo.stack value ~pos:offset ~size:(pointer_size_as_bitvector project) in
{ state with TypeInfo.stack = stack}
| (Some(offset), Some(Ok(value))) when Size.in_bytes size <> (Symbol_utils.arch_pointer_size_in_bytes project) ->
let stack = Mem_region.add state.TypeInfo.stack Register.Data ~pos:offset ~size:(Bitvector.of_int (Size.in_bytes size) ~width:(Symbol_utils.arch_pointer_size_in_bytes project)) in
{ state with TypeInfo.stack = stack}
| (Some(offset), Some(Error(_))) ->
let stack = Mem_region.mark_error state.TypeInfo.stack ~pos:offset ~size:(Bitvector.of_int (Size.in_bytes size) ~width:(Symbol_utils.arch_pointer_size_in_bytes project)) in
{ state with TypeInfo.stack = stack}
| (Some(offset), None) ->
let stack = Mem_region.remove state.TypeInfo.stack ~pos:offset ~size:(Bitvector.of_int (Size.in_bytes size) ~width:(Symbol_utils.arch_pointer_size_in_bytes project)) in
{ state with TypeInfo.stack = stack}
| _ -> state
end
let stack_offset = TypeInfo.compute_stack_offset state addr_exp ~sub_tid ~project in
let value = type_of_exp value_exp state ~sub_tid ~project in
let addr_type = type_of_exp addr_exp state ~sub_tid ~project in
let (targets_stack, target_is_unique) = match addr_type with
| Some(Ok(Pointer(targets))) -> (Option.is_some (Map.find targets sub_tid), Map.length targets = 1)
| _ -> (false, false) in
let pointer_size = Symbol_utils.arch_pointer_size_in_bytes project in
if targets_stack then
match stack_offset with
| Some(offset) ->
let new_stack =
if Size.in_bytes size = pointer_size then
match value with
| Some(Ok(inner_value)) -> Mem_region.add state.TypeInfo.stack inner_value ~pos:offset ~size:(pointer_size_as_bitvector project)
| Some(Error(_)) -> Mem_region.mark_error state.TypeInfo.stack ~pos:offset ~size:(Bitvector.of_int (Size.in_bytes size) ~width:pointer_size)
| None -> Mem_region.remove state.TypeInfo.stack ~pos:offset ~size:(Bitvector.of_int (Size.in_bytes size) ~width:pointer_size)
else (* store has to be data *)
Mem_region.add state.TypeInfo.stack Register.Data ~pos:offset ~size:(Bitvector.of_int (Size.in_bytes size) ~width:pointer_size) in
let new_state = { state with TypeInfo.stack = new_stack } in
if target_is_unique then (* previous value on the stack gets overwritten *)
new_state
else (* previous value on the stack may have been overwritten. We merge the two possible states to account for both cases *)
TypeInfo.merge state new_state
| None -> begin
if target_is_unique then (* There is a write on the stack, but we do not know where. To prevent our knowledge of the stack to get corrupted, we delete it. *)
{ state with TypeInfo.stack = Mem_region.empty ()}
else (* There may have been a write to the stack, but we do not know where. We optimistically assume that if it was a write, it did not change the TypeInfo there. *)
state
end
else (* store does not change the stack *)
state
| _ -> state
let add_mem_address_registers state exp ~project =
(* Adds the address registers of Loads and Stores to the list of known pointer registers.
Note that this is a source of pointers for which we do not know where they point to.
This may confuse algorithms that assume the pointer target list is exhaustive. *)
let add_mem_address_registers state exp ~sub_tid ~project =
let exp_list = nested_exp_list exp in
List.fold exp_list ~init:state ~f:(fun state exp ->
match exp with
......@@ -247,16 +398,28 @@ let add_mem_address_registers state exp ~project =
match addr_exp with
| Bil.Var(addr)
| Bil.BinOp(Bil.PLUS, Bil.Var(addr), Bil.Int(_))
| Bil.BinOp(Bil.PLUS, Bil.Int(_), Bil.Var(addr))
| Bil.BinOp(Bil.MINUS, Bil.Var(addr), Bil.Int(_))
| Bil.BinOp(Bil.AND, Bil.Var(addr), Bil.Int(_))
| Bil.BinOp(Bil.OR, Bil.Var(addr), Bil.Int(_)) ->
{ state with TypeInfo.reg = Map.set state.TypeInfo.reg addr (Ok(Register.Pointer)) } (* TODO: there are some false positives here for indices in global data arrays, where the immediate is the pointer. Maybe remove all cases with potential false positives? *)
| Bil.BinOp(Bil.AND, Bil.Int(_), Bil.Var(addr))
| Bil.BinOp(Bil.OR, Bil.Var(addr), Bil.Int(_))
| Bil.BinOp(Bil.OR, Bil.Int(_), Bil.Var(addr)) ->
begin match Map.find state.TypeInfo.reg addr with
| Some(Ok(Pointer(_))) -> state
| _ -> { state with TypeInfo.reg = Map.set state.TypeInfo.reg addr (Ok(Register.Pointer(Tid.Map.empty))) } (* TODO: there are some false positives here for indices in global data arrays, where the immediate is the pointer. Maybe remove all cases with potential false positives? *)
end
| Bil.BinOp(Bil.PLUS, Bil.Var(addr), exp2)
| Bil.BinOp(Bil.PLUS, exp2, Bil.Var(addr))
| Bil.BinOp(Bil.MINUS, Bil.Var(addr), exp2)
| Bil.BinOp(Bil.AND, Bil.Var(addr), exp2)
| Bil.BinOp(Bil.OR, Bil.Var(addr), exp2) ->
if type_of_exp exp2 state project = Some(Ok(Register.Data)) then
{ state with TypeInfo.reg = Map.set state.TypeInfo.reg addr (Ok(Register.Pointer)) }
| Bil.BinOp(Bil.AND, exp2, Bil.Var(addr))
| Bil.BinOp(Bil.OR, Bil.Var(addr), exp2)
| Bil.BinOp(Bil.OR, exp2, Bil.Var(addr)) ->
if type_of_exp exp2 state sub_tid project = Some(Ok(Register.Data)) then
begin match Map.find state.TypeInfo.reg addr with
| Some(Ok(Pointer(_))) -> state
| _ -> { state with TypeInfo.reg = Map.set state.TypeInfo.reg addr (Ok(Register.Pointer(Tid.Map.empty))) }
end
else
state
| _ -> state
......@@ -265,99 +428,136 @@ let add_mem_address_registers state exp ~project =
)
(* updates the stack offset if a definition changes the stack pointer value.
TODO: Bil.AND, Bil.OR are ignored because we do not track alignment yet. *)
let update_stack_offset state def ~project =
let stack_register = Symbol_utils.stack_register project in
if Def.lhs def = stack_register && Option.is_some state.TypeInfo.stack_offset then
match Def.rhs def with
| Bil.BinOp(Bil.PLUS, Bil.Var(var), Bil.Int(value)) ->
if var = stack_register then
TypeInfo.stack_offset_add state value
else
{ state with TypeInfo.stack_offset = None }
| Bil.BinOp(Bil.MINUS, Bil.Var(var), Bil.Int(value)) ->
if var = stack_register then
TypeInfo.stack_offset_add state (Bitvector.neg (Bitvector.signed value))
else
{ state with TypeInfo.stack_offset = None }
| _ -> { state with TypeInfo.stack_offset = None }
else
state
(* Remove any knowledge of the stack (except the stack_offset) and the registers (except stack and flag registers) from the state. *)
let keep_only_stack_offset state ~project =
let empty_state = TypeInfo.empty() in
{ empty_state with
TypeInfo.stack_offset = state.TypeInfo.stack_offset;
TypeInfo.reg = TypeInfo.get_stack_pointer_and_flags project }
let update_state_def state def ~project =
(* Remove any knowledge of the stack and of the registers from the state,
   except for the flag registers (known data) and the stack register.
   The result is built from TypeInfo.only_stack_pointer_and_flags, so the stack
   is empty. If the incoming state has an entry for the stack register, that
   entry (including its targets and offsets) is carried over; otherwise the
   stack register keeps the default "pointer to sub_tid with unknown offset".
   Note: the carried-over value must be inserted into the fresh register map
   (new_state), not into the old one, or all other register types would
   survive as well. *)
let keep_only_stack_register state ~sub_tid ~project =
  let stack_register = Symbol_utils.stack_register project in
  let stack_pointer_value = Map.find state.TypeInfo.reg stack_register in
  let new_state = TypeInfo.only_stack_pointer_and_flags sub_tid project in
  match stack_pointer_value with
  | Some(value) ->
    { new_state with
      TypeInfo.reg = Map.set new_state.TypeInfo.reg ~key:stack_register ~data:value }
  | None -> new_state
let update_state_def state def ~sub_tid ~project =
(* add all registers that are used as address registers in load/store expressions to the state *)
let state = add_mem_address_registers state (Def.rhs def) project in
let state = match type_of_exp (Def.rhs def) state project with
let state = add_mem_address_registers state (Def.rhs def) sub_tid project in
(* update the lhs of the definition with its new type *)
let state = match type_of_exp (Def.rhs def) state sub_tid project with
| Some(value) ->
let reg = Map.set state.TypeInfo.reg (Def.lhs def) value in
{ state with TypeInfo.reg = reg }
| None -> (* We don't know the type of the new value *)
let reg = Map.remove state.TypeInfo.reg (Def.lhs def) in
{ state with TypeInfo.reg = reg } in
(* update stack offset and maybe write something to the stack *)
let state = update_stack_offset state def ~project in
let state = set_stack_elem state (Def.rhs def) ~project in
(* write something to the stack if the definition is a store to the stack *)
let state = set_stack_elem state (Def.rhs def) ~sub_tid ~project in
state
let update_state_jmp state jmp ~project =
(** Adds the integer [num] (converted to a bitvector of pointer width) to the
    offsets of all targets of the stack register. If the stack register has no
    known (Ok) entry in the state, the state is returned unchanged. *)
let add_to_stack_offset state num ~project =
  let stack_var = Symbol_utils.stack_register project in
  match Map.find state.TypeInfo.reg stack_var with
  | Some(Ok(stack_register)) ->
    let pointer_bits = Symbol_utils.arch_pointer_size_in_bytes project * 8 in
    let summand = Some(Ok(Bitvector.of_int num ~width:pointer_bits)) in
    let shifted = Register.add_to_offsets stack_register summand in
    { state with TypeInfo.reg = Map.set state.TypeInfo.reg ~key:stack_var ~data:(Ok(shifted)) }
  | _ -> state (* no known stack register value, nothing to adjust *)
(* TODO: Add entry to config for this? Since type inference is its own bap-pass, this may need a new config file...
Also important: update_state_jmp makes a lot of assumptions about the functions (like it does not interact with the stack).
If this list gets configurable, we probably need a concept how to annotate these types of assumptions in config files. *)
(** Names of the functions that are treated as malloc-like, i.e. handled by
    the dedicated malloc call update instead of the generic call handling. *)
let malloc_like_function_list () =
  [ "malloc";
    "calloc";
    "realloc" ]
(** Updates the state on a call to a malloc-like function. Notable assumptions
    for malloc-like functions:
    - there is only one return register, which returns a unique pointer to a
      newly allocated memory region. Note: Possible zero returns are handled by
      the CWE-476-check.
    - the malloc-like function does not touch the stack
    - the standard calling convention of the target architecture is used.
    Fails with [Failure] if the callee has no output argument or if that
    argument is not a plain register. *)
let update_state_malloc_call state malloc_like_tid jmp_term ~project =
  let is_output_arg arg =
    match Bap.Std.Arg.intent arg with
    | Some(Out) | Some(Both) -> true
    | _ -> false in
  (* Only keep callee-saved register information. Stack information is also kept.
     TODO: maybe add a "cut"-function to remove all stack info below the stack pointer? *)
  let surviving_regs =
    Var.Map.filter_keys state.TypeInfo.reg ~f:(fun var -> Cconv.is_callee_saved var project) in
  let callee = Term.find_exn sub_t (Project.program project) malloc_like_tid in
  (* TODO: check whether there exists more than one return register! *)
  let return_reg =
    match Seq.find (Term.enum arg_t callee) ~f:is_output_arg with
    | None -> failwith "[CWE-checker] malloc-like function has no return register"
    | Some(return_arg) ->
      begin match Bap.Std.Arg.rhs return_arg with
        | Bil.Var(var) -> var
        | _ -> failwith "[CWE-checker] Return register of malloc-like function wasn't a register."
      end in
  (* The new pointer target is identified by the tid of the jmp instruction and
     starts at offset 0 with unknown alignment. *)
  let zero_offset =
    Bitvector.of_int 0 ~width:(Symbol_utils.arch_pointer_size_in_bytes project * 8) in
  let heap_target = { PointerTargetInfo.offset = Some(Ok(zero_offset)); alignment = None } in
  let target_map = Map.set Tid.Map.empty ~key:(Term.tid jmp_term) ~data:heap_target in
  { state with
    TypeInfo.reg = Var.Map.set surviving_regs ~key:return_reg ~data:(Ok(Register.Pointer(target_map))) }
(* TODO: Right now the conditional expression is not checked! Thus for conditional calls
(if bap generates conditional calls) the state would always be the state as if the call
branch has been taken even for the other branch. The way that the bap fixpoint function
works this could be quite complicated to implement. *)
let update_state_jmp state jmp ~sub_tid ~project =
match Jmp.kind jmp with
| Call(call) -> begin match Call.target call with
| Call(call) ->
let return_state = match Call.target call with
| Direct(tid) ->
let func_name = match String.lsplit2 (Tid.name tid) ~on:'@' with
| Some(_left, right) -> right
| None -> Tid.name tid in
if String.Set.mem (Cconv.parse_dyn_syms project) func_name then
let empty_state = TypeInfo.empty () in (* TODO: to preserve stack information we need to be sure that the callee does not write on the stack => needs pointer source tracking! *)
{ empty_state with
TypeInfo.stack_offset = state.TypeInfo.stack_offset;
TypeInfo.reg = Var.Map.filter_keys state.TypeInfo.reg ~f:(fun var -> Cconv.is_callee_saved var project) }
begin if List.exists (malloc_like_function_list ()) ~f:(fun elem -> elem = func_name) then
update_state_malloc_call state tid jmp project
else
let empty_state = TypeInfo.empty () in (* TODO: to preserve stack information we need to be sure that the callee does not write on the stack. Can we already check that? *)
{ empty_state with
TypeInfo.reg = Var.Map.filter_keys state.TypeInfo.reg ~f:(fun var -> Cconv.is_callee_saved var project) }
end
else
keep_only_stack_offset state project (* TODO: add interprocedural analysis here. *)
| Indirect(_) -> keep_only_stack_offset state project (* TODO: when we have value tracking and interprocedural analysis, we can add indirect calls to the regular analysis. *)
end
keep_only_stack_register state sub_tid project (* TODO: add interprocedural analysis here. *)
| Indirect(_) -> keep_only_stack_register state sub_tid project in (* TODO: when we have value tracking and interprocedural analysis, we can add indirect calls to the regular analysis. *)
(* The callee is responsible for removing the return address from the stack, so we have to adjust the stack offset accordingly. *)
(* TODO: x86/x64, arm, mips and ppc all use descending stacks and we assume here that a descending stack is used. Can this be checked by some info given from bap? Is there an architecture with an upward growing stack? *)
add_to_stack_offset return_state (Symbol_utils.arch_pointer_size_in_bytes project) project
| Int(_, _) -> (* TODO: We need stubs and/or interprocedural analysis here *)
keep_only_stack_offset state project
keep_only_stack_register state sub_tid project (* TODO: Are there cases where the stack offset has to be adjusted here? *)
| Goto(Indirect(Bil.Var(var))) (* TODO: warn when jumping to something that is marked as data. *)
| Ret(Indirect(Bil.Var(var))) ->
let reg = Map.set state.TypeInfo.reg var (Ok(Register.Pointer)) in
{ state with TypeInfo.reg = reg }
begin match Map.find state.TypeInfo.reg var with
| Some(Ok(Pointer(_))) -> state
| _ -> { state with TypeInfo.reg = Map.set state.TypeInfo.reg var (Ok(Register.Pointer(Tid.Map.empty))) }
end
| Goto(_)
| Ret(_) -> state
(* This is public for unit test purposes. *)
let update_type_info block_elem state ~project =
let update_type_info block_elem state ~sub_tid ~project =
match block_elem with
| `Def def -> update_state_def state def ~project
| `Def def -> update_state_def state def ~sub_tid ~project
| `Phi phi -> state (* We ignore phi terms for this analysis. *)
| `Jmp jmp -> update_state_jmp state jmp ~project
| `Jmp jmp -> update_state_jmp state jmp ~sub_tid ~project
(** updates a block analysis. *)
let update_block_analysis block register_state ~project =
let update_block_analysis block register_state ~sub_tid ~project =
(* get all elements (Defs, Jumps, Phi-nodes) in the correct order *)
let elements = Blk.elts block in
let register_state = Seq.fold elements ~init:register_state ~f:(fun state element ->
update_type_info element state ~project
update_type_info element state ~sub_tid ~project
) in
TypeInfo.remove_virtual_registers register_state (* virtual registers should not be accessed outside of the block where they are defined. *)
let intraprocedural_fixpoint func ~project =
let cfg = Sub.to_cfg func in
let sub_tid = Term.tid func in
(* default state for nodes *)
let only_sp = { (TypeInfo.empty ()) with TypeInfo.reg = TypeInfo.get_stack_pointer_and_flags project } in
let only_sp = TypeInfo.only_stack_pointer_and_flags sub_tid project in
try
(* Create a starting solution where only the first block of a function knows the stack_offset. *)
let fn_start_state = TypeInfo.function_start_state project in
let fn_start_state = TypeInfo.function_start_state sub_tid project in
let fn_start_block = Option.value_exn (Term.first blk_t func) in
let fn_start_state = update_block_analysis fn_start_block fn_start_state ~project in
let fn_start_state = update_block_analysis fn_start_block fn_start_state ~sub_tid ~project in
let fn_start_node = Seq.find_exn (Graphs.Ir.nodes cfg) ~f:(fun node -> (Term.tid fn_start_block) = (Term.tid (Graphs.Ir.Node.label node))) in
let empty = Map.empty (module Graphs.Ir.Node) in
let with_start_node = Map.set empty fn_start_node fn_start_state in
......@@ -366,7 +566,7 @@ let intraprocedural_fixpoint func ~project =
let merge = TypeInfo.merge in
let f = (fun node state ->
let block = Graphs.Ir.Node.label node in
update_block_analysis block state ~project
update_block_analysis block state ~sub_tid ~project
) in
Graphlib.Std.Graphlib.fixpoint (module Graphs.Ir) cfg ~steps:100 ~rev:false ~init:init ~equal:equal ~merge:merge ~f:f
with
......@@ -375,24 +575,24 @@ let intraprocedural_fixpoint func ~project =
(** Extract the starting state of a node. *)
let extract_start_state node ~cfg ~solution ~project =
let extract_start_state node ~cfg ~solution ~sub_tid ~project =
let predecessors = Graphs.Ir.Node.preds node cfg in
if Seq.is_empty predecessors then
TypeInfo.function_start_state project (* This should be the first block of a function. Maybe add a test for when there is more than one such block in a function? *)
TypeInfo.function_start_state sub_tid project (* This should be the first block of a function. Maybe add a test for when there is more than one such block in a function? *)
else
let only_sp = { (TypeInfo.empty ()) with TypeInfo.reg = TypeInfo.get_stack_pointer_and_flags project } in
let only_sp = TypeInfo.only_stack_pointer_and_flags sub_tid project in
Seq.fold predecessors ~init:only_sp ~f:(fun state node ->
TypeInfo.merge state (Graphlib.Std.Solution.get solution node)
)
(** Returns a list of pairs (tid, state) for each def in a (blk_t-)node. The state
is the state _after_ execution of the respective def. *)
let state_list_def node ~cfg ~solution ~project =
let input_state = extract_start_state node ~cfg ~solution ~project in
let state_list_def node ~cfg ~solution ~sub_tid ~project =
let input_state = extract_start_state node ~cfg ~solution ~sub_tid ~project in
let block = Graphs.Ir.Node.label node in
let defs = Term.enum def_t block in
let (output, _) = Seq.fold defs ~init:([], input_state) ~f:(fun (list_, state) def ->
let state = update_state_def state def project in
let state = update_state_def state def sub_tid project in
( (Term.tid def, state) :: list_, state)
) in
output
......@@ -401,11 +601,12 @@ let state_list_def node ~cfg ~solution ~project =
let compute_pointer_register project =
let program = Project.program project in
let program_with_tags = Term.map sub_t program ~f:(fun func ->
let cfg = Sub.to_cfg func in
let cfg = Sub.to_cfg func in
let sub_tid = Term.tid func in
let solution = intraprocedural_fixpoint func project in
Seq.fold (Graphs.Ir.nodes cfg) ~init:func ~f:(fun func node ->
let block = Graphs.Ir.Node.label node in
let start_state = extract_start_state node cfg solution project in
let start_state = extract_start_state node cfg solution sub_tid project in
let tagged_block = Term.set_attr block type_info_tag start_state in
Term.update blk_t func tagged_block
)
......@@ -413,20 +614,34 @@ let compute_pointer_register project =
Project.with_program project program_with_tags
(** Prints type info to debug. *)
let print_type_info_to_debug state block_tid ~tid_map =
let print_type_info_to_debug state block_tid ~tid_map ~sub_tid ~project =
let register_list = Map.fold state.TypeInfo.reg ~init:[] ~f:(fun ~key:var ~data:reg str_list ->
match reg with
| Ok(Register.Pointer) -> (Var.name var ^ ":Pointer, ") :: str_list
| Ok(Register.Pointer(targets)) ->
(Var.name var ^ ":Pointer(targets: " ^
(Map.fold targets ~init:"" ~f:(fun ~key ~data accum_string -> (Tid.name key) ^ "," ^ accum_string)) ^
")") :: str_list
| Ok(Register.Data) -> (Var.name var ^ ":Data, ") :: str_list
| Error(_) -> (Var.name var ^ ":Error, ") :: str_list ) in
let register_string = String.concat register_list in
let stack_offset_str = match state.TypeInfo.stack_offset with
| Some(Ok(x)) -> begin
match Bitvector.to_int (Bitvector.signed x) with
| Ok(number) -> string_of_int number
| _ -> "NaN"
end
| _ -> "Unknown" in
let stack_offset_str =
match Map.find state.TypeInfo.reg (Symbol_utils.stack_register project) with
| Some(Ok(Pointer(targets))) ->
begin match Map.find targets sub_tid with
| Some(target) ->
begin match target.PointerTargetInfo.offset with
| Some(Ok(x)) ->
begin match Bitvector.to_int (Bitvector.signed x) with
| Ok(number) -> string_of_int number
| _ -> "NaN"
end
| Some(Error()) -> "Unknown (Error)"
| _ -> "Unknown"
end
| None -> "Unknown"
end
| _ -> "Unknown"
in
Log_utils.debug
"[%s] {%s} TypeInfo at %s:\nRegister: %s\nStackOffset: %s"
name
......@@ -439,10 +654,11 @@ let print_type_info_tags ~project ~tid_map =
let program = Project.program project in
let functions = Term.enum sub_t program in
Seq.iter functions ~f:(fun func ->
let sub_tid = Term.tid func in
let blocks = Term.enum blk_t func in
Seq.iter blocks ~f:(fun block ->
match Term.get_attr block type_info_tag with
| Some(start_state) -> print_type_info_to_debug start_state (Term.tid block) ~tid_map
| Some(start_state) -> print_type_info_to_debug start_state (Term.tid block) ~tid_map ~sub_tid ~project
| None -> (* block has no type info tag, which should not happen *)
Log_utils.error
"[%s] {%s} Block has no TypeInfo at %s (block TID %s)"
......@@ -453,7 +669,17 @@ let print_type_info_tags ~project ~tid_map =
)
)
(* Functions made public for unit tests *)
module Test = struct
(* Functions made available for unit tests *)
module Private = struct
let update_block_analysis = update_block_analysis
let function_start_state = TypeInfo.function_start_state
let compute_stack_offset = TypeInfo.compute_stack_offset
let only_stack_pointer_and_flags = TypeInfo.only_stack_pointer_and_flags
let merge_type_infos = TypeInfo.merge
let type_info_equal = TypeInfo.equal
end
(* This file contains analysis passes for type recognition *)
(* This file contains analysis passes for type recognition.
It can annotate whether registers or values on the stack hold data or pointers
to memory. For the latter the target memory location is also tracked if known.
Pointers to the heap are tracked by tracking calls to malloc, calloc and realloc.
This analysis does not check whether the return values of these calls are checked
for NULL values (see cwe_476 for that). *)
open Bap.Std
open Core_kernel
(** The PointerTargetInfo contains knowledge about the offset and the alignment of
a pointer into a memory area. Here the alignment is always considered relative
to offset zero of the target memory area. *)
module PointerTargetInfo : sig
type t = {
(* Offset of the pointer into the target memory area.
   NOTE(review): presumably [None] means "nothing known" and [Some (Error ())]
   means "conflicting information" (cf. merge_result_option) - confirm. *)
offset: (Bitvector.t, unit) Result.t Option.t;
(* Alignment of the pointer; uses the same option/result wrapping as [offset]. *)
alignment: (int, unit) Result.t Option.t;
} [@@deriving bin_io, compare, sexp]
end
(** The register type. *)
(** The Register.t type. A register holds either arbitrary data or a pointer to some
memory region. We do track possible targets of the pointer as follows:
- heap objects: tid of corresponding call instruction to malloc, calloc, etc.
- current stack frame: sub_tid of current function
- some other stack frame: tid of corresponding call that left the stack frame.
This way we can distinguish between current stack pointers and pointers to the
stack frame of the same function coming from recursive calls. *)
module Register : sig
type t =
| Pointer
| Pointer of PointerTargetInfo.t Tid.Map.t
| Data
[@@deriving bin_io, compare, sexp]
end
(** The TypeInfo module. A TypeInfo.t structure holds a list of registers with known
type information (see Register.t type) and known type information for values
on the stack. *)
module TypeInfo : sig
type reg_state = (Register.t, unit) Result.t Var.Map.t [@@deriving bin_io, compare, sexp]
type t = {
stack: Register.t Mem_region.t;
stack_offset: (Bitvector.t, unit) Result.t Option.t;
reg: reg_state;
} [@@deriving bin_io, compare, sexp]
......@@ -26,6 +47,8 @@ module TypeInfo : sig
val pp: Format.formatter -> t -> unit
end
(** A tag for TypeInfo.t, so that we can annotate basic blocks with known type information
using bap tags. *)
val type_info_tag: TypeInfo.t Value.tag
(** Computes TypeInfo for the given project. Adds tags to each block containing the
......@@ -38,10 +61,21 @@ val print_type_info_tags: project:Project.t -> tid_map:word Tid.Map.t -> unit
(** Updates the type info for a single element (Phi/Def/Jmp) of a block. Input
is the type info before execution of the element, output is the type info
after execution of the element. *)
val update_type_info: Blk.elt -> TypeInfo.t -> project:Project.t -> TypeInfo.t
after execution of the element. sub_tid is the Tid of the current function
which is internally used to mark which pointers point to the current stack frame.*)
val update_type_info: Blk.elt -> TypeInfo.t -> sub_tid:Tid.t -> project:Project.t -> TypeInfo.t
(* functions made available for unit tests: *)
module Private : sig
val update_block_analysis: Blk.t -> TypeInfo.t -> sub_tid:Tid.t -> project:Project.t -> TypeInfo.t
val function_start_state: Tid.t -> Project.t -> TypeInfo.t
val compute_stack_offset: TypeInfo.t -> Exp.t -> sub_tid:Tid.t -> project:Project.t -> Bitvector.t Option.t
val only_stack_pointer_and_flags: Tid.t -> Project.t -> TypeInfo.t
val merge_type_infos: TypeInfo.t -> TypeInfo.t -> TypeInfo.t
(* functions made public for unit tests: *)
module Test : sig
val update_block_analysis: Blk.t -> TypeInfo.t -> project:Project.t -> TypeInfo.t
val type_info_equal: TypeInfo.t -> TypeInfo.t -> bool
end
......@@ -48,8 +48,9 @@ let parse_dyn_sym_line line =
line := String.strip left;
str_list := right :: !str_list;
done;
str_list := !line :: !str_list;
match !str_list with
| _ :: value :: _ :: "FUNC" :: _ :: _ :: _ :: name :: [] -> begin
| _ :: value :: _ :: "FUNC" :: _ :: _ :: _ :: name :: _ -> begin
match ( String.strip ~drop:(fun x -> x = '0') value, String.lsplit2 name ~on:'@') with
| ("", Some(left, _)) -> Some(left)
| ("", None) -> Some(name)
......
all:
bapbundle remove cwe_checker_unit_tests.plugin
bapbuild -r -Is analysis cwe_checker_unit_tests.plugin -pkgs core,alcotest,yojson,unix,ppx_jane,cwe_checker_core
bapbuild -r -Is analysis,utils cwe_checker_unit_tests.plugin -pkgs core,alcotest,yojson,unix,ppx_jane,cwe_checker_core
bapbundle install cwe_checker_unit_tests.plugin
bap ../artificial_samples/build/arrays_x64.out --pass=cwe-checker-unit-tests
bapbundle remove cwe_checker_unit_tests.plugin
......
......@@ -3,12 +3,16 @@ open Core_kernel
open Cwe_checker_core
open Type_inference
open Type_inference.Test
open Type_inference.Private
(** [check msg actual] is an Alcotest boolean check that passes iff [actual] is [true]. *)
let check msg actual = Alcotest.check Alcotest.bool msg true actual
let example_project = ref None
(** [bv x] is the bitvector with value [x] whose width (in bits) is the pointer
    width of the example project. Raises if the example project is not set yet. *)
let bv x =
  let project = Option.value_exn !example_project in
  let width = Symbol_utils.arch_pointer_size_in_bytes project * 8 in
  Bitvector.of_int x ~width
(* TODO: As soon as more pointers than stack pointer are tracked, add more tests! *)
let create_block_from_defs def_list =
......@@ -16,61 +20,174 @@ let create_block_from_defs def_list =
let () = List.iter def_list ~f:(fun def -> Blk.Builder.add_def block def) in
Blk.Builder.result block
(** Hand-built TypeInfo for the start of a function: an empty stack region,
    a known stack offset of zero and the stack register as the only register
    with type information (marked as a pointer). *)
let start_state stack_register project =
  let pointer_bits = Symbol_utils.arch_pointer_size_in_bytes project * 8 in
  let zero_offset = Bitvector.of_int 0 ~width:pointer_bits in
  let registers = Var.Map.singleton stack_register (Ok Register.Pointer) in
  { TypeInfo.stack = Mem_region.empty ();
    stack_offset = Some (Ok zero_offset);
    reg = registers;
  }
let test_update_stack_offset () =
let test_preamble () =
let project = Option.value_exn !example_project in
let bv x = Bitvector.of_int x ~width:(Symbol_utils.arch_pointer_size_in_bytes project * 8) in
let stack_register = Symbol_utils.stack_register project in
let fn_start_state = start_state stack_register project in
let sub = Sub.create ~name:"TestSub" () in
let sub_tid = Term.tid sub in
let fn_start_state = function_start_state sub_tid project in
(project, stack_register, sub, sub_tid, fn_start_state)
let test_update_stack_offset () =
let (project, stack_register, sub, sub_tid, fn_start_state) = test_preamble () in
let def1 = Def.create stack_register (Bil.binop Bil.plus (Bil.var stack_register) (Bil.int (bv 8))) in
let def2 = Def.create stack_register (Bil.binop Bil.minus (Bil.var stack_register) (Bil.int (bv 16))) in
let block = create_block_from_defs [def1; def2] in
let state = update_block_analysis block fn_start_state project in
let () = check "update_stack_offset" (state.TypeInfo.stack_offset = Some(Ok(Bitvector.unsigned (bv (-8))))) in
let state = update_block_analysis block fn_start_state sub_tid project in
let () = check "update_stack_offset" ( (compute_stack_offset state (Bil.var stack_register) sub_tid project) = Some(Bitvector.unsigned (bv (-8)))) in
()
(** Checks what survives a call: the stack offset must be tracked across every
    kind of call, while the knowledge about values on the stack is only kept
    for the call to malloc.
    NOTE(review): the store size [`r64] and the expected size [bv 8] look like
    they assume a 64-bit example project - confirm. *)
let test_preserve_stack_offset_on_stubs () =
  let (project, stack_register, _sub, sub_tid, fn_start_state) = test_preamble () in
  let pointer_size = Symbol_utils.arch_pointer_size_in_bytes project in
  let register1 = Var.create "Register1" (Bil.Imm (pointer_size * 8)) in
  let mem_reg = Var.create "Mem_reg" (Bil.Imm (pointer_size * 8)) in
  let def1 = Def.create register1 (Bil.unop Bil.NEG (Bil.var register1)) in
  let store_address = Bil.binop Bil.PLUS (Bil.var stack_register) (Bil.int (bv (-8))) in
  let def2 =
    Def.create mem_reg
      (Bil.Store (Bil.var mem_reg, store_address, Bil.var register1, Bitvector.LittleEndian, `r64)) in
  (* State after a block consisting of def1, def2 and a direct call to [target_tid]. *)
  let state_after_call_to target_tid =
    let call_term = Jmp.create (Call (Call.create ~target:(Label.direct target_tid) ())) in
    let builder = Blk.Builder.create () in
    let () = Blk.Builder.add_def builder def1 in
    let () = Blk.Builder.add_def builder def2 in
    let () = Blk.Builder.add_jmp builder call_term in
    update_block_analysis (Blk.Builder.result builder) fn_start_state ~sub_tid ~project in
  (* Tid of the sub called [name]. Raises if the example project has no such sub. *)
  let tid_of_sub_named name =
    Project.program project
    |> Term.enum sub_t
    |> Seq.find_exn ~f:(fun candidate -> Sub.name candidate = name)
    |> Term.tid in
  let stack_offset_of state =
    compute_stack_offset state (Bil.var stack_register) ~sub_tid ~project in
  let stack_value_of state = Mem_region.get state.TypeInfo.stack (bv (-8)) in
  (* since the callee removes the return address from the stack, the stack offset is adjusted accordingly. *)
  let offset_after_return = Some (Bitvector.unsigned (bv pointer_size)) in
  let written_data = Some (Ok (Register.Data, bv 8)) in
  (* call to a function inside the binary (here: the test sub itself) *)
  let state = state_after_call_to sub_tid in
  check "preserve_stack_offset_inner_call" (stack_offset_of state = offset_after_return);
  check "delete_stack_info_inner_call" (stack_value_of state = None);
  (* call to the extern function malloc *)
  let state = state_after_call_to (tid_of_sub_named "malloc") in
  check "preserve_stack_offset_extern_malloc_call" (stack_offset_of state = offset_after_return);
  check "preserve_stack_info_extern_malloc_call" (stack_value_of state = written_data);
  (* call to the extern function free *)
  let state = state_after_call_to (tid_of_sub_named "free") in
  check "preserve_stack_offset_extern_call" (stack_offset_of state = offset_after_return);
  check "delete_stack_info_extern_call" (stack_value_of state <> written_data)
let test_update_reg () =
let project = Option.value_exn !example_project in
let bv x = Bitvector.of_int x ~width:(Symbol_utils.arch_pointer_size_in_bytes project * 8) in
let stack_register = Symbol_utils.stack_register project in
let fn_start_state = start_state stack_register project in
let (project, stack_register, sub, sub_tid, fn_start_state) = test_preamble () in
let register1 = Var.create "Register1" (Bil.Imm (Symbol_utils.arch_pointer_size_in_bytes project * 8)) in
let register2 = Var.create "Register2" (Bil.Imm (Symbol_utils.arch_pointer_size_in_bytes project * 8)) in
let def1 = Def.create register1 (Bil.binop Bil.AND (Bil.var stack_register) (Bil.int (bv 8))) in
let def2 = Def.create register2 (Bil.binop Bil.XOR (Bil.var register1) (Bil.var stack_register)) in
let block = create_block_from_defs [def1; def2] in
let state = update_block_analysis block fn_start_state project in
let () = check "update_pointer_register" (Var.Map.find state.TypeInfo.reg register1 = Some(Ok(Pointer))) in
let state = update_block_analysis block fn_start_state sub_tid project in
let () = check "update_pointer_register" (
match Var.Map.find state.TypeInfo.reg register1 with
| Some(Ok(Pointer(_))) -> true
|_ -> false
) in
let () = check "update_data_register" (Var.Map.find state.TypeInfo.reg register2 = Some(Ok(Data))) in
let def1 = Def.create register1 (Bil.Load (Bil.var register1, Bil.var register2, Bitvector.LittleEndian, `r64) ) in
let block = create_block_from_defs [def1;] in
let state = update_block_analysis block fn_start_state project in
let () = check "add_mem_address_registers" (Var.Map.find state.TypeInfo.reg register2 = Some(Ok(Pointer))) in
let state = update_block_analysis block fn_start_state sub_tid project in
let () = check "add_mem_address_registers" (
match Var.Map.find state.TypeInfo.reg register2 with
| Some(Ok(Pointer(_))) -> true
| _ -> false
) in
()
let test_update_stack () =
let project = Option.value_exn !example_project in
let bv x = Bitvector.of_int x ~width:(Symbol_utils.arch_pointer_size_in_bytes project * 8) in
let stack_register = Symbol_utils.stack_register project in
let fn_start_state = start_state stack_register project in
let (project, stack_register, sub, sub_tid, fn_start_state) = test_preamble () in
let register1 = Var.create "Register1" (Bil.Imm (Symbol_utils.arch_pointer_size_in_bytes project * 8)) in
let register2 = Var.create "Register2" (Bil.Imm (Symbol_utils.arch_pointer_size_in_bytes project * 8)) in
let mem_reg = Var.create "Mem_reg" (Bil.Imm (Symbol_utils.arch_pointer_size_in_bytes project * 8)) in
let def1 = Def.create register1 (Bil.binop Bil.AND (Bil.var stack_register) (Bil.int (bv 8))) in
let def2 = Def.create mem_reg (Bil.Store ((Bil.var mem_reg), (Bil.binop Bil.PLUS (Bil.var stack_register) (Bil.int (bv (-8)))), (Bil.var stack_register), Bitvector.LittleEndian, `r64)) in
let def3 = Def.create register2 (Bil.Load (Bil.var register2, (Bil.binop Bil.MINUS (Bil.var stack_register) (Bil.int (bv 8))), Bitvector.LittleEndian, `r64) ) in
let def3 = Def.create register2 (Bil.Load (Bil.var mem_reg, (Bil.binop Bil.MINUS (Bil.var stack_register) (Bil.int (bv 8))), Bitvector.LittleEndian, `r64) ) in
let block = create_block_from_defs [def1; def2; def3;] in
let state = update_block_analysis block fn_start_state project in
let () = check "write_to_stack" ((Mem_region.get state.TypeInfo.stack (bv (-8))) = Some(Ok(Pointer, bv (Symbol_utils.arch_pointer_size_in_bytes project)))) in
let () = check "load_from_stack" (Var.Map.find state.TypeInfo.reg register2 = Some(Ok(Pointer))) in
let state = update_block_analysis block fn_start_state sub_tid project in
let () = check "write_to_stack" (
match Mem_region.get state.TypeInfo.stack (bv (-8)) with
| Some(Ok(Pointer(targets), size )) when size = bv (Symbol_utils.arch_pointer_size_in_bytes project) -> true
| _ -> false
) in
let () = check "load_from_stack" (
match Var.Map.find state.TypeInfo.reg register2 with
| Some(Ok(Pointer(_))) -> true
| _ -> false
) in
()
(** A register used as the address of a store must be marked as a pointer
    (here with no known targets), and using the stack register in stores and
    loads must leave its tracked offset untouched. *)
let test_address_registers_on_load_and_store () =
  let (project, stack_register, _sub, sub_tid, fn_start_state) = test_preamble () in
  let pointer_bits = Symbol_utils.arch_pointer_size_in_bytes project * 8 in
  let fresh_register name = Var.create name (Bil.Imm pointer_bits) in
  let register1 = fresh_register "Register1" in
  let register2 = fresh_register "Register2" in
  let mem_reg = fresh_register "Mem_reg" in
  let sp = Bil.var stack_register in
  let mem = Bil.var mem_reg in
  let def1 = Def.create register1 (Bil.binop Bil.XOR sp (Bil.int (bv 8))) in
  let def2 =
    Def.create mem_reg
      (Bil.Store (mem, Bil.var register1, sp, Bitvector.LittleEndian, `r64)) in
  let def3 =
    Def.create register2
      (Bil.Load (mem, Bil.binop Bil.MINUS sp (Bil.int (bv 8)), Bitvector.LittleEndian, `r64)) in
  let block = create_block_from_defs [def1; def2; def3] in
  let state = update_block_analysis block fn_start_state ~sub_tid ~project in
  let register1_is_pointer_without_targets =
    match Map.find_exn state.TypeInfo.reg register1 with
    | Ok (Register.Pointer targets) -> Map.is_empty targets
    | _ -> false in
  check "mark_store_address_as_pointer" register1_is_pointer_without_targets;
  let sp_offset = compute_stack_offset state sp ~sub_tid ~project in
  check "dont_change_offsets_on_address_register" (sp_offset = Some (bv 0))
(** Merging two states must keep the stack offset if both agree or if only one
    of them knows it, and must yield no offset if they disagree or if neither
    knows it. *)
let test_merge_type_infos () =
  let (project, stack_register, _sub, sub_tid, fn_start_state) = test_preamble () in
  let generic_empty_state = only_stack_pointer_and_flags sub_tid project in
  let sp = Bil.var stack_register in
  let increment_sp = Def.create stack_register (Bil.binop Bil.plus sp (Bil.int (bv 8))) in
  let block = create_block_from_defs [increment_sp] in
  (* state1 starts with a known stack offset, state2 with an unknown one *)
  let state1 = update_block_analysis block fn_start_state ~sub_tid ~project in
  let state2 = update_block_analysis block generic_empty_state ~sub_tid ~project in
  (* stack offset that is known after merging the two given states *)
  let merged_offset left right =
    compute_stack_offset (merge_type_infos left right) sp ~sub_tid ~project in
  let plus_eight = Some (Bitvector.unsigned (bv 8)) in
  check "merge_same_stack_offset" (merged_offset state1 state1 = plus_eight);
  check "merge_different_stack_offsets" (merged_offset fn_start_state state1 = None);
  check "merge_with_unknown_stack_offset" (merged_offset generic_empty_state state1 = plus_eight);
  check "merge_empty_stack_offsets" (merged_offset generic_empty_state state2 = None)
(** The function start state and the generic state returned by
    [only_stack_pointer_and_flags] must not compare equal. *)
let test_type_info_equal () =
  let (project, _stack_register, _sub, sub_tid, fn_start_state) = test_preamble () in
  let generic_empty_state = only_stack_pointer_and_flags sub_tid project in
  let states_are_equal = type_info_equal fn_start_state generic_empty_state in
  check "empty_state_neq_fn_start_state" (not states_are_equal)
(** After a call to malloc the return register must be marked as a pointer
    whose only known target is the tid of the call instruction.
    This test raises if the example project contains no sub named "malloc"
    and fails if the example project is not an x64 binary (the return
    register is looked up by the name "RAX"). *)
let test_malloc_call_return_reg () =
  let (project, _stack_register, _sub, sub_tid, fn_start_state) = test_preamble () in
  (* find the malloc extern call *)
  let malloc_sub =
    Seq.find_exn (Term.enum sub_t (Project.program project))
      ~f:(fun candidate -> Sub.name candidate = "malloc") in
  let call_term =
    Jmp.create (Call (Call.create ~target:(Label.direct (Term.tid malloc_sub)) ())) in
  let builder = Blk.Builder.create () in
  let () = Blk.Builder.add_jmp builder call_term in
  let block = Blk.Builder.result builder in
  let state = update_block_analysis block fn_start_state ~sub_tid ~project in
  (* registers are looked up by name, since only the name of the return register is known here *)
  let return_register =
    List.find (Map.to_alist state.TypeInfo.reg) ~f:(fun (var, _) -> Var.name var = "RAX") in
  let only_target_is_the_call =
    match return_register with
    | Some (_, Ok (Register.Pointer targets)) ->
      begin match Map.to_alist targets with
        | [ (target_tid, _) ] -> target_tid = Term.tid call_term
        | _ -> false
      end
    | _ -> false in
  check "malloc_return_register_marked" only_target_is_the_call
......@@ -78,4 +195,9 @@ let tests = [
"Update Stack Offset", `Quick, test_update_stack_offset;
"Update Register", `Quick, test_update_reg;
"Update Stack", `Quick, test_update_stack;
"Preserve Stack data on calls", `Quick, test_preserve_stack_offset_on_stubs;
"Merge TypeInfos", `Quick, test_merge_type_infos;
"Equality check for TypeInfos", `Quick, test_type_info_equal;
"Address register handling on load/store instructions", `Quick, test_address_registers_on_load_and_store;
"Malloc calls mark return register", `Quick, test_malloc_call_return_reg;
]
......@@ -4,9 +4,11 @@ open Cwe_checker_core
(** Hands [project] to the test modules that need an example project and then
    runs all unit test suites with Alcotest. *)
let run_tests project =
  let example = Some project in
  Type_inference_test.example_project := example;
  Cconv_test.example_project := example;
  (* the first array element takes the place of the program name *)
  let argv = [| "DoNotComplainWhenRunAsABapPlugin"; "--color=always" |] in
  let suites = [
    "Mem_region_tests", Mem_region_test.tests;
    "Type_inference_tests", Type_inference_test.tests;
    "Cconv_tests", Cconv_test.tests;
  ] in
  Alcotest.run "Unit tests" ~argv suites
let () =
......
open Bap.Std
open Core_kernel
open Cwe_checker_core
open Cconv
(** [check msg actual] is an Alcotest boolean check that passes iff [actual] is [true]. *)
let check msg actual = Alcotest.check Alcotest.bool msg true actual
let example_project = ref None
(** Checks [is_callee_saved] on one register that is expected to be callee
    saved (RBX) and one that is not (RAX). *)
let test_callee_saved () =
  (* this test assumes that the example project is an x64 binary *)
  let project = Option.value_exn !example_project in
  let pointer_bits = Symbol_utils.arch_pointer_size_in_bytes project * 8 in
  let register_named name = Var.create name (Bil.Imm pointer_bits) in
  check "callee_saved_register" (is_callee_saved (register_named "RBX") project);
  check "caller_saved_register" (not (is_callee_saved (register_named "RAX") project))
(** Checks that [parse_dyn_syms] finds exactly the expected dynamic symbols.
    This test assumes that the example project is the arrays_x64.out binary
    from the artificial samples. *)
let test_parse_dyn_syms () =
  let project = Option.value_exn !example_project in
  (* parse once and run every check against the same result *)
  let dyn_syms = parse_dyn_syms project in
  let expected_symbols = ["free"; "__libc_start_main"; "malloc"; "__cxa_finalize"] in
  List.iter expected_symbols ~f:(fun symbol ->
      check (symbol ^ "_as_dyn_sym") (String.Set.mem dyn_syms symbol));
  (* no symbols besides the expected ones may be reported *)
  check "dyn_sym_count" (Set.length dyn_syms = List.length expected_symbols)
(* The Alcotest test cases of this module. Each entry consists of the name of
   the test case, its speed level and the test function to run. *)
let tests = [
"Callee saved register", `Quick, test_callee_saved;
"Parse dynamic symbols", `Quick, test_parse_dyn_syms;
]
open Bap.Std
open Core_kernel
(** The project the test cases operate on. It has to be set to [Some project]
    before the test cases are run, since they raise on [None]. *)
val example_project: Project.t option ref
(** The Alcotest test cases for the Cconv module. *)
val tests: unit Alcotest.test_case list
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment