open Core_kernel
open Bap.Std
open Log_utils

(** Identifier of this check. It is passed to [cwe_warning_factory] for every warning emitted by this module. *)
let name = "CWE476"
(** Version string of this check. It is passed to [cwe_warning_factory] together with [name]. *)
let version = "0.3"

(* TODO: This check is based on Mem_region, which does not support partial access yet.
   Thus partially written tainted values may be marked as error and thus the taint is falsely forgotten. *)


(** A taint is a set of Tids. Each Tid is the Tid of the basic block in which the taint originated.
    A value can be tainted by several sources at the same time, hence the set. *)
module Taint = Tid.Set

(** Taint state of the analysis: it maps registers and stack positions to the taint they currently carry. *)
module State = struct
  type t = {
    register: Taint.t Var.Map.t;
    stack: Taint.t Mem_region.t;
  } [@@deriving bin_io, compare, sexp]

  (** The state in which neither a register nor a stack position is tainted. *)
  let empty : t =
    { register = Var.Map.empty;
      stack = Mem_region.empty () }

  (** Two states are equal if their register maps and their stack regions hold equal taints. *)
  let equal (state1: t) (state2: t) : Bool.t =
    let same_registers = Var.Map.equal Taint.equal state1.register state2.register in
    let same_stack = Mem_region.equal state1.stack state2.stack ~data_equal:Taint.equal in
    same_registers && same_stack

  (** Bind a register to the given taint, replacing any previous binding. *)
  let set_register (state: t) (register: Var.t) (taint: Taint.t) : t =
    let updated_registers = Var.Map.set state.register ~key:register ~data:taint in
    { state with register = updated_registers }

  (** Look up the taint of a register. Returns [None] if the register has no entry. *)
  let find_register (state: t) (register: Var.t) : Taint.t Option.t =
    Var.Map.find state.register register

  (** Drop the entry of a single register. Taints held by other registers or by the stack are left untouched. *)
  let remove_register (state: t) (register: Var.t) : t =
    let remaining_registers = Var.Map.remove state.register register in
    { state with register = remaining_registers }

  (** Write a taint to the stack region at position [pos] with length [size]. *)
  let set_stack (state: t) ~(pos: Bitvector.t) ~(size: Bitvector.t) (taint: Taint.t) : t =
    let updated_stack = Mem_region.add state.stack taint ~pos ~size in
    { state with stack = updated_stack }

  (** Read the taint stored at stack position [pos].
      Every answer of [Mem_region.get] other than a successful hit is reported as [None].
      TODO: Mem_region is currently unsound for only partially loaded values, which might lead to errors here. *)
  let find_stack (state: t) ~(pos: Bitvector.t) : Taint.t Option.t =
    match Mem_region.get state.stack pos with
    | Some(Ok(taint, _size)) -> Some taint
    | _ -> None

  (** Remove the stack range starting at [pos] with length [size] from the state. *)
  let remove_stack (state: t) ~(pos: Bitvector.t) ~(size: Bitvector.t) : t =
    let remaining_stack = Mem_region.remove state.stack ~pos ~size in
    { state with stack = remaining_stack }

  (** Subtract the Tids in [taint_to_remove] from every taint in the state.
      Registers whose taint becomes empty are dropped from the register map.
      Stack entries are only mapped through the set difference. *)
  let remove_taint (state: t) (taint_to_remove: Taint.t) : t =
    let without_removed (taint: Taint.t) : Taint.t = Taint.diff taint taint_to_remove in
    let register = Var.Map.filter_map state.register ~f:(fun taint ->
      let remaining = without_removed taint in
      if Taint.is_empty remaining then None else Some remaining
    ) in
    let stack = Mem_region.map_data state.stack ~f:without_removed in
    { register; stack }

  (** Merge two states. A register or stack position present in both states carries the union of both taints. *)
  let union (state1: t) (state2: t) : t =
    let register = Var.Map.merge state1.register state2.register ~f:(fun ~key:_ values ->
      match values with
      | `Left taint | `Right taint -> Some taint
      | `Both (taint1, taint2) -> Some (Taint.union taint1 taint2)
    ) in
    let stack = Mem_region.merge state1.stack state2.stack ~data_merge:(fun taint1 taint2 ->
      Some (Ok (Taint.union taint1 taint2))
    ) in
    { register; stack }

  (** Keep only the physical registers (useful at the end of a block, where virtual registers go out of scope). *)
  let remove_virtual_register (state: t) : t =
    let physical_registers = Var.Map.filter_keys state.register ~f:Var.is_physical in
    { state with register = physical_registers }

end


(** Everything needed to resolve stack accesses at one program point, plus the memory policy flag. *)
module StackInfo = struct
  type t = {
    type_info: Type_inference.TypeInfo.t;
    sub_tid:  Tid.t;
    project: Project.t;
    strict_mem_policy: Bool.t;
  }

  (** Resolve an expression to a stack offset.
      Returns [None] if [Type_inference.TypeInfo.compute_stack_offset] cannot compute an offset for the expression. *)
  let get_address (stack_info: t) (expression: Exp.t) : Bitvector.t Option.t =
    Type_inference.TypeInfo.compute_stack_offset
      stack_info.type_info
      expression
      ~sub_tid:stack_info.sub_tid
      ~project:stack_info.project

  (** Build the stack info for the term [term_tid].
      Raises if [pointer_info_map] has no entry for [term_tid], because the lookup uses [Tid.Map.find_exn]. *)
  let assemble (pointer_info_map: Type_inference.TypeInfo.t Tid.Map.t) (term_tid: Tid.t) ~(sub_tid: Tid.t) ~(project: Project.t) ~(strict_mem_policy: Bool.t) : t =
    let type_info = Tid.Map.find_exn pointer_info_map term_tid in
    { type_info; sub_tid; project; strict_mem_policy }

  (**/**)
  (* Build a mock StackInfo for unit tests: empty type information and non-strict memory policy. *)
  let assemble_mock_info (mock_tid: Tid.t) (project: Project.t) : t =
    let type_info = { Type_inference.TypeInfo.stack = Mem_region.empty (); reg = Var.Map.empty } in
    { type_info;
      sub_tid = mock_tid;
      project;
      strict_mem_policy = false }
  (**/**)
end


(** Add all Tids of [taint] to the set of cwe_hits found so far. The reference is updated in place. *)
let append_to_hits (cwe_hits:Taint.t ref) (taint: Taint.t) : unit =
  let already_found = !cwe_hits in
  cwe_hits := Taint.union already_found taint


(** Compute the taint carried by the value of an expression.
    Every memory access (load or store) whose address expression is tainted is added to [cwe_hits].
    The state itself is not modified, so flagged Tids stay in the state.

    - A load from a known stack position yields the taint stored there; any other load yields no taint.
    - A store yields no taint. If the target is not a known stack position and the strict memory policy
      is active, the taint of the stored value is added to [cwe_hits] as well.
    - XOR of a register with itself yields no taint.
    - All other expressions yield the union of the taints of their subexpressions. *)
let rec contains_taint (exp: Exp.t) (state: State.t) ~(cwe_hits: Taint.t ref) ~(stack: StackInfo.t) : Taint.t =
  (* Recurse with the same state, hit accumulator and stack info. *)
  let taint_of (sub_exp: Exp.t) : Taint.t = contains_taint sub_exp state ~cwe_hits ~stack in
  let flag_if_tainted (taint: Taint.t) : unit =
    if not (Taint.is_empty taint) then append_to_hits cwe_hits taint in
  match exp with
  | Bil.Load(_mem, addr, _endian, _size) ->
    begin
      let () = flag_if_tainted (taint_of addr) in
      match StackInfo.get_address stack addr with
      | Some(stack_offset) -> Option.value (State.find_stack state ~pos:stack_offset) ~default:Taint.empty
      | None -> Taint.empty
    end
  | Bil.Store(_mem, addr, val_expression, _endian, _size) ->
    begin
      let access_taint = taint_of addr in
      let value_taint = taint_of val_expression in
      let () = flag_if_tainted access_taint in
      match StackInfo.get_address stack addr with
      | Some(_) -> Taint.empty
      | None ->
          (* Under the strict memory policy, writing an unchecked value to a non-stack location counts as a hit. *)
          let () = if stack.strict_mem_policy then flag_if_tainted value_taint in
          Taint.empty
    end
  (* Standard assembly shortcut for setting a register to NULL.
     Use the type-specific equality of Var instead of polymorphic comparison. *)
  | Bil.BinOp(Bil.XOR, Bil.Var(var1), Bil.Var(var2)) when Var.equal var1 var2 -> Taint.empty
  | Bil.BinOp(_, exp1, exp2) -> Taint.union (taint_of exp1) (taint_of exp2)
  | Bil.UnOp(_, exp) -> taint_of exp
  | Bil.Var(var) -> Option.value (State.find_register state var) ~default:Taint.empty
  | Bil.Int(_) -> Taint.empty
  | Bil.Cast(_, _, exp) -> taint_of exp
  | Bil.Let(var, exp1, exp2) ->
      Taint.union_list [taint_of exp1; taint_of exp2; taint_of (Bil.var var)]
  | Bil.Unknown(_) -> Taint.empty
  | Bil.Ite(if_, then_, else_) ->
      Taint.union_list [taint_of if_; taint_of then_; taint_of else_]
  | Bil.Extract(_, _, exp) -> taint_of exp
  | Bil.Concat(exp1, exp2) -> Taint.union (taint_of exp1) (taint_of exp2)


(** Evaluate the taint of an expression and record the cwe_hits found while doing so.
    Hits are collected in a fresh accumulator first, then added to [cwe_hits].
    Returns the taint of the expression and the new state; the Tids of the newly found hits are removed from both. *)
let parse_taint_of_exp (exp: Exp.t) (state: State.t) ~(cwe_hits: Taint.t ref) ~(stack: StackInfo.t) : Taint.t * State.t =
  let fresh_hits : Taint.t ref = ref Taint.empty in
  let raw_taint = contains_taint exp state ~cwe_hits:fresh_hits ~stack in
  let found_hits = !fresh_hits in
  let () = append_to_hits cwe_hits found_hits in
  let cleaned_state = State.remove_taint state found_hits in
  let cleaned_taint = Taint.diff raw_taint found_hits in
  (cleaned_taint, cleaned_state)


(** Treat the condition of an if-then-else expression as a check.
    All Tids that taint the condition are removed from the whole state, since the value was inspected.
    Any expression that is not an if-then-else leaves the state unchanged. *)
let checks_value (exp: Exp.t) (state: State.t) ~(cwe_hits: Taint.t ref) ~(stack: StackInfo.t) : State.t =
  match exp with
  | Bil.Ite(condition, _then_, _else_) ->
      let (checked_taint, state) = parse_taint_of_exp condition state ~cwe_hits ~stack in
      if Taint.is_empty checked_taint then
        state
      else
        State.remove_taint state checked_taint
  | _ -> state


(** Flag every use of an unchecked value in the expression as a cwe_hit, not only uses as a memory address.
    The flagged Tids are removed from the returned state. *)
let flag_any_access (exp: Exp.t) (state: State.t) ~(cwe_hits: Taint.t ref) ~(stack: StackInfo.t) : State.t=
  let (remaining_taint, cleaned_state) = parse_taint_of_exp exp state ~cwe_hits ~stack in
  let () = append_to_hits cwe_hits remaining_taint in
  State.remove_taint cleaned_state remaining_taint


(** Flag all unchecked registers and stack variables that may be used as return values.
    Taints in return registers are flagged. Taints in stack variables are flagged if their position,
    read as a signed integer, is non-negative (at or above the return pointer).
    Variables at negative positions are treated as local variables and are not flagged.
    Always returns the empty state. *)
let flag_unchecked_return_values (state: State.t) ~(cwe_hits: Taint.t ref) ~(project: Project.t) : State.t =
  let return_register_taint =
    Var.Map.filter_keys state.register ~f:(fun register -> Cconv.is_return_register register project)
    |> Var.Map.data
    |> Taint.union_list in
  let stack_taints = List.filter_map (Mem_region.list_data_pos state.stack) ~f:(fun (position_unsigned, taint_value) ->
    (* Positions are stored unsigned, so reinterpret them as signed before comparing with zero. *)
    let position = Bitvector.to_int_exn (Bitvector.signed position_unsigned) in
    if position >= 0 then Some taint_value else None
  ) in
  let taint_to_flag = Taint.union_list (return_register_taint :: stack_taints) in
  let () = append_to_hits cwe_hits taint_to_flag in
  State.empty


(** Flag every Tid that taints at least one register as a cwe_hit and remove these Tids from the whole state.
    Tids that only taint stack variables are neither flagged nor removed. *)
let flag_register_taints (state: State.t) ~(cwe_hits: Taint.t ref) : State.t =
  let taint_to_flag = Taint.union_list (Var.Map.data state.register) in
  let () = append_to_hits cwe_hits taint_to_flag in
  State.remove_taint state taint_to_flag


(** Flag the taints of all possible parameter registers as cwe_hits and remove the flagged Tids from the state.
    These registers may be input values to an extern function call.
    This can lead to false positives if a function does not use all of these registers for argument passing. *)
let flag_parameter_register (state: State.t) ~(cwe_hits: Taint.t ref) ~(project: Project.t) : State.t =
  let taint_to_flag =
    Var.Map.filter_keys state.register ~f:(fun register -> Cconv.is_parameter_register register project)
    |> Var.Map.data
    |> Taint.union_list in
  let () = append_to_hits cwe_hits taint_to_flag in
  State.remove_taint state taint_to_flag


(** Remove the taint of all registers that are not callee-saved, without flagging anything.
    The Tids found in these registers are removed from the whole state, on the assumption that
    the callee checks them. Afterwards only callee-saved registers remain in the register map. *)
let untaint_non_callee_saved_register (state: State.t) ~(project: Project.t) : State.t =
  let is_callee_saved (register: Var.t) : Bool.t = Cconv.is_callee_saved register project in
  let taint_to_remove =
    Var.Map.filter_keys state.register ~f:(fun register -> not (is_callee_saved register))
    |> Var.Map.data
    |> Taint.union_list in
  let state = State.remove_taint state taint_to_remove in
  { state with State.register = Var.Map.filter_keys state.State.register ~f:is_callee_saved }


(** Mirror a store to a stack variable in the state.
    If the expression is a store to a known stack position, the taint of the stored value is written there.
    A store of an untainted value removes the stack entry instead.
    All other expressions, and stores to unknown addresses, leave the state unchanged.
    Hits found while evaluating the stored value are discarded here. *)
let update_stack_on_stores (exp: Exp.t) (state: State.t) ~(stack: StackInfo.t) : State.t =
  let pointer_size = Symbol_utils.arch_pointer_size_in_bytes stack.project in
  match exp with
  | Bil.Store(_mem, address_exp, value, _endian, size) ->
      let value_taint = contains_taint value state ~cwe_hits:(ref Taint.empty) ~stack in
      begin match StackInfo.get_address stack address_exp with
        | None -> state
        | Some(address) ->
            (* NOTE(review): pointer_size is a byte count but is passed as the bit width of the bitvector.
               Confirm that Mem_region expects this width. *)
            let store_size = Bitvector.of_int (Size.in_bytes size) ~width:pointer_size in
            if Taint.is_empty value_taint then
              State.remove_stack state ~pos:address ~size:store_size
            else
              State.set_stack state value_taint ~pos:address ~size:store_size
      end
  | _ -> state


(** Transfer function for a single def.
    The right hand side is first inspected for checks and for memory accesses through unchecked values
    (the latter are added to [cwe_hits]). Then the left hand side receives the taint of the right hand side,
    or is removed from the state if that taint is empty. Finally, stores to the stack are mirrored in the state. *)
let update_state_def (def: Def.t) (state: State.t) ~(cwe_hits: Taint.t ref) ~(stack: StackInfo.t) : State.t =
  let rhs = Def.rhs def in
  let state = checks_value rhs state ~cwe_hits ~stack in
  let (rhs_taint, state) = parse_taint_of_exp rhs state ~cwe_hits ~stack in
  let lhs = Def.lhs def in
  let state = match Taint.is_empty rhs_taint with
    | true -> State.remove_register state lhs
    | false -> State.set_register state lhs rhs_taint in
  update_stack_on_stores rhs state ~stack


(** Taint the return registers of the function [func_tid] as unchecked return values.
    Every argument with intent Out or Both is tainted with the Tid of [block].
    Fails if such an argument is not a plain register, or if [func_tid] is not a function of the program. *)
let taint_return_registers (func_tid: Tid.t) (state: State.t) ~(project: Project.t) ~(block: Blk.t) : State.t =
  let func = Term.find_exn sub_t (Project.program project) func_tid in
  (* All return registers of this call share one taint: the Tid of the calling block. *)
  let fresh_taint = Taint.singleton (Term.tid block) in
  Seq.fold (Term.enum arg_t func) ~init:state ~f:(fun state arg ->
      match Bap.Std.Arg.intent arg with
      | Some(Out) | Some(Both) -> begin
          match Bap.Std.Arg.rhs arg with
          | Bil.Var(variable) -> State.set_register state variable fresh_taint
          | _ -> failwith "[CWE476] Return register wasn't a register."
        end
      | Some(In) | None -> state
    )

(** Transfer function for a single jump.
    The guard condition is evaluated first: memory accesses through unchecked values in it are flagged,
    and every other taint in the condition counts as checked and is removed from the state.
    Afterwards the jump kind decides:
    - direct goto: nothing happens;
    - indirect goto: any taint in the target expression is flagged;
    - return: with the strict call policy, unchecked return values are flagged (this yields the empty state);
    - interrupt: all register taints are flagged;
    - call: taints in indirect target/return expressions are flagged, parameter registers are flagged
      (strict call policy only), registers are untainted, and on a call to one of the
      [malloc_like_functions] the return registers are tainted as unchecked return values. *)
let update_state_jmp
      (jmp: Jmp.t)
      (state: State.t)
      ~(cwe_hits: Taint.t ref)
      ~(malloc_like_functions: String.t List.t)
      ~(extern_functions: String.Set.t)
      ~(stack: StackInfo.t)
      ~(block: Blk.t)
      ~(strict_call_policy: Bool.t) : State.t =
  let project = stack.project in
  (* First check the guard condition for unchecked access. Any normal access clears the access from being unchecked. *)
  let state =
    let (condition_taint, state) = parse_taint_of_exp (Jmp.cond jmp) state ~cwe_hits ~stack in
    if Taint.is_empty condition_taint then
      state
    else
      State.remove_taint state condition_taint in
  match Jmp.kind jmp with
  | Goto(Direct(_)) -> state
  | Goto(Indirect(exp)) -> flag_any_access exp state ~cwe_hits ~stack
  | Ret(_) ->
      if strict_call_policy then
        flag_unchecked_return_values state ~cwe_hits ~project
      else
        state
  | Int(_, _) -> flag_register_taints state ~cwe_hits
  | Call(call) ->
      let target = Call.target call in
      (* Flag tainted values in the return and call expressions of indirect calls. *)
      let state = match Call.return call with
        | Some(Indirect(exp)) -> flag_any_access exp state ~cwe_hits ~stack
        | Some(Direct(_)) | None -> state in
      let state = match target with
        | Indirect(exp) -> flag_any_access exp state ~cwe_hits ~stack
        | Direct(_) -> state in
      (* Indirect calls are handled as extern calls, since we cannot handle them properly yet.
         TODO: Change that. *)
      let treated_as_extern = match target with
        | Indirect(_) -> true
        | Direct(tid) ->
            let sub = Term.find_exn sub_t (Project.program project) tid in
            Set.mem extern_functions (Sub.name sub) in
      (* Flag tainted values in the parameter registers (only if strict_call_policy is set).
         Intern calls do not necessarily adhere to any calling convention, so all registers are flagged for them. *)
      let state =
        if not strict_call_policy then
          state
        else if treated_as_extern then
          flag_parameter_register state ~cwe_hits ~project
        else
          flag_register_taints state ~cwe_hits in
      (* Remove the taint of non-callee-saved registers.
         For intern calls all registers are untainted, again because no calling convention can be assumed. *)
      let state =
        if treated_as_extern then
          untaint_non_callee_saved_register state ~project
        else
          { state with State.register = Var.Map.empty } in
      (* Introduce new taint for the return values of malloc_like_functions. *)
      begin match target with
        | Direct(tid) when List.exists malloc_like_functions ~f:(String.equal (Tid.name tid)) ->
            taint_return_registers tid state ~project ~block
        | Direct(_) | Indirect(_) -> state
      end


(** Transfer function for a whole basic block: folds the state over all defs and jumps of the block.
    Phi terms are ignored. Virtual registers are dropped from the resulting state.
    The strict call policy decides the behaviour on call and return instructions:
    strict: unchecked values in registers get flagged as cwe_hits
    non-strict: unchecked values in registers get marked as checked. It is assumed that the callee checks these values.
    The strict memory policy is forwarded to the stack info of every term. *)
let update_block_analysis
      (block: Blk.t)
      (state: State.t)
      ~(cwe_hits: Taint.t ref)
      ~(malloc_like_functions: String.t List.t)
      ~(extern_functions: String.Set.t)
      ~(sub_tid: Tid.t)
      ~(project: Project.t)
      ~(strict_call_policy: Bool.t)
      ~(strict_mem_policy: Bool.t)  : State.t =
  let elements = Blk.elts block in
  let type_info_map = Type_inference.get_type_info_of_block ~project block ~sub_tid in
  (* Stack information is specific to each term, so it is assembled per def/jmp. *)
  let stack_info_at (term_tid: Tid.t) : StackInfo.t =
    StackInfo.assemble type_info_map term_tid ~sub_tid ~project ~strict_mem_policy in
  let state_at_block_end = Seq.fold elements ~init:state ~f:(fun state -> function
      | `Phi _phi -> state (* We ignore phi terms for this analysis. *)
      | `Def def ->
          update_state_def def state ~cwe_hits ~stack:(stack_info_at (Term.tid def))
      | `Jmp jmp ->
          let stack = stack_info_at (Term.tid jmp) in
          update_state_jmp jmp state ~cwe_hits ~malloc_like_functions ~extern_functions ~stack ~block ~strict_call_policy
    ) in
  (* Virtual registers should not be accessed outside of the block where they are defined. *)
  State.remove_virtual_register state_at_block_end


(** Report one cwe_hit to the log.
    [tid] is the Tid of the block in which the taint originated. The first jump of that block that is a
    direct call to one of the [malloc_like_functions] is reported as the source of the unchecked return value.
    Raises if [sub] contains no block with this Tid, or if the block contains no such call. *)
let print_hit (tid: Tid.t) ~(sub: Sub.t) ~(malloc_like_functions: String.t List.t) ~(tid_map: Word.t Tid.Map.t) : unit =
  let block = Option.value_exn (Term.find blk_t sub tid) in
  (* Build and collect the warning for a call [jmp] to the malloc-like function [fn_name]. *)
  let report (jmp: Jmp.t) (fn_name: String.t) : unit =
    let jmp_tid = Term.tid jmp in
    let address = Address_translation.translate_tid_to_assembler_address_string jmp_tid tid_map in
    let tids = [Address_translation.tid_to_string jmp_tid] in
    let description = sprintf
                        "(NULL Pointer Dereference) There is no check if the return value is NULL at %s (%s)."
                        address
                        fn_name in
    let cwe_warning = cwe_warning_factory
                        name
                        version
                        ~addresses:[address]
                        ~tids
                        ~symbols:[fn_name]
                        description in
    collect_cwe_warning cwe_warning in
  (* Returns true (after reporting) iff [jmp] is a direct call to a malloc-like function. *)
  let reports_hit (jmp: Jmp.t) : Bool.t =
    match Jmp.kind jmp with
    | Call(call) -> begin
        match Call.target call with
        | Direct(call_tid) -> begin
            match List.find malloc_like_functions ~f:(String.equal (Tid.name call_tid)) with
            | Some(fn_name) ->
                let () = report jmp fn_name in
                true
            | None -> false
          end
        | Indirect(_) -> false
      end
    | Goto(_) | Ret(_) | Int(_, _) -> false in
  (* Only the reporting side effect of the first matching jump is of interest. *)
  ignore (Seq.find_exn (Term.enum jmp_t block) ~f:reports_hit : Jmp.t)


(** Entry point of the check.
    [symbol_names] must start with the list of malloc-like function names.
    [parameters] must start with three entries of the form
    strict_call_policy=BOOL, strict_memory_policy=BOOL and max_steps=INT, in this order.
    For every function of the program a fixpoint of the taint analysis is computed on its control flow graph
    (bounded by max_steps), and every hit found is reported through [print_hit].
    Fails if the symbol names or parameters do not have the expected shape. *)
let check_cwe (_prog: Program.t) (project: Project.t) (tid_map: Word.t Tid.Map.t) (symbol_names: String.t List.t List.t) (parameters: String.t List.t) =
  (* Parse one parameter of the form KEY=VALUE and convert VALUE with [of_string]. *)
  let parse_parameter raw ~key ~of_string =
    match String.split raw ~on:'=' with
    | [found_key; value] when String.equal found_key key -> of_string value
    | _ -> failwith "[CWE476] parameters not as expected" in
  let symbols = match symbol_names with
    | first_symbols :: _ -> first_symbols
    | [] -> failwith "[CWE476] symbol_names not as expected" in
  let (call_policy_param, mem_policy_param, max_steps_param) = match parameters with
    | par1 :: par2 :: par3 :: _ -> (par1, par2, par3)
    | _ -> failwith "[CWE476] parameters not as expected" in
  let strict_call_policy = parse_parameter call_policy_param ~key:"strict_call_policy" ~of_string:bool_of_string in
  let strict_mem_policy = parse_parameter mem_policy_param ~key:"strict_memory_policy" ~of_string:bool_of_string in
  let max_steps = parse_parameter max_steps_param ~key:"max_steps" ~of_string:int_of_string in
  (* The names are compared against Tid names, which carry a leading @. *)
  let malloc_like_functions = List.map symbols ~f:((^) "@") in
  let extern_functions = Symbol_utils.parse_dyn_syms project in
  (* run the pointer inference analysis. TODO: This should be done somewhere else as this analysis will be needed in more than one check! *)
  let project = Type_inference.compute_pointer_register project in
  Seq.iter (Term.enum sub_t (Project.program project)) ~f:(fun subfn ->
      let cfg = Sub.to_cfg subfn in
      let sub_tid = Term.tid subfn in
      (* Hits are collected per function, as a side effect of the transfer function. *)
      let cwe_hits = ref Taint.empty in
      let init = Graphlib.Std.Solution.create (Map.empty (module Graphs.Ir.Node)) State.empty in
      let transfer node state =
        update_block_analysis (Graphs.Ir.Node.label node) state
          ~cwe_hits ~malloc_like_functions ~extern_functions ~sub_tid ~project ~strict_call_policy ~strict_mem_policy in
      (* The solution itself is not needed, only the hits gathered while computing it. *)
      let _solution = Graphlib.Std.Graphlib.fixpoint (module Graphs.Ir) cfg
          ~steps:max_steps ~rev:false ~init ~equal:State.equal ~merge:State.union ~f:transfer in
      Taint.iter !cwe_hits ~f:(fun hit -> print_hit hit ~sub:subfn ~malloc_like_functions ~tid_map)
  )

(**/**)
(* Functions made public for unit tests.
   This module only re-exports definitions from above under the same names and adds no behaviour of its own. *)
module Private = struct
  module StackInfo = StackInfo
  module Taint = Taint
  module State = State
  let flag_unchecked_return_values = flag_unchecked_return_values
  let flag_register_taints = flag_register_taints
  let flag_parameter_register = flag_parameter_register
  let untaint_non_callee_saved_register = untaint_non_callee_saved_register
end