Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
cwe_checker
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
fact-depend
cwe_checker
Commits
5343432e
Commit
5343432e
authored
Sep 30, 2020
by
Enkelmann
Committed by
Enkelmann
Nov 03, 2020
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add documentation to the internal IR
parent
676e27a0
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
194 additions
and
47 deletions
+194
-47
mod.rs
cwe_checker_rs/src/analysis/pointer_inference/mod.rs
+1
-0
expression.rs
cwe_checker_rs/src/intermediate_representation/expression.rs
+30
-0
mod.rs
cwe_checker_rs/src/intermediate_representation/mod.rs
+14
-1
term.rs
cwe_checker_rs/src/intermediate_representation/term.rs
+137
-1
variable.rs
cwe_checker_rs/src/intermediate_representation/variable.rs
+8
-0
lib.rs
cwe_checker_rs/src/lib.rs
+1
-1
term.rs
cwe_checker_rs/src/pcode/term.rs
+0
-1
mod.rs
cwe_checker_rs/src/term/mod.rs
+3
-43
No files found.
cwe_checker_rs/src/analysis/pointer_inference/mod.rs
View file @
5343432e
...
...
@@ -20,6 +20,7 @@ use petgraph::graph::NodeIndex;
use
petgraph
::
visit
::
IntoNodeReferences
;
use
petgraph
::
Direction
;
use
std
::
collections
::
HashMap
;
use
crate
::
prelude
::
*
;
mod
context
;
mod
object
;
...
...
cwe_checker_rs/src/intermediate_representation/expression.rs
View file @
5343432e
...
...
@@ -2,28 +2,55 @@ use super::ByteSize;
use
super
::
Variable
;
use
crate
::
prelude
::
*
;
/// An expression is a calculation rule
/// on how to compute a certain value given some variables (register values) as input.
///
/// The basic building blocks of expressions are the same as for Ghidra P-Code.
/// However, expressions can be nested, unlike original P-Code.
///
/// Computing the value of an expression is a side-effect-free operation.
///
/// Expressions are typed in the sense that each expression has a `ByteSize`
/// indicating the size of the result when evaluating the expression.
/// Some expressions impose restrictions on the sizes of their inputs
/// for the expression to be well-typed.
///
/// All operations are defined the same as the corresponding P-Code operation.
/// Further information about specific operations can be obtained by looking up the P-Code mnemonics in the
/// [P-Code Reference Manual](https://ghidra.re/courses/languages/html/pcoderef.html).
#[derive(Serialize,
Deserialize,
Debug,
PartialEq,
Eq,
Hash,
Clone)]
pub
enum
Expression
{
/// A variable representing a register or temporary value of known size.
Var
(
Variable
),
/// A constant value represented by a bitvector.
Const
(
Bitvector
),
/// A binary operation.
/// Note that most (but not all) operations require the left hand side (`lhs`)
/// and right hand side (`rhs`) to be of equal size.
BinOp
{
op
:
BinOpType
,
lhs
:
Box
<
Expression
>
,
rhs
:
Box
<
Expression
>
,
},
/// A unary operation
UnOp
{
op
:
UnOpType
,
arg
:
Box
<
Expression
>
,
},
/// A cast operation for type casts between integer and floating point types of different byte lengths.
Cast
{
op
:
CastOpType
,
size
:
ByteSize
,
arg
:
Box
<
Expression
>
,
},
/// An unknown value but with known size.
/// This may be generated for e.g. unsupported assembly instructions.
/// Note that computation of an unknown value is still required to be side-effect-free!
Unknown
{
description
:
String
,
size
:
ByteSize
,
},
/// Extracting a sub-bitvector from the argument expression.
Subpiece
{
low_byte
:
ByteSize
,
size
:
ByteSize
,
...
...
@@ -31,6 +58,7 @@ pub enum Expression {
},
}
/// The type/mnemonic of a binary operation
#[derive(Serialize,
Deserialize,
Debug,
PartialEq,
Eq,
Hash,
Clone,
Copy)]
pub
enum
BinOpType
{
Piece
,
...
...
@@ -69,6 +97,7 @@ pub enum BinOpType {
FloatDiv
,
}
/// The type/mnemonic of a typecast
#[derive(Serialize,
Deserialize,
Debug,
PartialEq,
Eq,
Hash,
Clone,
Copy)]
pub
enum
CastOpType
{
IntZExt
,
...
...
@@ -78,6 +107,7 @@ pub enum CastOpType {
Trunc
,
}
/// The type/mnemonic of a unary operation
#[derive(Serialize,
Deserialize,
Debug,
PartialEq,
Eq,
Hash,
Clone,
Copy)]
pub
enum
UnOpType
{
IntNegate
,
...
...
cwe_checker_rs/src/intermediate_representation/mod.rs
View file @
5343432e
//! This module defines the intermediate representation used to represent a binary
//! and all its contained executable code.
//!
//! The main data structure is the `Project` struct,
//! which contains all information recovered about a binary during the disassembly step.
//! To learn how individual instructions are encoded,
//! you should first take a look at the `Expression` type and then at the `Def` and `Jmp` data types,
//! which form the basis of the basic block `Blk` struct.
use
crate
::
prelude
::
*
;
use
derive_more
::
*
;
use
std
::
convert
::
TryFrom
;
...
...
@@ -9,7 +18,11 @@ pub use expression::*;
mod
term
;
pub
use
term
::
*
;
// TODO: move ByteSize and BitSize into their own module
/// An unsigned number of bytes.
///
/// Used to represent sizes of values in registers or in memory.
/// Can also be used for other byte-valued numbers, like offsets,
/// as long as the number is guaranteed to be non-negative.
#[derive(
Serialize,
Deserialize,
...
...
cwe_checker_rs/src/intermediate_representation/term.rs
View file @
5343432e
use
super
::{
ByteSize
,
Expression
,
Variable
};
use
crate
::
prelude
::
*
;
use
crate
::
term
::{
Term
,
Tid
};
/// Identifier of a term.
///
/// It pairs an ID string (which is required to be unique)
/// with the address at which the term is located.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct Tid {
    /// Unique ID string of the term.
    id: String,
    /// Address at which the term is located.
    pub address: String,
}
impl Tid {
    /// Create a term identifier from the given ID value.
    ///
    /// The ID string is obtained via `ToString`;
    /// the address of the new identifier is set to the placeholder `"UNKNOWN"`.
    pub fn new<T: ToString>(val: T) -> Tid {
        let id = val.to_string();
        let address = String::from("UNKNOWN");
        Tid { id, address }
    }

    /// Consume the `Tid` and return a new one whose ID string has `suffix` appended.
    ///
    /// The address is carried over unchanged.
    pub fn with_id_suffix(self, suffix: &str) -> Self {
        let Tid { mut id, address } = self;
        id.push_str(suffix);
        Tid { id, address }
    }
}
impl std::fmt::Display for Tid {
    /// Print only the ID string of the term identifier (the address is not printed).
    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str(self.id.as_str())
    }
}
/// A term is an object inside a binary with an address and a unique ID
/// (both of which are stored in the `tid`).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Term<T> {
    /// Identifier of the term; it also holds the address of the term.
    pub tid: Tid,
    /// The wrapped object itself.
    pub term: T,
}
/// An operation with side effects:
/// either a register assignment or a memory load/store.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Def {
    /// Load a value from memory into the register `var`.
    ///
    /// The number of bytes read from memory is given by the size of `var`.
    /// The size of `address` must match the pointer size of the corresponding CPU architecture.
    Load {
        /// The register receiving the loaded value.
        var: Variable,
        /// The expression computing the memory address to read from.
        address: Expression,
    },
    /// Store a value to memory.
    ///
    /// The number of bytes written is given by the size of `value`.
    /// The size of `address` must match the pointer size of the corresponding CPU architecture.
    Store {
        /// The expression computing the memory address to write to.
        address: Expression,
        /// The expression computing the value to be written.
        value: Expression,
    },
    /// Assign the result of the expression `value` to the register `var`.
    Assign {
        /// The register being assigned to.
        var: Variable,
        /// The expression whose result is assigned.
        value: Expression,
    },
}
/// A `Jmp` instruction affects the control flow of a program, i.e. it may change the instruction pointer.
/// Apart from `CallOther`, a `Jmp` has no other side effects.
///
/// `Jmp` instructions carry some semantic information with them,
/// e.g. whether a jump is intra- or interprocedural.
/// Note that this semantic information may not always be correct.
///
/// The targets (and return targets) of jumps are, if known, either basic blocks (`Blk`) or subroutines (`Sub`),
/// depending on the type of the jump.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Jmp {
    /// A direct intraprocedural jump to the `Blk` with the given term identifier.
    Branch(Tid),
    /// An indirect intraprocedural jump to the address that the given expression evaluates to.
    BranchInd(Expression),
    /// A direct intraprocedural jump that is only taken if the condition evaluates to true (i.e. not zero).
    CBranch {
        /// Term identifier of the jump target.
        target: Tid,
        /// The jump is taken only if this expression evaluates to a non-zero value.
        condition: Expression,
    },
    /// A direct interprocedural jump representing a subroutine call.
    ///
    /// Note that this is syntactically equivalent to a `Jmp::Branch`.
    /// If `return_` is `None`, then the called function does not return to its caller.
    Call {
        /// Term identifier of the call target.
        target: Tid,
        /// Term identifier of the return target, if there is one.
        return_: Option<Tid>,
    },
    /// An indirect interprocedural jump representing a subroutine call
    /// to the address that the `target` expression evaluates to.
    ///
    /// Note that this is syntactically equivalent to a `Jmp::BranchInd`.
    /// If `return_` is `None`, then the called function is believed to not return to its caller.
    CallInd {
        /// Expression evaluating to the address of the call target.
        target: Expression,
        /// Term identifier of the return target, if there is one.
        return_: Option<Tid>,
    },
    /// An indirect interprocedural jump indicating a return from a subroutine.
    ///
    /// Note that this is syntactically equivalent to a `Jmp::BranchInd`.
    Return(Expression),
    /// This instruction is used for all side effects that cannot be represented by other instructions
    /// or that are not supported by the disassembler.
    ///
    /// E.g. syscalls and other interrupts are mapped to `CallOther`.
    /// Assembly instructions that the disassembler does not support are also mapped to `CallOther`.
    /// The `description` field can be used to match for and handle known side effects (e.g. syscalls).
    ///
    /// The `return_` field holds the `Blk` term identifier
    /// where the disassembler assumes that execution continues after the side effect was handled.
    CallOther {
        /// Description of the side effect.
        description: String,
        /// Term identifier of the block where execution is assumed to continue, if known.
        return_: Option<Tid>,
    },
}
/// A basic block is a sequence of `Def` instructions followed by up to two `Jmp` instructions.
///
/// The `Def` instructions represent side-effectful operations that are executed in order when the block is entered.
/// `Def` instructions do not affect the control flow of a program.
///
/// The `Jmp` instructions represent operations affecting the control flow.
/// A block contains zero, one or two `Jmp`s:
/// - Zero `Jmp`s indicate that the next instruction to be executed could not be discerned.
///   This should only happen on disassembler errors
///   or on dead ends in the control flow graph that were deliberately inserted by the user.
/// - If there is exactly one `Jmp`, it is required to be an unconditional jump.
/// - For two `Jmp`s, the first one has to be a conditional jump.
///   The second, unconditional jump is only taken if the condition of the first jump evaluates to false.
///
/// Basic blocks are *single entry, single exit*, i.e. a basic block is only entered at the beginning
/// and is only exited by the jump instructions at the end of the block.
/// If a new control flow edge is discovered that would jump to the middle of a basic block,
/// the block structure needs to be updated accordingly.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Blk {
    /// The `Def` instructions of the block in order of execution.
    pub defs: Vec<Term<Def>>,
    /// The `Jmp` instructions at the end of the block.
    pub jmps: Vec<Term<Jmp>>,
}
/// A `Sub` (subroutine) represents a function
/// with a given name and a list of basic blocks belonging to it.
///
/// Subroutines are *single-entry*,
/// i.e. calling a subroutine will execute the first block in the list of basic blocks.
/// A subroutine may have multiple exits, which are identified by `Jmp::Return` instructions.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Sub {
    /// Name of the subroutine.
    pub name: String,
    /// Basic blocks belonging to the subroutine.
    /// The first block in the list is the entry point of the subroutine.
    pub blocks: Vec<Term<Blk>>,
}
/// A parameter or return argument of a function.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Arg {
    /// The argument is passed in the given register.
    Register(Variable),
    /// The argument is passed on the stack.
    /// It is located at the given offset (in bytes) relative to the stack pointer on function entry
    /// and has the given size.
    Stack {
        /// Offset in bytes relative to the stack pointer on function entry.
        offset: i64,
        /// Size of the argument.
        size: ByteSize,
    },
}
/// An extern symbol represents a function that is dynamically linked from another binary.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
    /// Term identifier of the extern symbol.
    pub tid: Tid,
    /// Name of the extern symbol.
    pub name: String,
    /// Calling convention used for the extern symbol, if known.
    pub calling_convention: Option<String>,
    /// Parameters of the extern symbol.
    /// May be empty if there are no parameters or if the parameters are unknown.
    pub parameters: Vec<Arg>,
    /// Return values of the extern symbol.
    /// May be empty if there is no return value or if the return values are unknown.
    pub return_values: Vec<Arg>,
    /// If `true`, the function is assumed to never return to its caller when called.
    pub no_return: bool,
}
/// The `Program` structure represents a disassembled binary.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
    /// The known functions contained in the binary.
    pub subs: Vec<Term<Sub>>,
    /// Extern symbols linked to the binary by the linker.
    pub extern_symbols: Vec<ExternSymbol>,
    /// Entry points into the binary,
    /// i.e. the term identifiers of functions that may be called from outside of the binary.
    pub entry_points: Vec<Tid>,
}
/// The `Project` struct is the main data structure representing a binary.
///
/// It holds information about the disassembled binary
/// as well as about the execution environment of the binary.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
    /// The `program` term contains all (known) executable code of the binary.
    pub program: Term<Program>,
    /// The CPU architecture on which the binary is assumed to be executed.
    pub cpu_architecture: String,
    /// The stack pointer register of the given CPU architecture.
    pub stack_pointer_register: Variable,
}
cwe_checker_rs/src/intermediate_representation/variable.rs
View file @
5343432e
use
super
::
ByteSize
;
use
crate
::
prelude
::
*
;
/// A variable represents a register with a known size and name.
///
/// Variables can be temporary (or virtual).
/// In this case they do not represent actual physical registers
/// and are only used to store intermediate results necessary for representing more complex assembly instructions.
/// Temporary variables are only valid until the end of the current assembly instruction.
/// However, one assembly instruction may span more than one basic block in the intermediate representation
/// (but never more than one function).
#[derive(Serialize,
Deserialize,
Debug,
PartialEq,
Eq,
Hash,
Clone)]
pub
struct
Variable
{
pub
name
:
String
,
...
...
cwe_checker_rs/src/lib.rs
View file @
5343432e
...
...
@@ -21,6 +21,6 @@ mod prelude {
pub
use
serde
::{
Deserialize
,
Serialize
};
pub
use
crate
::
bil
::{
BitSize
,
Bitvector
};
pub
use
crate
::
term
::
Tid
;
pub
use
crate
::
intermediate_representation
::{
Term
,
Tid
}
;
pub
use
anyhow
::{
anyhow
,
Error
};
}
cwe_checker_rs/src/pcode/term.rs
View file @
5343432e
...
...
@@ -10,7 +10,6 @@ use crate::intermediate_representation::Program as IrProgram;
use
crate
::
intermediate_representation
::
Project
as
IrProject
;
use
crate
::
intermediate_representation
::
Sub
as
IrSub
;
use
crate
::
prelude
::
*
;
use
crate
::
term
::{
Term
,
Tid
};
// TODO: Handle the case where an indirect tail call is represented by CALLIND plus RETURN
...
...
cwe_checker_rs/src/term/mod.rs
View file @
5343432e
...
...
@@ -8,45 +8,11 @@ use crate::intermediate_representation::Program as IrProgram;
use
crate
::
intermediate_representation
::
Project
as
IrProject
;
use
crate
::
intermediate_representation
::
Sub
as
IrSub
;
use
serde
::{
Deserialize
,
Serialize
};
use
crate
::
intermediate_representation
::{
Term
,
Tid
};
pub
mod
symbol
;
use
symbol
::
ExternSymbol
;
/// A term identifier made up of an ID string and an address string.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub struct Tid {
    /// ID string of the term.
    id: String,
    /// Address string of the term.
    pub address: String,
}
impl Tid {
    /// Build a `Tid` whose ID is the string form of `val`
    /// and whose address is the placeholder `"UNKNOWN"`.
    pub fn new<T: ToString>(val: T) -> Tid {
        let address = String::from("UNKNOWN");
        let id = val.to_string();
        Tid { id, address }
    }

    /// Append `suffix` to the ID string and return the resulting `Tid`.
    /// The address is kept as is.
    pub fn with_id_suffix(self, suffix: &str) -> Self {
        let mut id = self.id;
        id += suffix;
        Tid {
            id,
            address: self.address,
        }
    }
}
impl std::fmt::Display for Tid {
    /// Write the ID string of the `Tid`; the address is not part of the output.
    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        let id: &str = &self.id;
        formatter.write_str(id)
    }
}
/// A generic wrapper pairing an object with its term identifier.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Term<T> {
    /// Term identifier of the wrapped object.
    pub tid: Tid,
    /// The wrapped object.
    pub term: T,
}
#[derive(Serialize,
Deserialize,
Debug,
PartialEq,
Eq,
Hash,
Clone)]
pub
struct
Def
{
pub
lhs
:
Variable
,
...
...
@@ -241,10 +207,7 @@ impl From<Blk> for IrBlk {
}
else
{
for
(
counter
,
ir_def
)
in
ir_defs
.into_iter
()
.enumerate
()
{
ir_def_terms
.push
(
Term
{
tid
:
Tid
{
id
:
format!
(
"{}_{}"
,
def_term
.tid.id
,
counter
),
address
:
def_term
.tid.address
.clone
(),
},
tid
:
def_term
.tid
.clone
()
.with_id_suffix
(
&
format!
(
"_{}"
,
counter
)),
term
:
ir_def
,
});
}
...
...
@@ -261,10 +224,7 @@ impl From<Blk> for IrBlk {
}
for
(
counter
,
ir_def
)
in
ir_defs
.into_iter
()
.enumerate
()
{
ir_def_terms
.push
(
Term
{
tid
:
Tid
{
id
:
format!
(
"{}_{}"
,
jmp_term
.tid.id
,
counter
),
address
:
jmp_term
.tid.address
.clone
(),
},
tid
:
jmp_term
.tid
.clone
()
.with_id_suffix
(
&
format!
(
"_{}"
,
counter
)),
term
:
ir_def
,
});
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment