1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
use super::object_list::AbstractObjectList;
use super::Data;
use crate::abstract_domain::*;
use crate::intermediate_representation::*;
use crate::prelude::*;
use std::collections::{BTreeMap, BTreeSet};
mod access_handling;
/// Contains all information known about the state of a program at a specific point of time.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct State {
    /// Maps a register variable to the data known about its content.
    /// A variable not contained in the map has value `Data::Top(..)`, i.e. nothing is known about its content.
    register: BTreeMap<Variable, Data>,
    /// The list of all known memory objects.
    pub memory: AbstractObjectList,
    /// The abstract identifier of the current stack frame.
    /// It points to the base of the stack frame, i.e. only negative offsets point into the current stack frame.
    pub stack_id: AbstractIdentifier,
    /// All known IDs of caller stack frames.
    /// Note that these IDs are named after the callsite,
    /// i.e. we can distinguish every callsite and for recursive functions the caller and current stack frames have different IDs.
    ///
    /// Writes to the current stack frame with offset >= 0 are written to *all* caller stack frames.
    /// Reads from the current stack frame with offset >= 0 are handled as a merge-read from all caller stack frames.
    pub caller_stack_ids: BTreeSet<AbstractIdentifier>,
    /// All IDs of objects that are known to some caller.
    /// This is an overapproximation of all object IDs that may have been passed as parameters to the function.
    /// The corresponding objects are not allowed to be deleted (even if no pointer to them exists anymore)
    /// so that after returning from a call the caller can recover their modified contents
    /// and the callee does not accidentally delete this information if it loses all pointers to an object.
    ///
    /// Note that IDs that the callee should not have access to are not included here.
    /// For these IDs the caller can assume that the contents of the corresponding memory object were not accessed or modified by the call.
    pub ids_known_to_caller: BTreeSet<AbstractIdentifier>,
}
impl State {
/// Create a new state that contains only one memory object corresponding to the stack.
/// The stack offset will be set to zero.
pub fn new(stack_register: &Variable, function_tid: Tid) -> State {
let stack_id = AbstractIdentifier::new(
function_tid,
AbstractLocation::from_var(stack_register).unwrap(),
);
let mut register: BTreeMap<Variable, Data> = BTreeMap::new();
register.insert(
stack_register.clone(),
PointerDomain::new(
stack_id.clone(),
Bitvector::zero(apint::BitWidth::from(stack_register.size)).into(),
)
.into(),
);
State {
register,
memory: AbstractObjectList::from_stack_id(stack_id.clone(), stack_register.size),
stack_id,
caller_stack_ids: BTreeSet::new(),
ids_known_to_caller: BTreeSet::new(),
}
}
/// Clear all non-callee-saved registers from the state.
/// This automatically also removes all virtual registers.
/// The parameter is a list of callee-saved register names.
pub fn clear_non_callee_saved_register(&mut self, callee_saved_register_names: &[String]) {
let register = self
.register
.iter()
.filter_map(|(register, value)| {
if callee_saved_register_names
.iter()
.any(|reg_name| **reg_name == register.name)
{
Some((register.clone(), value.clone()))
} else {
None
}
})
.collect();
self.register = register;
}
/// Mark those parameter values of an extern function call, that are passed on the stack,
/// as unknown data (since the function may modify them).
pub fn clear_stack_parameter(
&mut self,
extern_call: &ExternSymbol,
stack_pointer_register: &Variable,
) -> Result<(), Error> {
let mut result_log = Ok(());
for arg in &extern_call.parameters {
match arg {
Arg::Register(_) => (),
Arg::Stack { offset, size } => {
let data_top = Data::new_top(*size);
let location_expression = Expression::BinOp {
lhs: Box::new(Expression::Var(stack_pointer_register.clone())),
op: BinOpType::IntAdd,
rhs: Box::new(Expression::Const(
Bitvector::from_i64(*offset)
.into_truncate(apint::BitWidth::from(stack_pointer_register.size))
.unwrap(),
)),
};
if let Err(err) = self.write_to_address(&location_expression, &data_top) {
result_log = Err(err);
}
}
}
}
// We only return the last error encountered.
result_log
}
/// Replace all occurences of old_id with new_id and adjust offsets accordingly.
/// This is needed to replace stack/caller IDs on call and return instructions.
///
/// **Example:**
/// Assume the old_id points to offset 0 in the corresponding memory object and the new_id points to offset -32.
/// Then the offset_adjustment is -32.
/// The offset_adjustment gets *added* to the base offset in self.memory.ids (so that it points to offset -32 in the memory object),
/// while it gets *subtracted* from all pointer values (so that they still point to the same spot in the corresponding memory object).
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
for register_data in self.register.values_mut() {
register_data.replace_abstract_id(old_id, new_id, &(-offset_adjustment.clone()));
}
self.memory
.replace_abstract_id(old_id, new_id, offset_adjustment);
if &self.stack_id == old_id {
self.stack_id = new_id.clone();
}
if self.caller_stack_ids.get(old_id).is_some() {
self.caller_stack_ids.remove(old_id);
self.caller_stack_ids.insert(new_id.clone());
}
if self.ids_known_to_caller.get(old_id).is_some() {
self.ids_known_to_caller.remove(old_id);
self.ids_known_to_caller.insert(new_id.clone());
}
}
/// Remove all objects that cannot longer be reached by any known pointer.
/// This does not remove objects, where some caller may still know a pointer to the object.
///
/// The function uses an underapproximation of all possible pointer targets contained in a memory object.
/// This keeps the number of tracked objects reasonably small.
pub fn remove_unreferenced_objects(&mut self) {
// get all referenced IDs
let mut referenced_ids = BTreeSet::new();
for (_reg_name, data) in self.register.iter() {
referenced_ids.append(&mut data.referenced_ids());
}
referenced_ids.insert(self.stack_id.clone());
referenced_ids.append(&mut self.caller_stack_ids.clone());
referenced_ids.append(&mut self.ids_known_to_caller.clone());
referenced_ids = self.add_directly_reachable_ids_to_id_set(referenced_ids);
// remove unreferenced objects
self.memory.remove_unused_objects(&referenced_ids);
}
/// Search (recursively) through all memory objects referenced by the given IDs
/// and add all IDs reachable through concrete pointers contained in them to the set of IDs.
///
/// This uses an underapproximation of the referenced IDs of a memory object,
/// i.e. IDs may be missing if the analysis lost track of the corresponding pointer.
pub fn add_directly_reachable_ids_to_id_set(
&self,
mut ids: BTreeSet<AbstractIdentifier>,
) -> BTreeSet<AbstractIdentifier> {
let mut unsearched_ids = ids.clone();
while let Some(id) = unsearched_ids.iter().next() {
let id = id.clone();
unsearched_ids.remove(&id);
let memory_ids = self.memory.get_referenced_ids_underapproximation(&id);
for mem_id in memory_ids {
if ids.get(&mem_id).is_none() {
ids.insert(mem_id.clone());
unsearched_ids.insert(mem_id.clone());
}
}
}
ids
}
/// Search (recursively) through all memory objects referenced by the given IDs
/// and add all IDs contained in them to the set of IDs.
///
/// This uses an overapproximation of the referenced IDs of a memory object,
/// i.e. for a memory object it may add IDs as possible references
/// where the corresponding reference is not longer present in the memory object.
pub fn add_recursively_referenced_ids_to_id_set(
&self,
mut ids: BTreeSet<AbstractIdentifier>,
) -> BTreeSet<AbstractIdentifier> {
let mut unsearched_ids = ids.clone();
while let Some(id) = unsearched_ids.iter().next() {
let id = id.clone();
unsearched_ids.remove(&id);
let memory_ids = self.memory.get_referenced_ids_overapproximation(&id);
for mem_id in memory_ids {
if ids.get(&mem_id).is_none() {
ids.insert(mem_id.clone());
unsearched_ids.insert(mem_id.clone());
}
}
}
ids
}
    /// Merge the callee stack with the caller stack.
    ///
    /// This deletes the memory object corresponding to the callee_id
    /// and updates all other references pointing to the callee_id to point to the caller_id.
    /// The offset adjustment is handled as in `replace_abstract_id`.
    ///
    /// Note that right now the content of the callee memory object is *not* merged into the caller memory object.
    /// In general this is the correct behaviour
    /// as the content below the stack pointer should be considered uninitialized memory after returning to the caller.
    /// However, an aggressively optimizing compiler or an unknown calling convention may deviate from this.
    pub fn merge_callee_stack_to_caller_stack(
        &mut self,
        callee_id: &AbstractIdentifier,
        caller_id: &AbstractIdentifier,
        offset_adjustment: &BitvectorDomain,
    ) {
        // Delete the callee stack object first; afterwards all remaining references
        // to callee_id (in registers and other memory objects) are redirected to caller_id.
        self.memory.remove_object(callee_id);
        self.replace_abstract_id(callee_id, caller_id, offset_adjustment);
    }
    /// Mark a memory object as already freed (i.e. pointers to it are dangling).
    /// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
    ///
    /// If this may cause double frees (i.e. the object in question may have been freed already),
    /// an error with the list of possibly already freed objects is returned.
    pub fn mark_mem_object_as_freed(
        &mut self,
        object_pointer: &PointerDomain<BitvectorDomain>,
    ) -> Result<(), Vec<(AbstractIdentifier, Error)>> {
        // Thin wrapper: the object list tracks the free-status of each memory object.
        self.memory.mark_mem_object_as_freed(object_pointer)
    }
/// Remove all virtual register from the state.
/// This should only be done in cases where it is known that no virtual registers can be alive.
///
/// Example: At the start of a basic block no virtual registers should be alive.
pub fn remove_virtual_register(&mut self) {
self.register = self
.register
.clone()
.into_iter()
.filter(|(register, _value)| !register.is_temp)
.collect();
}
/// Recursively remove all `caller_stack_ids` not corresponding to the given caller.
pub fn remove_other_caller_stack_ids(&mut self, caller_id: &AbstractIdentifier) {
let mut ids_to_remove = self.caller_stack_ids.clone();
ids_to_remove.remove(caller_id);
for register_value in self.register.values_mut() {
register_value.remove_ids(&ids_to_remove);
}
self.memory.remove_ids(&ids_to_remove);
self.caller_stack_ids = BTreeSet::new();
self.caller_stack_ids.insert(caller_id.clone());
self.ids_known_to_caller = self
.ids_known_to_caller
.difference(&ids_to_remove)
.cloned()
.collect();
}
    /// Add those objects from the `caller_state` to `self`, that are not known to `self`.
    ///
    /// Since self does not know these objects, we assume that the current function could not have accessed
    /// them in any way during execution.
    /// This means they are unchanged from the moment of the call until the return from the call,
    /// thus we can simply copy their object-state from the moment of the call.
    pub fn readd_caller_objects(&mut self, caller_state: &State) {
        // Only objects unknown to self get appended; already-known objects stay untouched.
        self.memory.append_unknown_objects(&caller_state.memory);
    }
/// Restore the content of callee-saved registers from the caller state.
///
/// This function does not check what the callee state currently contains in these registers.
/// If the callee does not adhere to the given calling convention, this may introduce analysis errors!
/// It will also mask cases,
/// where a callee-saved register was incorrectly modified (e.g. because of a bug in the callee).
pub fn restore_callee_saved_register(
&mut self,
caller_state: &State,
cconv: &CallingConvention,
) {
for (register, value) in caller_state.register.iter() {
if cconv
.callee_saved_register
.iter()
.any(|reg_name| *reg_name == register.name)
{
self.set_register(register, value.clone());
}
}
}
/// Remove all knowledge about the contents of callee-saved registers from the state.
pub fn remove_callee_saved_register(&mut self, cconv: &CallingConvention) {
let mut register_to_remove = Vec::new();
for register in self.register.keys() {
if cconv
.callee_saved_register
.iter()
.any(|reg_name| *reg_name == register.name)
{
register_to_remove.push(register.clone());
}
}
for register in register_to_remove {
self.register.remove(®ister);
}
}
}
impl AbstractDomain for State {
/// Merge two states
fn merge(&self, other: &Self) -> Self {
assert_eq!(self.stack_id, other.stack_id);
let mut merged_register = BTreeMap::new();
for (register, other_value) in other.register.iter() {
if let Some(value) = self.register.get(register) {
let merged_value = value.merge(other_value);
if !merged_value.is_top() {
// We only have to keep non-*Top* elements.
merged_register.insert(register.clone(), merged_value);
}
}
}
let merged_memory_objects = self.memory.merge(&other.memory);
State {
register: merged_register,
memory: merged_memory_objects,
stack_id: self.stack_id.clone(),
caller_stack_ids: self
.caller_stack_ids
.union(&other.caller_stack_ids)
.cloned()
.collect(),
ids_known_to_caller: self
.ids_known_to_caller
.union(&other.ids_known_to_caller)
.cloned()
.collect(),
}
}
/// A state has no *Top* element
fn is_top(&self) -> bool {
false
}
}
impl State {
/// Get a more compact json-representation of the state.
/// Intended for pretty printing, not useable for serialization/deserialization.
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
let mut state_map = Map::new();
let register = self
.register
.iter()
.map(|(var, data)| (var.name.clone(), data.to_json_compact()))
.collect();
let register = Value::Object(register);
state_map.insert("register".into(), register);
state_map.insert("memory".into(), self.memory.to_json_compact());
state_map.insert(
"stack_id".into(),
Value::String(format!("{}", self.stack_id)),
);
state_map.insert(
"caller_stack_ids".into(),
Value::Array(
self.caller_stack_ids
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
state_map.insert(
"ids_known_to_caller".into(),
Value::Array(
self.ids_known_to_caller
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
Value::Object(state_map)
}
}
#[cfg(test)]
mod tests;