1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
//! The pointer inference analysis.
//!
//! The goal of the pointer inference analysis is to keep track of all memory objects and pointers
//! that the program knows about at specific program points during execution.
//! Possible memory management errors, like access to memory that may already have been freed,
//! are reported to the user.
//!
//! Keep in mind that the analysis operates on a best-effort basis.
//! In cases where we cannot know
//! whether an error is due to an error in the memory management of the program under analysis
//! or due to inexactness of the pointer inference analysis itself,
//! we try to treat is as the more likely (but not necessarily true) case of the two.
use super::interprocedural_fixpoint::{Computation, NodeValue};
use crate::abstract_domain::{BitvectorDomain, DataDomain};
use crate::analysis::graph::{Graph, Node};
use crate::term::*;
use crate::utils::log::*;
use petgraph::graph::NodeIndex;
use petgraph::visit::IntoNodeReferences;
use petgraph::Direction;
use std::collections::HashMap;
mod context;
mod object;
mod object_list;
mod state;
use context::Context;
use state::State;
/// The version number of the analysis.
const VERSION: &str = "0.1";
/// The abstract domain type for representing register values.
type Data = DataDomain<BitvectorDomain>;
/// A wrapper struct for the pointer inference computation object.
pub struct PointerInference<'a> {
computation: Computation<'a, Context<'a>>,
log_collector: crossbeam_channel::Sender<LogMessage>,
}
impl<'a> PointerInference<'a> {
/// Generate a new pointer inference compuation for a project.
pub fn new(
project: &'a Project,
cwe_sender: crossbeam_channel::Sender<CweWarning>,
log_sender: crossbeam_channel::Sender<LogMessage>,
) -> PointerInference<'a> {
let context = Context::new(project, cwe_sender, log_sender.clone());
let mut entry_sub_to_entry_blocks_map = HashMap::new();
let subs: HashMap<Tid, &Term<Sub>> = project
.program
.term
.subs
.iter()
.map(|sub| (sub.tid.clone(), sub))
.collect();
for sub_tid in project.program.term.entry_points.iter() {
if let Some(sub) = subs.get(sub_tid) {
if let Some(entry_block) = sub.term.blocks.iter().next() {
entry_sub_to_entry_blocks_map.insert(sub_tid, entry_block.tid.clone());
}
}
}
let tid_to_graph_indices_map = super::graph::get_indices_of_block_nodes(
&context.graph,
entry_sub_to_entry_blocks_map.values(),
);
let entry_sub_to_entry_node_map: HashMap<Tid, NodeIndex> = entry_sub_to_entry_blocks_map
.into_iter()
.filter_map(|(sub_tid, block_tid)| {
if let Some((start_node_index, _end_node_index)) =
tid_to_graph_indices_map.get(&block_tid)
{
Some((sub_tid.clone(), *start_node_index))
} else {
None
}
})
.collect();
let mut fixpoint_computation =
super::interprocedural_fixpoint::Computation::new(context, None);
log_sender
.send(LogMessage {
text: format!(
"Pointer Inference: Adding {} entry points",
entry_sub_to_entry_node_map.len()
),
level: LogLevel::Debug,
location: None,
})
.unwrap();
for (sub_tid, start_node_index) in entry_sub_to_entry_node_map.into_iter() {
fixpoint_computation.set_node_value(
start_node_index,
super::interprocedural_fixpoint::NodeValue::Value(State::new(
&project.stack_pointer_register,
sub_tid,
)),
);
}
PointerInference {
computation: fixpoint_computation,
log_collector: log_sender,
}
}
/// Compute the fixpoint of the pointer inference analysis.
/// Has a `max_steps` bound for the fixpoint algorithm to prevent infinite loops.
pub fn compute(&mut self) {
self.computation.compute_with_max_steps(100); // TODO: make max_steps configurable!
}
/// Print results serialized as YAML to stdout
pub fn print_yaml(&self) {
let graph = self.computation.get_graph();
for (node_index, value) in self.computation.node_values().iter() {
let node = graph.node_weight(*node_index).unwrap();
if let Ok(string) = serde_yaml::to_string(&(node, value)) {
println!("{}", string);
} else {
println!(
"Serializing failed at {:?} with {:?}",
node_index,
serde_yaml::to_string(value)
);
}
}
}
/// Generate a compacted json representation of the results.
/// Note that this output cannot be used for serialization/deserialization,
/// but is only intended for user output.
pub fn generate_compact_json(&self) -> serde_json::Value {
let graph = self.computation.get_graph();
let mut json_nodes = serde_json::Map::new();
for (node_index, node_value) in self.computation.node_values().iter() {
let node = graph.node_weight(*node_index).unwrap();
if let NodeValue::Value(value) = node_value {
json_nodes.insert(format!("{}", node), value.to_json_compact());
}
}
serde_json::Value::Object(json_nodes)
}
pub fn print_compact_json(&self) {
println!("{:#}", self.generate_compact_json());
}
pub fn get_graph(&self) -> &Graph {
self.computation.get_graph()
}
/// Add speculative entry points to the fixpoint algorithm state.
///
/// Since indirect jumps and calls are not handled yet (TODO: change that),
/// the analysis may miss a *lot* of code in some cases.
/// To remedy this somewhat,
/// we mark all function starts, that are also roots in the control flow graph
/// and do not have a state assigned to them yet, as additional entry points.
///
/// If `only_cfg_roots` is set to `false`, then all function starts without a state are marked as roots.
fn add_speculative_entry_points(&mut self, project: &Project, only_cfg_roots: bool) {
// TODO: Refactor the fixpoint computation structs, so that the project reference can be extracted from them.
let mut start_block_to_sub_map: HashMap<&Tid, &Term<Sub>> = HashMap::new();
for sub in project.program.term.subs.iter() {
if project
.program
.term
.extern_symbols
.iter()
.any(|symbol| symbol.tid == sub.tid)
{
continue; // We ignore functions marked as extern symbols.
}
if let Some(start_block) = sub.term.blocks.first() {
start_block_to_sub_map.insert(&start_block.tid, sub);
}
}
let graph = self.computation.get_graph();
let mut new_entry_points = Vec::new();
for (node_id, node) in graph.node_references() {
if let Node::BlkStart(block) = node {
if !(start_block_to_sub_map.get(&block.tid).is_none()
|| self.computation.get_node_value(node_id).is_some()
|| only_cfg_roots
&& graph
.neighbors_directed(node_id, Direction::Incoming)
.next()
.is_some())
{
new_entry_points.push(node_id);
}
}
}
self.log_debug(format!(
"Pointer Inference: Adding {} speculative entry points",
new_entry_points.len()
));
for entry in new_entry_points {
let sub_tid = start_block_to_sub_map
[&self.computation.get_graph()[entry].get_block().tid]
.tid
.clone();
self.computation.set_node_value(
entry,
super::interprocedural_fixpoint::NodeValue::Value(State::new(
&project.stack_pointer_register,
sub_tid,
)),
);
}
}
/// Print the number of blocks that have a state associated to them.
/// Intended for debug purposes.
fn count_blocks_with_state(&self) {
let graph = self.computation.get_graph();
let mut stateful_blocks: i64 = 0;
let mut all_blocks: i64 = 0;
for (node_id, node) in graph.node_references() {
if let Node::BlkStart(_block) = node {
all_blocks += 1;
if self.computation.get_node_value(node_id).is_some() {
stateful_blocks += 1;
}
}
}
self.log_debug(format!(
"Pointer Inference: Blocks with state: {} / {}",
stateful_blocks, all_blocks
));
}
fn log_debug(&self, msg: impl Into<String>) {
let log_msg = LogMessage {
text: msg.into(),
level: LogLevel::Debug,
location: None,
};
self.log_collector.send(log_msg).unwrap();
}
}
/// Generate and execute the pointer inference analysis.
/// Returns a vector of all found CWE warnings and a vector of all log messages generated during analysis.
pub fn run(project: &Project, print_debug: bool) -> (Vec<CweWarning>, Vec<String>) {
let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, log_receiver) = crossbeam_channel::unbounded();
let warning_collector_thread = std::thread::spawn(move || collect_cwe_warnings(cwe_receiver));
let log_collector_thread = std::thread::spawn(move || collect_logs(log_receiver));
{
// Scope the computation object so that it is dropped before the warning collector thread is joined.
// Else the warning collector thread will not terminate (the cwe_sender needs to be dropped for it to terminate).
let mut computation = PointerInference::new(project, cwe_sender, log_sender);
computation.compute();
computation.count_blocks_with_state();
// Now compute again with speculative entry points added
computation.add_speculative_entry_points(project, true);
computation.compute();
computation.count_blocks_with_state();
// Now compute again with all missed functions as additional entry points
computation.add_speculative_entry_points(project, false);
computation.compute();
computation.count_blocks_with_state();
if print_debug {
computation.print_compact_json();
}
}
// Return the CWE warnings
(
warning_collector_thread.join().unwrap(),
log_collector_thread.join().unwrap(),
)
}
/// Collect CWE warnings from the receiver until the channel is closed. Then return them.
fn collect_cwe_warnings(receiver: crossbeam_channel::Receiver<CweWarning>) -> Vec<CweWarning> {
let mut collected_warnings = HashMap::new();
while let Ok(warning) = receiver.recv() {
match &warning.addresses[..] {
[] => unimplemented!(),
[address, ..] => {
collected_warnings.insert(address.clone(), warning);
}
}
}
collected_warnings
.drain()
.map(|(_key, value)| value)
.collect()
}
/// Collect log messages from the receiver until the channel is closed. Then return them.
fn collect_logs(receiver: crossbeam_channel::Receiver<LogMessage>) -> Vec<String> {
let mut logs_with_address = HashMap::new();
let mut general_logs = Vec::new();
while let Ok(log_message) = receiver.recv() {
if let Some(ref tid) = log_message.location {
logs_with_address.insert(tid.address.clone(), log_message);
} else {
general_logs.push(log_message);
}
}
logs_with_address
.values()
.cloned()
.chain(general_logs.into_iter())
.map(|msg| msg.to_string())
.collect()
}