CBMC
taint_analysis.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Taint Analysis
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "taint_analysis.h"
13 
14 #include <util/invariant.h>
15 #include <util/json.h>
16 #include <util/message.h>
17 #include <util/pointer_expr.h>
18 #include <util/prefix.h>
19 #include <util/simplify_expr.h>
20 #include <util/std_code.h>
21 #include <util/string_constant.h>
22 
24 
26 
27 #include "taint_parser.h"
28 
29 #include <fstream> // IWYU pragma: keep
30 #include <iostream>
31 
33 {
34 public:
35  explicit taint_analysist(message_handlert &message_handler)
36  : log(message_handler)
37  {
38  }
39 
40  bool operator()(
41  const std::string &taint_file_name,
42  const symbol_tablet &,
44  bool show_full,
45  const std::optional<std::string> &json_file_name);
46 
47 protected:
51 
52  void instrument(const namespacet &, goto_functionst &);
54 };
55 
57  const namespacet &ns,
58  goto_functionst &goto_functions)
59 {
60  for(auto &function : goto_functions.function_map)
61  instrument(ns, function.second);
62 }
63 
65  const namespacet &ns,
66  goto_functionst::goto_functiont &goto_function)
67 {
68  for(goto_programt::instructionst::iterator
69  it=goto_function.body.instructions.begin();
70  it!=goto_function.body.instructions.end();
71  it++)
72  {
73  const goto_programt::instructiont &instruction=*it;
74 
75  goto_programt insert_before, insert_after;
76 
77  if(instruction.is_function_call())
78  {
79  const exprt &function = instruction.call_function();
80 
81  if(function.id() == ID_symbol)
82  {
83  const irep_idt &identifier = to_symbol_expr(function).get_identifier();
84 
85  std::set<irep_idt> identifiers;
86 
87  identifiers.insert(identifier);
88 
89  irep_idt class_id = function.get(ID_C_class);
90  if(class_id.empty())
91  {
92  }
93  else
94  {
95  std::string suffix = std::string(
96  id2string(identifier), class_id.size(), std::string::npos);
97 
98  class_hierarchyt::idst parents =
100  for(const auto &p : parents)
101  identifiers.insert(id2string(p) + suffix);
102  }
103 
104  for(const auto &rule : taint.rules)
105  {
106  bool match = false;
107  for(const auto &i : identifiers)
108  {
109  if(
110  i == rule.function_identifier ||
111  has_prefix(
112  id2string(i),
113  "java::" + id2string(rule.function_identifier) + ":"))
114  {
115  match = true;
116  break;
117  }
118  }
119 
120  if(match)
121  {
122  log.debug() << "MATCH " << rule.id << " on " << identifier
123  << messaget::eom;
124 
125  exprt where = nil_exprt();
126 
127  const code_typet &code_type = to_code_type(function.type());
128 
129  bool have_this = !code_type.parameters().empty() &&
130  code_type.parameters().front().get_bool(ID_C_this);
131 
132  switch(rule.where)
133  {
135  {
137  ns.lookup(id2string(identifier) + "#return_value");
138  where = return_value_symbol.symbol_expr();
139  break;
140  }
141 
143  {
144  unsigned nr =
145  have_this ? rule.parameter_number : rule.parameter_number - 1;
146  if(instruction.call_arguments().size() > nr)
147  where = instruction.call_arguments()[nr];
148  break;
149  }
150 
152  if(have_this)
153  {
155  !instruction.call_arguments().empty(),
156  "`this` implies at least one argument in function call");
157  where = instruction.call_arguments()[0];
158  }
159  break;
160  }
161 
162  switch(rule.kind)
163  {
165  {
166  codet code_set_may{ID_set_may};
167  code_set_may.operands().resize(2);
168  code_set_may.op0() = where;
169  code_set_may.op1() =
170  address_of_exprt(string_constantt(rule.taint));
171  insert_after.add(goto_programt::make_other(
172  code_set_may, instruction.source_location()));
173  break;
174  }
175 
177  {
178  binary_predicate_exprt get_may{
179  where,
180  ID_get_may,
181  address_of_exprt(string_constantt(rule.taint))};
182  source_locationt annotated_location =
183  instruction.source_location();
184  annotated_location.set_property_class(
185  "taint rule " + id2string(rule.id));
186  annotated_location.set_comment(rule.message);
187  insert_before.add(goto_programt::make_assertion(
188  not_exprt(get_may), annotated_location));
189  break;
190  }
191 
193  {
194  codet code_clear_may{ID_clear_may};
195  code_clear_may.operands().resize(2);
196  code_clear_may.op0() = where;
197  code_clear_may.op1() =
198  address_of_exprt(string_constantt(rule.taint));
199  insert_after.add(goto_programt::make_other(
200  code_clear_may, instruction.source_location()));
201  break;
202  }
203  }
204  }
205  }
206  }
207  }
208 
209  if(!insert_before.empty())
210  {
211  goto_function.body.insert_before_swap(it, insert_before);
212  // advance until we get back to the call
213  while(!it->is_function_call()) ++it;
214  }
215 
216  if(!insert_after.empty())
217  {
218  goto_function.body.destructive_insert(
219  std::next(it), insert_after);
220  }
221  }
222 }
223 
225  const std::string &taint_file_name,
226  const symbol_tablet &symbol_table,
227  goto_functionst &goto_functions,
228  bool show_full,
229  const std::optional<std::string> &json_file_name)
230 {
231  try
232  {
233  json_arrayt json_result;
234  bool use_json = json_file_name.has_value();
235 
236  log.status() << "Reading taint file '" << taint_file_name << "'"
237  << messaget::eom;
238 
239  if(taint_parser(taint_file_name, taint, log.get_message_handler()))
240  {
241  log.error() << "Failed to read taint definition file" << messaget::eom;
242  return true;
243  }
244 
245  log.status() << "Got " << taint.rules.size() << " taint definitions"
246  << messaget::eom;
247 
248  log.conditional_output(log.debug(), [this](messaget::mstreamt &mstream) {
249  taint.output(mstream);
250  mstream << messaget::eom;
251  });
252 
253  log.status() << "Instrumenting taint" << messaget::eom;
254 
255  class_hierarchy(symbol_table);
256 
257  const namespacet ns(symbol_table);
258  instrument(ns, goto_functions);
259  goto_functions.update();
260 
261  bool have_entry_point=
262  goto_functions.function_map.find(goto_functionst::entry_point())!=
263  goto_functions.function_map.end();
264 
265  // do we have an entry point?
266  if(have_entry_point)
267  {
268  log.status() << "Working from entry point" << messaget::eom;
269  }
270  else
271  {
272  log.status() << "No entry point found; "
273  << "we will consider the heads of all functions as reachable"
274  << messaget::eom;
275 
276  goto_programt end, gotos, calls;
277 
279 
280  for(const auto &gf_entry : goto_functions.function_map)
281  {
282  if(
283  gf_entry.second.body_available() &&
284  gf_entry.first != goto_functionst::entry_point())
285  {
286  const symbolt &symbol = ns.lookup(gf_entry.first);
287  const code_function_callt call(symbol.symbol_expr());
290  calls.add(
294  }
295  }
296 
298  goto_functions.function_map[goto_functionst::entry_point()];
299 
300  goto_programt &body=entry.body;
301 
302  body.destructive_append(gotos);
303  body.destructive_append(calls);
304  body.destructive_append(end);
305 
306  goto_functions.update();
307  }
308 
309  log.status() << "Data-flow analysis" << messaget::eom;
310 
311  custom_bitvector_analysist custom_bitvector_analysis;
312  custom_bitvector_analysis(goto_functions, ns);
313 
314  if(show_full)
315  {
316  custom_bitvector_analysis.output(ns, goto_functions, std::cout);
317  return false;
318  }
319 
320  for(const auto &gf_entry : goto_functions.function_map)
321  {
322  if(!gf_entry.second.body.has_assertion())
323  continue;
324 
325  const symbolt &symbol = ns.lookup(gf_entry.first);
326 
327  if(gf_entry.first == "__actual_thread_spawn")
328  continue;
329 
330  bool first=true;
331 
332  forall_goto_program_instructions(i_it, gf_entry.second.body)
333  {
334  if(!i_it->is_assert())
335  continue;
336 
337  if(!custom_bitvector_domaint::has_get_must_or_may(i_it->condition()))
338  {
339  continue;
340  }
341 
342  if(custom_bitvector_analysis[i_it].has_values.is_false())
343  continue;
344 
345  exprt result = custom_bitvector_analysis.eval(i_it->condition(), i_it);
346  if(simplify_expr(std::move(result), ns).is_true())
347  continue;
348 
349  if(first)
350  {
351  first=false;
352  if(!use_json)
353  std::cout << "\n"
354  << "******** Function " << symbol.display_name() << '\n';
355  }
356 
357  if(use_json)
358  {
360  {"bugClass",
361  json_stringt(i_it->source_location().get_property_class())},
362  {"file", json_stringt(i_it->source_location().get_file())},
363  {"line",
364  json_numbert(id2string(i_it->source_location().get_line()))}};
365  json_result.push_back(std::move(json));
366  }
367  else
368  {
369  std::cout << i_it->source_location();
370  if(!i_it->source_location().get_comment().empty())
371  std::cout << ": " << i_it->source_location().get_comment();
372 
373  if(!i_it->source_location().get_property_class().empty())
374  std::cout << " (" << i_it->source_location().get_property_class()
375  << ")";
376 
377  std::cout << '\n';
378  }
379  }
380  }
381 
382  if(use_json)
383  {
384  std::ofstream json_out(json_file_name.value());
385 
386  if(!json_out)
387  {
388  log.error() << "Failed to open json output '" << json_file_name.value()
389  << "'" << messaget::eom;
390  return true;
391  }
392 
393  log.status() << "Analysis result is written to '"
394  << json_file_name.value() << "'" << messaget::eom;
395 
396  json_out << json_result << '\n';
397  }
398 
399  return false;
400  }
401  catch(const char *error_msg)
402  {
403  log.error() << error_msg << messaget::eom;
404  return true;
405  }
406  catch(const std::string &error_msg)
407  {
408  log.error() << error_msg << messaget::eom;
409  return true;
410  }
411  catch(...)
412  {
413  log.error() << "Caught unexpected error in taint_analysist::operator()"
414  << messaget::eom;
415  return true;
416  }
417 }
418 
420  goto_modelt &goto_model,
421  const std::string &taint_file_name,
422  message_handlert &message_handler,
423  bool show_full,
424  const std::optional<std::string> &json_file_name)
425 {
426  taint_analysist taint_analysis(message_handler);
427  return taint_analysis(
428  taint_file_name,
429  goto_model.symbol_table,
430  goto_model.goto_functions,
431  show_full,
432  json_file_name);
433 }
Class Hierarchy.
Operator to return the address of an object.
Definition: pointer_expr.h:540
virtual void output(const namespacet &ns, const irep_idt &function_id, const goto_programt &goto_program, std::ostream &out) const
Output the abstract states for a single function.
Definition: ai.cpp:39
A base class for expressions that are predicates, i.e., Boolean-typed, and that take exactly two argu...
Definition: std_expr.h:731
The Boolean type.
Definition: std_types.h:36
Non-graph-based representation of the class hierarchy.
idst get_parents_trans(const irep_idt &id) const
std::vector< irep_idt > idst
goto_instruction_codet representation of a function call statement.
Base type of functions.
Definition: std_types.h:583
const parameterst & parameters() const
Definition: std_types.h:699
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
exprt eval(const exprt &src, locationt loc)
static bool has_get_must_or_may(const exprt &)
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
bool empty() const
Definition: dstring.h:89
size_t size() const
Definition: dstring.h:121
Base class for all expressions.
Definition: expr.h:56
exprt & op1()
Definition: expr.h:136
operandst & operands()
Definition: expr.h:94
A collection of goto functions.
function_mapt function_map
::goto_functiont goto_functiont
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:31
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:34
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:181
const exprt & call_function() const
Get the function that is called for FUNCTION_CALL.
Definition: goto_program.h:272
const source_locationt & source_location() const
Definition: goto_program.h:333
const exprt::operandst & call_arguments() const
Get the arguments of a FUNCTION_CALL.
Definition: goto_program.h:300
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
static instructiont make_end_function(const source_locationt &l=source_locationt::nil())
instructionst::iterator targett
Definition: goto_program.h:614
void destructive_append(goto_programt &p)
Appends the given program p to *this. p is destroyed.
Definition: goto_program.h:722
static instructiont make_function_call(const code_function_callt &_code, const source_locationt &l=source_locationt::nil())
Create a function call instruction.
static instructiont make_other(const goto_instruction_codet &_code, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:957
targett add(instructiont &&instruction)
Adds a given instruction at the end.
Definition: goto_program.h:739
static instructiont make_goto(targett _target, const source_locationt &l=source_locationt::nil())
bool empty() const
Is the program empty?
Definition: goto_program.h:799
static instructiont make_assertion(const exprt &g, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:933
jsont & push_back(const jsont &json)
Definition: json.h:212
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:154
mstreamt & error() const
Definition: message.h:391
mstreamt & status() const
Definition: message.h:406
void conditional_output(mstreamt &mstream, const std::function< void(mstreamt &)> &output_generator) const
Generate output to message_stream using output_generator if the configured verbosity is at least as h...
Definition: message.cpp:139
mstreamt & debug() const
Definition: message.h:421
message_handlert & get_message_handler()
Definition: message.h:183
static eomt eom
Definition: message.h:289
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:148
The NIL expression.
Definition: std_expr.h:3086
Boolean negation.
Definition: std_expr.h:2332
A side_effect_exprt that returns a non-deterministically chosen value.
Definition: std_code.h:1520
void set_comment(const irep_idt &comment)
void set_property_class(const irep_idt &property_class)
const irep_idt & get_identifier() const
Definition: std_expr.h:160
The symbol table.
Definition: symbol_table.h:14
Symbol table entry.
Definition: symbol.h:28
const irep_idt & display_name() const
Return language specific display name if present.
Definition: symbol.h:55
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
bool operator()(const std::string &taint_file_name, const symbol_tablet &, goto_functionst &, bool show_full, const std::optional< std::string > &json_file_name)
void instrument(const namespacet &, goto_functionst &)
taint_parse_treet taint
taint_analysist(message_handlert &message_handler)
class_hierarchyt class_hierarchy
The Boolean constant true.
Definition: std_expr.h:3068
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
Field-insensitive, location-sensitive bitvector analysis.
#define forall_goto_program_instructions(it, program)
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
bool is_true(const literalt &l)
Definition: literal.h:198
API to expression classes for Pointers.
static void json(json_objectT &result, const irep_idt &property_id, const property_infot &property_info)
Definition: properties.cpp:120
symbol_exprt return_value_symbol(const irep_idt &identifier, const namespacet &ns)
produces the symbol that is used to store the return value of the function with the given identifier
exprt simplify_expr(exprt src, const namespacet &ns)
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:788
bool taint_analysis(goto_modelt &goto_model, const std::string &taint_file_name, message_handlert &message_handler, bool show_full, const std::optional< std::string > &json_file_name)
Taint Analysis.
bool taint_parser(const std::string &file_name, taint_parse_treet &dest, message_handlert &message_handler)
Taint Parser.