CBMC
remove_function_pointers.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Program Transformation
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
13 
14 #include <util/arith_tools.h>
15 #include <util/byte_operators.h>
16 #include <util/c_types.h>
17 #include <util/fresh_symbol.h>
18 #include <util/invariant.h>
19 #include <util/message.h>
20 #include <util/pointer_expr.h>
22 #include <util/source_location.h>
23 #include <util/std_code.h>
24 #include <util/std_expr.h>
25 #include <util/string_utils.h>
26 
28 
30 #include "goto_model.h"
32 #include "remove_skip.h"
33 
35 {
36 public:
38  message_handlert &_message_handler,
39  symbol_tablet &_symbol_table,
41  const goto_functionst &goto_functions);
42 
43  void operator()(goto_functionst &goto_functions);
44 
46  goto_programt &goto_program,
47  const irep_idt &function_id);
48 
49 protected:
51  const namespacet ns;
53 
54  // We can optionally halt the FP removal if we aren't able to use
55  // remove_const_function_pointerst to successfully narrow to a small
56  // subset of possible functions and just leave the function pointer
57  // as it is.
58  // This can be activated in goto-instrument using
59  // --remove-const-function-pointers instead of --remove-function-pointers
61 
69  goto_programt &goto_program,
70  const irep_idt &function_id,
71  goto_programt::targett target);
72 
73  std::unordered_set<irep_idt> address_taken;
74 
75  typedef std::map<irep_idt, code_typet> type_mapt;
77 };
78 
80  message_handlert &_message_handler,
81  symbol_tablet &_symbol_table,
82  bool only_resolve_const_fps,
83  const goto_functionst &goto_functions)
84  : message_handler(_message_handler),
85  ns(_symbol_table),
86  symbol_table(_symbol_table),
87  only_resolve_const_fps(only_resolve_const_fps)
88 {
89  for(const auto &s : symbol_table.symbols)
91 
93 
94  // build type map
95  for(const auto &gf_entry : goto_functions.function_map)
96  {
97  type_map.emplace(
98  gf_entry.first, to_code_type(ns.lookup(gf_entry.first).type));
99  }
100 }
101 
103  const typet &call_type,
104  const typet &function_type,
105  const namespacet &ns)
106 {
107  if(call_type == function_type)
108  return true;
109 
110  // any integer-vs-enum-vs-pointer is ok
111  if(
112  call_type.id() == ID_signedbv || call_type.id() == ID_unsignedbv ||
113  call_type.id() == ID_bool || call_type.id() == ID_c_bool ||
114  call_type.id() == ID_c_enum_tag || call_type.id() == ID_c_enum ||
115  call_type.id() == ID_pointer)
116  {
117  return function_type.id() == ID_signedbv ||
118  function_type.id() == ID_unsignedbv ||
119  function_type.id() == ID_bool || function_type.id() == ID_c_bool ||
120  function_type.id() == ID_pointer ||
121  function_type.id() == ID_c_enum ||
122  function_type.id() == ID_c_enum_tag;
123  }
124 
125  return pointer_offset_bits(call_type, ns) ==
126  pointer_offset_bits(function_type, ns);
127 }
128 
130  bool return_value_used,
131  const code_typet &call_type,
132  const code_typet &function_type,
133  const namespacet &ns)
134 {
135  // we are willing to ignore anything that's returned
136  // if we call with 'void'
137  if(!return_value_used)
138  {
139  }
140  else if(call_type.return_type() == empty_typet())
141  {
142  // ok
143  }
144  else
145  {
147  call_type.return_type(), function_type.return_type(), ns))
148  return false;
149  }
150 
151  // let's look at the parameters
152  const code_typet::parameterst &call_parameters=call_type.parameters();
153  const code_typet::parameterst &function_parameters=function_type.parameters();
154 
155  if(function_type.has_ellipsis() &&
156  function_parameters.empty())
157  {
158  // always ok
159  }
160  else if(call_type.has_ellipsis() &&
161  call_parameters.empty())
162  {
163  // always ok
164  }
165  else
166  {
167  // we are quite strict here, could be much more generous
168  if(call_parameters.size()!=function_parameters.size())
169  return false;
170 
171  for(std::size_t i=0; i<call_parameters.size(); i++)
173  call_parameters[i].type(), function_parameters[i].type(), ns))
174  return false;
175  }
176 
177  return true;
178 }
179 
180 static void fix_argument_types(code_function_callt &function_call)
181 {
182  const code_typet &code_type = to_code_type(function_call.function().type());
183 
184  const code_typet::parameterst &function_parameters=
185  code_type.parameters();
186 
187  code_function_callt::argumentst &call_arguments=
188  function_call.arguments();
189 
190  for(std::size_t i=0; i<function_parameters.size(); i++)
191  {
192  if(i<call_arguments.size())
193  {
194  if(call_arguments[i].type() != function_parameters[i].type())
195  {
196  call_arguments[i] = make_byte_extract(
197  call_arguments[i],
199  function_parameters[i].type());
200  }
201  }
202  }
203 }
204 
205 static void fix_return_type(
206  const irep_idt &in_function_id,
207  code_function_callt &function_call,
208  const source_locationt &source_location,
209  symbol_tablet &symbol_table,
210  goto_programt &dest)
211 {
212  // are we returning anything at all?
213  if(function_call.lhs().is_nil())
214  return;
215 
216  const code_typet &code_type = to_code_type(function_call.function().type());
217 
218  // type already ok?
219  if(function_call.lhs().type() == code_type.return_type())
220  return;
221 
222  const namespacet ns(symbol_table);
223  const symbolt &function_symbol =
224  ns.lookup(to_symbol_expr(function_call.function()).get_identifier());
225 
226  symbolt &tmp_symbol = get_fresh_aux_symbol(
227  code_type.return_type(),
228  id2string(in_function_id),
229  "tmp_return_val_" + id2string(function_symbol.base_name),
230  source_location,
231  function_symbol.mode,
232  symbol_table);
233 
234  const symbol_exprt tmp_symbol_expr = tmp_symbol.symbol_expr();
235 
236  exprt old_lhs=function_call.lhs();
237  function_call.lhs()=tmp_symbol_expr;
238 
240  old_lhs,
242  tmp_symbol_expr, from_integer(0, c_index_type()), old_lhs.type()),
243  source_location));
244 }
245 
247  goto_programt &goto_program,
248  const irep_idt &function_id,
249  goto_programt::targett target)
250 {
251  const auto &function = to_dereference_expr(as_const(*target).call_function());
252 
253  // this better have the right type
254  code_typet call_type=to_code_type(function.type());
255 
256  // refine the type in case the forward declaration was incomplete
257  if(call_type.has_ellipsis() &&
258  call_type.parameters().empty())
259  {
260  call_type.remove_ellipsis();
261  for(const auto &argument : as_const(*target).call_arguments())
262  {
263  call_type.parameters().push_back(code_typet::parametert(argument.type()));
264  }
265  }
266 
267  bool found_functions;
268 
269  const exprt &pointer = function.pointer();
271  does_remove_constt const_removal_check(goto_program);
272  const auto does_remove_const = const_removal_check();
274  if(does_remove_const.first)
275  {
276  log.warning().source_location = does_remove_const.second;
277  log.warning() << "cast from const to non-const pointer found, "
278  << "only worst case function pointer removal will be done."
279  << messaget::eom;
280  found_functions=false;
281  }
282  else
283  {
285  log.get_message_handler(), ns, symbol_table);
286 
287  found_functions=fpr(pointer, functions);
288 
289  // if found_functions is false, functions should be empty
290  // however, it is possible for found_functions to be true and functions
291  // to be empty (this happens if the pointer can only resolve to the null
292  // pointer)
293  CHECK_RETURN(found_functions || functions.empty());
294 
295  if(functions.size()==1)
296  {
297  target->call_function() = *functions.cbegin();
298  return;
299  }
300  }
301 
302  if(!found_functions)
303  {
305  {
306  // If this mode is enabled, we only remove function pointers
307  // that we can resolve either to an exact function, or an exact subset
308  // (e.g. a variable index in a constant array).
309  // Since we haven't found functions, we would now resort to
310  // replacing the function pointer with any function with a valid signature
311  // Since we don't want to do that, we abort.
312  return;
313  }
314 
315  bool return_value_used = as_const(*target).call_lhs().is_not_nil();
316 
317  // get all type-compatible functions
318  // whose address is ever taken
319  for(const auto &t : type_map)
320  {
321  // address taken?
322  if(address_taken.find(t.first)==address_taken.end())
323  continue;
324 
325  // type-compatible?
327  return_value_used, call_type, t.second, ns))
328  continue;
329 
330  if(t.first=="pthread_mutex_cleanup")
331  continue;
332 
333  symbol_exprt expr(t.first, t.second);
334  functions.insert(expr);
335  }
336  }
337 
340  symbol_table,
341  goto_program,
342  function_id,
343  target,
344  functions);
345 }
346 
348  const std::unordered_set<symbol_exprt, irep_hash> &candidates)
349 {
350  std::stringstream comment;
351 
352  comment << "dereferenced function pointer must be ";
353 
354  if(candidates.size() == 1)
355  {
356  comment << candidates.begin()->get_identifier();
357  }
358  else if(candidates.empty())
359  {
360  comment.str("no candidates for dereferenced function pointer");
361  }
362  else
363  {
364  comment << "one of [";
365 
366  join_strings(
367  comment,
368  candidates.begin(),
369  candidates.end(),
370  ", ",
371  [](const symbol_exprt &s) { return s.get_identifier(); });
372 
373  comment << ']';
374  }
375 
376  return comment.str();
377 }
378 
380  message_handlert &message_handler,
381  symbol_tablet &symbol_table,
382  goto_programt &goto_program,
383  const irep_idt &function_id,
384  goto_programt::targett target,
385  const std::unordered_set<symbol_exprt, irep_hash> &functions)
386 {
387  const exprt &function = target->call_function();
388  const exprt &pointer = to_dereference_expr(function).pointer();
389 
390  // the final target is a skip
391  goto_programt final_skip;
392 
393  goto_programt::targett t_final =
394  final_skip.add(goto_programt::make_skip(target->source_location()));
395 
396  // build the calls and gotos
397 
398  goto_programt new_code_calls;
399  goto_programt new_code_gotos;
400 
401  for(const auto &fun : functions)
402  {
403  // call function
404  auto new_call =
405  code_function_callt(target->call_lhs(), fun, target->call_arguments());
406 
407  // the signature of the function might not match precisely
408  fix_argument_types(new_call);
409 
410  goto_programt tmp;
412  function_id, new_call, target->source_location(), symbol_table, tmp);
413 
414  auto call = new_code_calls.add(
415  goto_programt::make_function_call(new_call, target->source_location()));
416  new_code_calls.destructive_append(tmp);
417 
418  // goto final
419  new_code_calls.add(goto_programt::make_goto(
420  t_final, true_exprt(), target->source_location()));
421 
422  // goto to call
423  const address_of_exprt address_of(fun, pointer_type(fun.type()));
424 
425  const auto casted_address =
426  typecast_exprt::conditional_cast(address_of, pointer.type());
427 
428  new_code_gotos.add(goto_programt::make_goto(
429  call, equal_exprt(pointer, casted_address), target->source_location()));
430  }
431 
432  // fall-through
433  source_locationt annotated_location = target->source_location();
434  annotated_location.set_property_class("pointer dereference");
435  annotated_location.set_comment(function_pointer_assertion_comment(functions));
436  new_code_gotos.add(
437  goto_programt::make_assertion(false_exprt(), annotated_location));
438  new_code_gotos.add(
440 
441  goto_programt new_code;
442 
443  // patch them all together
444  new_code.destructive_append(new_code_gotos);
445  new_code.destructive_append(new_code_calls);
446  new_code.destructive_append(final_skip);
447 
448  goto_programt::targett next_target=target;
449  next_target++;
450 
451  goto_program.destructive_insert(next_target, new_code);
452 
453  // We preserve the original dereferencing to possibly catch
454  // further pointer-related errors.
455  code_expressiont code_expression(function);
456  code_expression.add_source_location()=function.source_location();
457  *target =
458  goto_programt::make_other(code_expression, target->source_location());
459 
460  // report statistics
461  messaget log{message_handler};
462  log.statistics().source_location = target->source_location();
463  log.statistics() << "replacing function pointer by " << functions.size()
464  << " possible targets" << messaget::eom;
465 
466  // list the names of functions when verbosity is at debug level
467  log.conditional_output(
468  log.debug(), [&functions](messaget::mstreamt &mstream) {
469  mstream << "targets: ";
470 
471  bool first = true;
472  for(const auto &function : functions)
473  {
474  if(!first)
475  mstream << ", ";
476 
477  mstream << function.get_identifier();
478  first = false;
479  }
480 
481  mstream << messaget::eom;
482  });
483 }
484 
486  goto_programt &goto_program,
487  const irep_idt &function_id)
488 {
489  bool did_something=false;
490 
491  Forall_goto_program_instructions(target, goto_program)
492  if(target->is_function_call())
493  {
494  if(target->call_function().id() == ID_dereference)
495  {
496  remove_function_pointer(goto_program, function_id, target);
497  did_something=true;
498  }
499  }
500 
501  if(did_something)
502  remove_skip(goto_program);
503 
504  return did_something;
505 }
506 
508 {
509  bool did_something=false;
510 
511  for(goto_functionst::function_mapt::iterator f_it=
512  functions.function_map.begin();
513  f_it!=functions.function_map.end();
514  f_it++)
515  {
516  goto_programt &goto_program=f_it->second.body;
517 
518  if(remove_function_pointers(goto_program, f_it->first))
519  did_something=true;
520  }
521 
522  if(did_something)
523  functions.compute_location_numbers();
524 }
525 
527  message_handlert &_message_handler,
528  goto_modelt &goto_model,
529  bool only_remove_const_fps)
530 {
532  _message_handler,
533  goto_model.symbol_table,
534  only_remove_const_fps,
535  goto_model.goto_functions);
536 
537  rfp(goto_model.goto_functions);
538 }
539 
540 bool has_function_pointers(const goto_programt &goto_program)
541 {
542  for(auto &instruction : goto_program.instructions)
543  if(
544  instruction.is_function_call() &&
545  instruction.call_function().id() == ID_dereference)
546  {
547  return true;
548  }
549 
550  return false;
551 }
552 
553 bool has_function_pointers(const goto_functionst &goto_functions)
554 {
555  for(auto &function_it : goto_functions.function_map)
556  if(has_function_pointers(function_it.second.body))
557  return true;
558 
559  return false;
560 }
561 
562 bool has_function_pointers(const goto_modelt &goto_model)
563 {
564  return has_function_pointers(goto_model.goto_functions);
565 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
const T & as_const(T &value)
Return a reference to the same object but ensures the type is const.
Definition: as_const.h:14
byte_extract_exprt make_byte_extract(const exprt &_op, const exprt &_offset, const typet &_type)
Construct a byte_extract_exprt with endianness and byte width matching the current configuration.
Expression classes for byte-level operators.
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
bitvector_typet c_index_type()
Definition: c_types.cpp:16
Operator to return the address of an object.
Definition: pointer_expr.h:540
codet representation of an expression statement.
Definition: std_code.h:1394
goto_instruction_codet representation of a function call statement.
exprt::operandst argumentst
Base type of functions.
Definition: std_types.h:583
std::vector< parametert > parameterst
Definition: std_types.h:585
const typet & return_type() const
Definition: std_types.h:689
void remove_ellipsis()
Definition: std_types.h:684
bool has_ellipsis() const
Definition: std_types.h:655
const parameterst & parameters() const
Definition: std_types.h:699
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
The empty type.
Definition: std_types.h:51
Equality.
Definition: std_expr.h:1366
Base class for all expressions.
Definition: expr.h:56
source_locationt & add_source_location()
Definition: expr.h:236
const source_locationt & source_location() const
Definition: expr.h:231
typet & type()
Return the type of the expression.
Definition: expr.h:84
The Boolean constant false.
Definition: std_expr.h:3082
A collection of goto functions.
function_mapt function_map
void compute_location_numbers()
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:31
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:34
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
static instructiont make_assumption(const exprt &g, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:945
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
void destructive_insert(const_targett target, goto_programt &p)
Inserts the given program p before target.
Definition: goto_program.h:730
instructionst::iterator targett
Definition: goto_program.h:614
void destructive_append(goto_programt &p)
Appends the given program p to *this. p is destroyed.
Definition: goto_program.h:722
static instructiont make_assignment(const code_assignt &_code, const source_locationt &l=source_locationt::nil())
Create an assignment instruction.
static instructiont make_skip(const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:891
static instructiont make_function_call(const code_function_callt &_code, const source_locationt &l=source_locationt::nil())
Create a function call instruction.
static instructiont make_other(const goto_instruction_codet &_code, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:957
targett add(instructiont &&instruction)
Adds a given instruction at the end.
Definition: goto_program.h:739
static instructiont make_goto(targett _target, const source_locationt &l=source_locationt::nil())
static instructiont make_assertion(const exprt &g, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:933
const irep_idt & id() const
Definition: irep.h:388
bool is_nil() const
Definition: irep.h:368
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:154
static eomt eom
Definition: message.h:289
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:148
std::unordered_set< symbol_exprt, irep_hash > functionst
remove_function_pointerst(message_handlert &_message_handler, symbol_tablet &_symbol_table, bool only_resolve_const_fps, const goto_functionst &goto_functions)
std::map< irep_idt, code_typet > type_mapt
void operator()(goto_functionst &goto_functions)
void remove_function_pointer(goto_programt &goto_program, const irep_idt &function_id, goto_programt::targett target)
Replace a call to a dynamic function at location target in the given goto-program by determining func...
std::unordered_set< irep_idt > address_taken
bool remove_function_pointers(goto_programt &goto_program, const irep_idt &function_id)
void set_comment(const irep_idt &comment)
void set_property_class(const irep_idt &property_class)
Expression to hold a symbol (variable)
Definition: std_expr.h:131
const irep_idt & get_identifier() const
Definition: std_expr.h:160
const symbolst & symbols
Read-only field, used to look up symbols given their names.
The symbol table.
Definition: symbol_table.h:14
Symbol table entry.
Definition: symbol.h:28
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
irep_idt mode
Language mode.
Definition: symbol.h:49
The Boolean constant true.
Definition: std_expr.h:3073
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2081
The type of an expression, extends irept.
Definition: type.h:29
void compute_address_taken_functions(const exprt &src, std::unordered_set< irep_idt > &address_taken)
get all functions whose address is taken
Query Called Functions.
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, const namespacet &ns, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with respect to the given namespace ns with the requested name pattern ...
Fresh auxiliary symbol creation.
Symbol Table + CFG.
#define Forall_goto_program_instructions(it, program)
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
double log(double x)
Definition: math.c:2776
API to expression classes for Pointers.
const dereference_exprt & to_dereference_expr(const exprt &expr)
Cast an exprt to a dereference_exprt.
Definition: pointer_expr.h:890
std::optional< mp_integer > pointer_offset_bits(const typet &type, const namespacet &ns)
Pointer Logic.
static std::string comment(const rw_set_baset::entryt &entry, bool write)
Definition: race_check.cpp:108
static bool arg_is_type_compatible(const typet &call_type, const typet &function_type, const namespacet &ns)
void remove_function_pointer(message_handlert &message_handler, symbol_tablet &symbol_table, goto_programt &goto_program, const irep_idt &function_id, goto_programt::targett target, const std::unordered_set< symbol_exprt, irep_hash > &functions)
Replace a call to a dynamic function at location target in the given goto-program by a case-split ove...
bool function_is_type_compatible(bool return_value_used, const code_typet &call_type, const code_typet &function_type, const namespacet &ns)
Returns true iff call_type can be converted to produce a function call of the same type as function_t...
static std::string function_pointer_assertion_comment(const std::unordered_set< symbol_exprt, irep_hash > &candidates)
static void fix_argument_types(code_function_callt &function_call)
void remove_function_pointers(message_handlert &_message_handler, goto_modelt &goto_model, bool only_remove_const_fps)
bool has_function_pointers(const goto_programt &goto_program)
static void fix_return_type(const irep_idt &in_function_id, code_function_callt &function_call, const source_locationt &source_location, symbol_tablet &symbol_table, goto_programt &dest)
Remove Indirect Function Calls.
void remove_skip(goto_programt &goto_program, goto_programt::targett begin, goto_programt::targett end)
remove unnecessary skip statements
Definition: remove_skip.cpp:87
Program Transformation.
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
API to expression classes.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:788
Stream & join_strings(Stream &&os, const It b, const It e, const Delimiter &delimiter, TransformFunc &&transform_func)
Prints items to an stream, separated by a constant delimiter.
Definition: string_utils.h:61