CBMC
dfcc_lift_memory_predicates.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Dynamic frame condition checking for function contracts
4 
5 Author: Remi Delmas, delmasrd@amazon.com
6 Date: August 2022
7 
8 \*******************************************************************/
9 
11 
12 #include <util/cprover_prefix.h>
13 #include <util/format_expr.h>
14 #include <util/graph.h>
15 #include <util/pointer_expr.h>
16 #include <util/replace_symbol.h>
17 #include <util/symbol.h>
18 
20 
21 #include "dfcc_instrument.h"
22 #include "dfcc_library.h"
23 #include "dfcc_utils.h"
24 
26  goto_modelt &goto_model,
27  dfcc_libraryt &library,
28  dfcc_instrumentt &instrument,
29  message_handlert &message_handler)
30  : goto_model(goto_model),
31  library(library),
32  instrument(instrument),
33  log(message_handler)
34 {
35 }
36 
39  const irep_idt &function_id)
40 {
41  return lifted_parameters.find(function_id) != lifted_parameters.end();
42 }
43 
45 static bool is_core_memory_predicate(const irep_idt &function_id)
46 {
47  return (function_id == CPROVER_PREFIX "is_fresh") ||
48  (function_id == CPROVER_PREFIX "pointer_in_range_dfcc") ||
49  (function_id == CPROVER_PREFIX "obeys_contract");
50 }
51 
53  const goto_programt &goto_program,
54  const std::set<irep_idt> &predicates)
55 {
56  for(const auto &instruction : goto_program.instructions)
57  {
58  if(
59  instruction.is_function_call() &&
60  instruction.call_function().id() == ID_symbol)
61  {
62  const auto &callee_id =
63  to_symbol_expr(instruction.call_function()).get_identifier();
64 
65  if(
66  is_core_memory_predicate(callee_id) ||
67  predicates.find(callee_id) != predicates.end())
68  {
69  return true;
70  }
71  }
72  }
73  return false;
74 }
75 
77  std::set<irep_idt> &discovered_function_pointer_contracts)
78 {
79  std::set<irep_idt> candidates;
80  for(const auto &pair : goto_model.symbol_table)
81  {
82  if(
83  pair.second.type.id() == ID_code &&
84  !instrument.do_not_instrument(pair.first))
85  {
86  const auto &iter =
87  goto_model.goto_functions.function_map.find(pair.first);
88  if(
89  iter != goto_model.goto_functions.function_map.end() &&
90  iter->second.body_available())
91  {
92  candidates.insert(pair.first);
93  }
94  }
95  }
96 
97  std::set<irep_idt> predicates;
98 
99  // iterate until no new predicate is found
100  bool new_predicates_found = true;
101  while(new_predicates_found)
102  {
103  new_predicates_found = false;
104  for(const auto &function_id : candidates)
105  {
106  if(
107  predicates.find(function_id) == predicates.end() &&
109  goto_model.goto_functions.function_map[function_id].body, predicates))
110  {
111  predicates.insert(function_id);
112  new_predicates_found = true;
113  }
114  }
115  }
116 
117  if(predicates.empty())
118  return predicates;
119 
120  // some predicates were found, build dependency graph
121  struct dep_graph_nodet : public graph_nodet<empty_edget>
122  {
123  const irep_idt &function_id;
124  explicit dep_graph_nodet(const irep_idt &function_id)
125  : function_id(function_id)
126  {
127  }
128  };
129 
130  grapht<dep_graph_nodet> dep_graph;
131 
132  // inverse mapping from names to dep_graph_nodet identifiers
133  std::map<irep_idt, std::size_t> id_to_node_map;
134  // create all nodes
135  for(auto &predicate : predicates)
136  {
137  id_to_node_map.insert({predicate, dep_graph.add_node(predicate)});
138  }
139 
140  // create all edges
141  for(auto &caller : predicates)
142  {
143  const auto caller_id = id_to_node_map[caller];
144  for(const auto &instruction :
145  goto_model.goto_functions.function_map[caller].body.instructions)
146  {
147  if(
148  instruction.is_function_call() &&
149  instruction.call_function().id() == ID_symbol)
150  {
151  const auto &callee =
152  to_symbol_expr(instruction.call_function()).get_identifier();
153  if(predicates.find(callee) != predicates.end())
154  {
156  mstream << "Memory predicate dependency " << caller << " -> "
157  << callee << messaget::eom;
158  });
159  const auto callee_id = id_to_node_map[callee];
160  if(callee != caller) // do not add edges for self-recursive functions
161  dep_graph.add_edge(callee_id, caller_id);
162  }
163  }
164  }
165  }
166 
167  // lift in dependency order
168  auto sorted = dep_graph.topsort();
170  !sorted.empty(),
171  "could not determine instrumentation order for memory predicates, most "
172  "likely due to mutual recursion");
173  for(const auto &idx : sorted)
174  {
176  dep_graph[idx].function_id, discovered_function_pointer_contracts);
177  }
178 
179  return predicates;
180 }
181 
182 // returns an optional rank
183 static std::optional<std::size_t> is_param_expr(
184  const exprt &expr,
185  const std::map<irep_idt, std::size_t> &parameter_rank)
186 {
187  if(expr.id() == ID_typecast)
188  {
189  // ignore typecasts
190  return is_param_expr(to_typecast_expr(expr).op(), parameter_rank);
191  }
192  else if(expr.id() == ID_symbol)
193  {
194  const irep_idt &ident = to_symbol_expr(expr).get_identifier();
195  const auto found = parameter_rank.find(ident);
196  if(found != parameter_rank.end())
197  {
198  return {found->second};
199  }
200  else
201  {
202  return {};
203  }
204  }
205  else
206  {
207  // bail on anything else
208  return {};
209  }
210 }
211 
213  const irep_idt &function_id)
214 {
215  const symbolt &function_symbol =
217  // map of parameter name to its rank in the signature
218  std::map<irep_idt, std::size_t> parameter_rank;
219  const auto &parameters = to_code_type(function_symbol.type).parameters();
220  for(std::size_t rank = 0; rank < parameters.size(); rank++)
221  {
222  parameter_rank[parameters[rank].get_identifier()] = rank;
223  }
224  const goto_programt &body =
225  goto_model.goto_functions.function_map[function_id].body;
226  for(const auto &it : body.instructions)
227  {
228  // for all function calls, if a parameter of function_id is passed
229  // in a lifted position, add its rank to the set of lifted parameters
230  if(it.is_function_call() && it.call_function().id() == ID_symbol)
231  {
232  const irep_idt &callee_id =
233  to_symbol_expr(it.call_function()).get_identifier();
234  if(callee_id == CPROVER_PREFIX "is_fresh")
235  {
236  auto opt_rank = is_param_expr(it.call_arguments()[0], parameter_rank);
237  if(opt_rank.has_value())
238  {
239  lifted_parameters[function_id].insert(opt_rank.value());
240  }
241  }
242  else if(callee_id == CPROVER_PREFIX "pointer_in_range_dfcc")
243  {
244  auto opt_rank = is_param_expr(it.call_arguments()[1], parameter_rank);
245  if(opt_rank.has_value())
246  {
247  lifted_parameters[function_id].insert(opt_rank.value());
248  }
249  }
250  else if(callee_id == CPROVER_PREFIX "obeys_contract")
251  {
252  auto opt_rank = is_param_expr(it.call_arguments()[0], parameter_rank);
253  if(opt_rank.has_value())
254  {
255  lifted_parameters[function_id].insert(opt_rank.value());
256  }
257  }
258  else if(is_lifted_function(callee_id))
259  {
260  // search wether a parameter of function_id is passed to a lifted
261  // parameter of callee_id
262  for(const std::size_t callee_rank : lifted_parameters[callee_id])
263  {
264  auto opt_rank =
265  is_param_expr(it.call_arguments()[callee_rank], parameter_rank);
266  if(opt_rank.has_value())
267  {
268  lifted_parameters[function_id].insert(opt_rank.value());
269  }
270  }
271  }
272  }
273  }
274 }
275 
277  const irep_idt &function_id,
278  const std::size_t parameter_rank,
279  replace_symbolt &replace_lifted_param)
280 {
281  symbolt &function_symbol =
283  code_typet &code_type = to_code_type(function_symbol.type);
284  auto &parameters = code_type.parameters();
285  auto &parameter_id = parameters[parameter_rank].get_identifier();
286  auto entry = goto_model.symbol_table.symbols.find(parameter_id);
288  mstream << "adding pointer type to " << function_id << " " << parameter_id
289  << messaget::eom;
290  });
291  const symbolt &old_symbol = entry->second;
292  const auto &old_symbol_expr = old_symbol.symbol_expr();
293  // create new parameter symbol, same everything except type
294  symbolt new_symbol(
295  old_symbol.name, pointer_type(old_symbol.type), old_symbol.mode);
296  new_symbol.base_name = old_symbol.base_name;
297  new_symbol.location = old_symbol.location;
298  new_symbol.module = old_symbol.module;
299  new_symbol.is_lvalue = old_symbol.is_lvalue;
300  new_symbol.is_state_var = old_symbol.is_state_var;
301  new_symbol.is_thread_local = old_symbol.is_thread_local;
302  new_symbol.is_file_local = old_symbol.is_file_local;
303  new_symbol.is_auxiliary = old_symbol.is_auxiliary;
304  new_symbol.is_parameter = old_symbol.is_parameter;
306  std::pair<symbolt &, bool> res =
307  goto_model.symbol_table.insert(std::move(new_symbol));
308  CHECK_RETURN(res.second);
309  replace_lifted_param.insert(
310  old_symbol_expr, dereference_exprt(res.first.symbol_expr()));
311  code_typet::parametert parameter(res.first.type);
312  parameter.set_base_name(res.first.base_name);
313  parameter.set_identifier(res.first.name);
314  parameters[parameter_rank] = parameter;
315 }
316 
318  const irep_idt &function_id,
319  std::set<irep_idt> &discovered_function_pointer_contracts)
320 {
321  replace_symbolt replace_lifted_params;
322  // add pointer types to all parameters that require it
323  for(const auto rank : lifted_parameters[function_id])
324  {
325  add_pointer_type(function_id, rank, replace_lifted_params);
326  }
327  auto &body = goto_model.goto_functions.function_map[function_id].body;
328  // update the function body
329  for(auto &instruction : body.instructions)
330  {
331  // rewrite all occurrences of lifted parameters
332  instruction.transform([&replace_lifted_params](exprt expr) {
333  const bool changed = !replace_lifted_params.replace(expr);
334  return changed ? std::optional<exprt>{expr} : std::nullopt;
335  });
336 
337  // add address-of to all arguments expressions passed in lifted position to
338  // another memory predicate
339  if(
340  instruction.is_function_call() &&
341  instruction.call_function().id() == ID_symbol)
342  {
343  // add address-of to arguments that are passed in lifted position
344  auto &callee_id =
345  to_symbol_expr(instruction.call_function()).get_identifier();
346  if(is_lifted_function(callee_id))
347  {
348  for(const auto &rank : lifted_parameters[callee_id])
349  {
350  const auto arg = instruction.call_arguments()[rank];
352  mstream << "Adding address_of to call " << callee_id
353  << ", argument " << format(arg) << messaget::eom;
354  });
355  instruction.call_arguments()[rank] = address_of_exprt(arg);
356  }
357  }
358  }
359  }
360 }
361 
363  const irep_idt &function_id,
364  std::set<irep_idt> &discovered_function_pointer_contracts)
365 {
366  // when this function_id gets processed, any memory predicate it calls has
367  // already been processed (except itself if it is recursive);
368 
369  log.status() << "Instrumenting memory predicate '" << function_id << "'"
370  << messaget::eom;
371 
372  // first step: identify which parameters are passed directly to core
373  // predicates of lifted positions in user-defined memory predicates and mark
374  // them as lifted
375  collect_parameters_to_lift(function_id);
377  function_id, discovered_function_pointer_contracts);
378 
380  mstream << "Ranks of lifted parameters for " << function_id << ": ";
381  for(const auto rank : lifted_parameters[function_id])
382  mstream << rank << ", ";
383  mstream << messaget::eom;
384  });
385 
386  // instrument the function for side effects: adds the write_set parameter,
387  // adds checks for side effects, maps core predicates to their implementation.
389  function_id,
390  loop_contract_configt{false},
391  discovered_function_pointer_contracts);
392 }
393 
395 {
396  fix_calls(program, program.instructions.begin(), program.instructions.end());
397 }
398 
400  goto_programt &program,
401  goto_programt::targett first_instruction,
402  const goto_programt::targett &last_instruction)
403 {
404  for(auto target = first_instruction; target != last_instruction; target++)
405  {
406  if(target->is_function_call())
407  {
408  const auto &function = target->call_function();
409 
410  if(function.id() == ID_symbol)
411  {
412  const irep_idt &fun_name = to_symbol_expr(function).get_identifier();
413 
414  if(is_lifted_function(fun_name))
415  {
416  // add address-of on all lifted argumnents
417  for(const auto rank : lifted_parameters[fun_name])
418  {
419  target->call_arguments()[rank] =
420  address_of_exprt(target->call_arguments()[rank]);
421  }
422  }
423  }
424  }
425  }
426 }
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
Operator to return the address of an object.
Definition: pointer_expr.h:540
void set_base_name(const irep_idt &name)
Definition: std_types.h:629
void set_identifier(const irep_idt &identifier)
Definition: std_types.h:624
Base type of functions.
Definition: std_types.h:583
const parameterst & parameters() const
Definition: std_types.h:699
Operator to dereference a pointer.
Definition: pointer_expr.h:834
This class instruments GOTO functions or instruction sequences for frame condition checking and loop ...
void instrument_function(const irep_idt &function_id, const loop_contract_configt &loop_contract_config, std::set< irep_idt > &function_pointer_contracts)
Instruments a GOTO function by adding an extra write set parameter and instrumenting its body instruc...
bool do_not_instrument(const irep_idt &id) const
True iff the symbol must not be instrumented because it is an internal symbol or a CPROVER symbol.
Class interface to library types and functions defined in cprover_contracts.c.
Definition: dfcc_library.h:154
void fix_calls(goto_programt &program)
Fixes calls to user-defined memory predicates in the given program, by adding an address-of to argume...
void collect_parameters_to_lift(const irep_idt &function_id)
Computes the subset of function parameters of function_id that are passed directly to core predicates...
void lift_predicate(const irep_idt &function_id, std::set< irep_idt > &discovered_function_pointer_contracts)
void add_pointer_type(const irep_idt &function_id, const std::size_t parameter_rank, replace_symbolt &replace_lifted_param)
adds a pointer_type to the parameter of a function
dfcc_lift_memory_predicatest(goto_modelt &goto_model, dfcc_libraryt &library, dfcc_instrumentt &instrument, message_handlert &message_handler)
std::map< irep_idt, std::set< std::size_t > > lifted_parameters
void lift_parameters_and_update_body(const irep_idt &function_id, std::set< irep_idt > &discovered_function_pointer_contracts)
bool calls_memory_predicates(const goto_programt &goto_program, const std::set< irep_idt > &predicates)
Returns true iff goto_program calls core memory predicates.
bool is_lifted_function(const irep_idt &function_id)
True if a function at least had one of its parameters lifted.
std::set< irep_idt > lift_predicates(std::set< irep_idt > &discovered_function_pointer_contracts)
Collects and lifts all user-defined memory predicates.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Base class for all expressions.
Definition: expr.h:56
function_mapt function_map
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:31
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:34
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
instructionst::iterator targett
Definition: goto_program.h:614
This class represents a node in a directed graph.
Definition: graph.h:35
A generic directed graph with a parametric node type.
Definition: graph.h:167
node_indext add_node(arguments &&... values)
Definition: graph.h:180
void add_edge(node_indext a, node_indext b)
Definition: graph.h:232
std::list< node_indext > topsort() const
Find a topological order of the nodes if graph is DAG, return empty list for non-DAG or empty graph.
Definition: graph.h:879
const irep_idt & id() const
Definition: irep.h:388
mstreamt & status() const
Definition: message.h:406
void conditional_output(mstreamt &mstream, const std::function< void(mstreamt &)> &output_generator) const
Generate output to message_stream using output_generator if the configured verbosity is at least as h...
Definition: message.cpp:139
mstreamt & debug() const
Definition: message.h:421
static eomt eom
Definition: message.h:289
Replace a symbol expression by a given expression.
virtual bool replace(exprt &dest) const
void insert(const class symbol_exprt &old_expr, const exprt &new_expr)
Sets old_expr to be replaced by new_expr if we don't already have a replacement; otherwise does nothi...
const irep_idt & get_identifier() const
Definition: std_expr.h:160
const symbolst & symbols
Read-only field, used to look up symbols given their names.
virtual void erase(const symbolst::const_iterator &entry) override
Remove a symbol from the symbol table.
virtual std::pair< symbolt &, bool > insert(symbolt symbol) override
Author: Diffblue Ltd.
Symbol table entry.
Definition: symbol.h:28
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
bool is_auxiliary
Definition: symbol.h:77
bool is_file_local
Definition: symbol.h:73
irep_idt module
Name of module the symbol belongs to.
Definition: symbol.h:43
bool is_parameter
Definition: symbol.h:76
bool is_thread_local
Definition: symbol.h:71
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
bool is_state_var
Definition: symbol.h:66
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
bool is_lvalue
Definition: symbol.h:72
irep_idt mode
Language mode.
Definition: symbol.h:49
#define CPROVER_PREFIX
Add instrumentation to a goto program to perform frame condition checks.
Dynamic frame condition checking library loading.
static std::optional< std::size_t > is_param_expr(const exprt &expr, const std::map< irep_idt, std::size_t > &parameter_rank)
static bool is_core_memory_predicate(const irep_idt &function_id)
True iff function_id is a core memory predicate.
Collects all user-defined predicates that call functions is_fresh, pointer_in_range,...
Dynamic frame condition checking utility functions.
Symbol Table + CFG.
A Template Class for Graphs.
double log(double x)
Definition: math.c:2776
API to expression classes for Pointers.
#define PRECONDITION_WITH_DIAGNOSTICS(CONDITION,...)
Definition: invariant.h:464
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition: std_expr.h:2102
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:788
static symbolt & get_function_symbol(symbol_table_baset &, const irep_idt &function_id)
Returns the symbolt for function_id.
Definition: dfcc_utils.cpp:72
Loop contract configurations.
Symbol table entry.