CBMC
require_goto_statements.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Unit test utilities
4 
5 Author: Diffblue Ltd.
6 
7 \*******************************************************************/
8 
10 
11 #include <util/expr_iterator.h>
12 #include <util/expr_util.h>
13 #include <util/namespace.h>
14 #include <util/pointer_expr.h>
15 #include <util/std_code.h>
16 #include <util/suffix.h>
17 #include <util/symbol_table_base.h>
18 
20 
23 
28 std::vector<codet>
30 {
31  std::vector<codet> statements;
32  for(auto sub_expression_it = function_value.depth_begin();
33  sub_expression_it != function_value.depth_end();
34  ++sub_expression_it)
35  {
36  if(sub_expression_it->id() == ID_code)
37  {
38  statements.push_back(to_code(*sub_expression_it));
39  }
40  }
41 
42  return statements;
43 }
44 
47 const std::vector<codet>
49  const symbol_table_baset &symbol_table)
50 {
51  // Retrieve __CPROVER_start
52  const symbolt *entry_point_function =
53  symbol_table.lookup(goto_functionst::entry_point());
54  REQUIRE(entry_point_function != nullptr);
55  return get_all_statements(entry_point_function->value);
56 }
57 
72  const std::vector<codet> &statements,
73  const irep_idt &structure_name,
74  const std::optional<irep_idt> &superclass_name,
75  const irep_idt &component_name,
76  const symbol_table_baset &symbol_table)
77 {
78  pointer_assignment_locationt locations{};
79 
80  for(const auto &assignment : statements)
81  {
82  if(assignment.get_statement() == ID_assign)
83  {
84  const code_assignt &code_assign = to_code_assign(assignment);
85 
86  if(code_assign.lhs().id() == ID_member)
87  {
88  const member_exprt &member_expr = to_member_expr(code_assign.lhs());
89 
90  if(superclass_name.has_value())
91  {
92  // Structure of the expression:
93  // member_exprt member_expr:
94  // - component name: \p component_name
95  // - operand (component of): member_exprt superclass_expr:
96  // - component name: @\p superclass_name
97  // - operand (component of): symbol for \p structure_name
98  if(
99  member_expr.get_component_name() == component_name &&
100  member_expr.compound().id() == ID_member)
101  {
102  const member_exprt &superclass_expr =
103  to_member_expr(member_expr.compound());
104  const irep_idt supercomponent_name =
105  "@" + id2string(superclass_name.value());
106 
108  const namespacet ns(symbol_table);
109  ode.build(superclass_expr, ns);
110  if(
111  superclass_expr.get_component_name() == supercomponent_name &&
113  .get_identifier() == structure_name)
114  {
115  if(
116  code_assign.rhs() ==
117  null_pointer_exprt(to_pointer_type(code_assign.lhs().type())))
118  {
119  locations.null_assignment = code_assign;
120  }
121  else
122  {
123  locations.non_null_assignments.push_back(code_assign);
124  }
125  }
126  }
127  }
128  else
129  {
130  // Structure of the expression:
131  // member_exprt member_expr:
132  // - component name: \p component_name
133  // - operand (component of): symbol for \p structure_name
134 
136  const namespacet ns(symbol_table);
137  ode.build(member_expr, ns);
138  if(
139  member_expr.get_component_name() == component_name &&
141  .get_identifier() == structure_name)
142  {
143  if(
144  code_assign.rhs() ==
145  null_pointer_exprt(to_pointer_type(code_assign.lhs().type())))
146  {
147  locations.null_assignment = code_assign;
148  }
149  else
150  {
151  locations.non_null_assignments.push_back(code_assign);
152  }
153  }
154  }
155  }
156  }
157  }
158  return locations;
159 }
160 
169  const std::vector<codet> &statements,
170  const irep_idt &component_name)
171 {
173 
174  for(const auto &assignment : statements)
175  {
176  if(assignment.get_statement() == ID_assign)
177  {
178  const code_assignt &code_assign = to_code_assign(assignment);
179 
180  if(code_assign.lhs().id() == ID_member)
181  {
182  const member_exprt &member_expr = to_member_expr(code_assign.lhs());
183  if(
184  member_expr.get_component_name() == component_name &&
185  member_expr.op().id() == ID_dereference)
186  {
187  const auto &pointer = to_dereference_expr(member_expr.op()).pointer();
188  if(
189  pointer.id() == ID_symbol &&
190  has_suffix(
191  id2string(to_symbol_expr(pointer).get_identifier()),
192  id2string(ID_this)))
193  {
194  if(
195  code_assign.rhs() ==
196  null_pointer_exprt(to_pointer_type(code_assign.lhs().type())))
197  {
198  locations.null_assignment = code_assign;
199  }
200  else
201  {
202  locations.non_null_assignments.push_back(code_assign);
203  }
204  }
205  }
206  }
207  }
208  }
209  return locations;
210 }
211 
220  const irep_idt &pointer_name,
221  const std::vector<codet> &instructions)
222 {
223  INFO("Looking for symbol: " << pointer_name);
224  std::regex special_chars{R"([-[\]{}()*+?.,\^$|#\s])"};
225  std::string sanitized =
226  std::regex_replace(id2string(pointer_name), special_chars, R"(\$&)");
228  std::regex("^" + sanitized + "$"), instructions);
229 }
230 
233  const std::regex &pointer_name_match,
234  const std::vector<codet> &instructions)
235 {
237  bool found_assignment = false;
238  std::vector<irep_idt> all_symbols;
239  for(const codet &statement : instructions)
240  {
241  if(statement.get_statement() == ID_assign)
242  {
243  const code_assignt &code_assign = to_code_assign(statement);
244  if(code_assign.lhs().id() == ID_symbol)
245  {
246  const symbol_exprt &symbol_expr = to_symbol_expr(code_assign.lhs());
247  all_symbols.push_back(symbol_expr.get_identifier());
248  if(
249  std::regex_search(
250  id2string(symbol_expr.get_identifier()), pointer_name_match))
251  {
252  if(
253  code_assign.rhs() ==
254  null_pointer_exprt(to_pointer_type(code_assign.lhs().type())))
255  {
256  locations.null_assignment = code_assign;
257  }
258  else
259  {
260  locations.non_null_assignments.push_back(code_assign);
261  }
262  found_assignment = true;
263  }
264  }
265  }
266  }
267 
268  std::ostringstream found_symbols;
269  for(const auto &entry : all_symbols)
270  {
271  found_symbols << entry << std::endl;
272  }
273  INFO("Symbols: " << found_symbols.str());
274  REQUIRE(found_assignment);
275 
276  return locations;
277 }
278 
286  const irep_idt &variable_name,
287  const std::vector<codet> &entry_point_instructions)
288 {
289  for(const auto &statement : entry_point_instructions)
290  {
291  if(statement.get_statement() == ID_decl)
292  {
293  const auto &decl_statement = to_code_decl(statement);
294 
295  if(decl_statement.get_identifier() == variable_name)
296  {
297  return decl_statement;
298  }
299  }
300  }
301  throw no_decl_found_exceptiont(variable_name.c_str());
302 }
303 
310  const std::vector<codet> &entry_point_instructions,
311  const irep_idt &symbol_identifier)
312 {
313  const auto &assignments = require_goto_statements::find_pointer_assignments(
314  symbol_identifier, entry_point_instructions)
316  REQUIRE(assignments.size() == 1);
317  return assignments[0].rhs();
318 }
319 
329  const std::vector<codet> &entry_point_instructions,
330  const irep_idt &symbol_identifier)
331 {
333  entry_point_instructions, symbol_identifier);
334 
335  return expr_try_dynamic_cast<symbol_exprt>(skip_typecast(expr));
336 }
337 
352 static const irep_idt &
354  const std::vector<codet> &entry_point_instructions,
355  const irep_idt &input_symbol_identifier)
356 {
357  const symbol_exprt *symbol_assigned_to_input_symbol =
359  entry_point_instructions, input_symbol_identifier);
360 
361  if(symbol_assigned_to_input_symbol)
362  {
364  entry_point_instructions,
365  symbol_assigned_to_input_symbol->get_identifier());
366  }
367 
368  return input_symbol_identifier;
369 }
370 
385  const irep_idt &structure_name,
386  const std::optional<irep_idt> &superclass_name,
387  const irep_idt &component_name,
388  const irep_idt &component_type_name,
389  const std::optional<irep_idt> &typecast_name,
390  const std::vector<codet> &entry_point_instructions,
391  const symbol_table_baset &symbol_table)
392 {
393  namespacet ns(symbol_table);
394 
395  // First we need to find the assignments to the component belonging to
396  // the structure_name object
397  const auto &component_assignments =
399  entry_point_instructions,
400  structure_name,
401  superclass_name,
402  component_name,
403  symbol_table);
404  INFO(
405  "looking for component assignment " << component_name << " in "
406  << structure_name);
407  REQUIRE(component_assignments.non_null_assignments.size() == 1);
408 
409  // We are expecting the non-null assignment to be of the form:
410  // malloc_site->(@superclass_name if given.)field =
411  // (possible typecast) malloc_site$0;
412  const symbol_exprt *rhs_symbol_expr = expr_try_dynamic_cast<symbol_exprt>(
413  skip_typecast(component_assignments.non_null_assignments[0].rhs()));
414  REQUIRE(rhs_symbol_expr);
415 
416  const irep_idt &symbol_identifier = get_ultimate_source_symbol(
417  entry_point_instructions, rhs_symbol_expr->get_identifier());
418 
419  // After we have found the declaration of the final assignment's
420  // right hand side, then we want to identify that the type
421  // is the one we expect, e.g.:
422  // struct java.lang.Integer *malloc_site$0;
423  const auto &component_declaration =
425  symbol_identifier, entry_point_instructions);
426  const typet &component_type =
427  to_pointer_type(component_declaration.symbol().type()).base_type();
428  REQUIRE(component_type.id() == ID_struct_tag);
429  const auto &component_struct =
430  ns.follow_tag(to_struct_tag_type(component_type));
431  REQUIRE(component_struct.get(ID_name) == component_type_name);
432 
433  return symbol_identifier;
434 }
435 
445 const irep_idt &
447  const irep_idt &structure_name,
448  const std::optional<irep_idt> &superclass_name,
449  const irep_idt &array_component_name,
450  const irep_idt &array_type_name,
451  const std::vector<codet> &entry_point_instructions,
452  const symbol_table_baset &symbol_table)
453 {
454  // First we need to find the assignments to the component belonging to
455  // the structure_name object
456  const auto &component_assignments =
458  entry_point_instructions,
459  structure_name,
460  superclass_name,
461  array_component_name,
462  symbol_table);
463  REQUIRE(component_assignments.non_null_assignments.size() == 1);
464 
465  // We are expecting that the resulting statement is of form:
466  // structure_name.array_component_name = (struct array_type_name *)
467  // tmp_object_factory$1;
468  const exprt &component_assignment_rhs_expr =
469  component_assignments.non_null_assignments[0].rhs();
470 
471  // The rhs is a typecast, deconstruct it to get the name of the variable
472  // and find the assignment to it
473  PRECONDITION(component_assignment_rhs_expr.id() == ID_typecast);
474  const auto &component_assignment_rhs =
475  to_typecast_expr(component_assignment_rhs_expr);
476  const auto &component_reference_tmp_name =
477  to_symbol_expr(component_assignment_rhs.op()).get_identifier();
478 
479  // Check the type we are casting to matches the array type
480  REQUIRE(component_assignment_rhs.type().id() == ID_pointer);
481  REQUIRE(
483  to_pointer_type(component_assignment_rhs.type()).base_type())
484  .get(ID_identifier) == array_type_name);
485 
486  // Get the tmp_object name and find assignments to it, there should be only
487  // one that assigns the correct array through side effect
488  const auto &component_reference_assignments =
490  component_reference_tmp_name, entry_point_instructions);
491  REQUIRE(component_reference_assignments.non_null_assignments.size() == 1);
492  const exprt &component_reference_assignment_rhs_expr =
493  component_reference_assignments.non_null_assignments[0].rhs();
494 
495  // The rhs is side effect
496  PRECONDITION(component_reference_assignment_rhs_expr.id() == ID_side_effect);
497  const auto &array_component_reference_assignment_rhs =
498  to_side_effect_expr(component_reference_assignment_rhs_expr);
499 
500  // The type of the side effect is an array with the correct element type
501  PRECONDITION(
502  array_component_reference_assignment_rhs.type().id() == ID_pointer);
503  const typet &array =
504  to_pointer_type(array_component_reference_assignment_rhs.type())
505  .base_type();
506  PRECONDITION(is_java_array_tag(array.get(ID_identifier)));
507  REQUIRE(array.get(ID_identifier) == array_type_name);
508 
509  return component_reference_tmp_name;
510 }
511 
518  const irep_idt &argument_name,
519  const std::vector<codet> &entry_point_statements)
520 {
521  // Trace the creation of the object that is being supplied as the input
522  // argument to the function under test
523  const pointer_assignment_locationt argument_assignments =
526  "::" + id2string(argument_name),
527  entry_point_statements);
528 
529  // There should be at most one assignment to it
530  REQUIRE(argument_assignments.non_null_assignments.size() == 1);
531 
532  // The following finds the name of the tmp object that gets assigned
533  // to the input argument. There must be one such assignment only,
534  // and it usually looks like this:
535  // argument_name = tmp_object_factory$1;
536  const auto &argument_assignment =
537  argument_assignments.non_null_assignments[0];
538  const symbol_exprt &argument_symbol =
539  to_symbol_expr(skip_typecast(argument_assignment.rhs()));
540  return argument_symbol.get_identifier();
541 }
542 
549 std::vector<code_function_callt> require_goto_statements::find_function_calls(
550  const std::vector<codet> &statements,
551  const irep_idt &function_call_identifier)
552 {
553  std::vector<code_function_callt> function_calls;
554  for(const codet &statement : statements)
555  {
556  if(statement.get_statement() == ID_function_call)
557  {
558  const code_function_callt &function_call =
559  to_code_function_call(statement);
560 
561  if(function_call.function().id() == ID_symbol)
562  {
563  if(
564  to_symbol_expr(function_call.function()).get_identifier() ==
565  function_call_identifier)
566  {
567  function_calls.push_back(function_call);
568  }
569  }
570  }
571  }
572  return function_calls;
573 }
A goto_instruction_codet representing an assignment in the program.
A goto_instruction_codet representing the declaration of a local variable.
goto_instruction_codet representation of a function call statement.
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
const char * c_str() const
Definition: dstring.h:116
Base class for all expressions.
Definition: expr.h:56
depth_iteratort depth_end()
Definition: expr.cpp:249
depth_iteratort depth_begin()
Definition: expr.cpp:247
typet & type()
Return the type of the expression.
Definition: expr.h:84
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
const irep_idt & get(const irep_idt &name) const
Definition: irep.cpp:44
const irep_idt & id() const
Definition: irep.h:388
Extract member of struct or union.
Definition: std_expr.h:2854
const exprt & compound() const
Definition: std_expr.h:2903
irep_idt get_component_name() const
Definition: std_expr.h:2876
const union_typet & follow_tag(const union_tag_typet &) const
Follow type tag of union type.
Definition: namespace.cpp:63
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
The null pointer constant.
Definition: pointer_expr.h:909
Split an expression into a base object and a (byte) offset.
Definition: pointer_expr.h:181
void build(const exprt &expr, const namespacet &ns)
Given an expression expr, attempt to find the underlying object it represents by skipping over type c...
static const exprt & root_object(const exprt &expr)
const typet & base_type() const
The type of the data what we point to.
Definition: pointer_expr.h:35
Expression to hold a symbol (variable)
Definition: std_expr.h:131
const irep_idt & get_identifier() const
Definition: std_expr.h:160
The symbol table base class interface.
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
Symbol table entry.
Definition: symbol.h:28
exprt value
Initial value of symbol.
Definition: symbol.h:34
The type of an expression, extends irept.
Definition: type.h:29
const exprt & op() const
Definition: std_expr.h:391
Forward depth-first search iterators These iterators' copy operations are expensive,...
const exprt & skip_typecast(const exprt &expr)
find the expression nested inside typecasts, if any
Definition: expr_util.cpp:193
Deprecated expression utility functions.
Goto Programs with Functions.
const code_assignt & to_code_assign(const goto_instruction_codet &code)
const code_function_callt & to_code_function_call(const goto_instruction_codet &code)
const code_declt & to_code_decl(const goto_instruction_codet &code)
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
bool is_java_array_tag(const irep_idt &tag)
See above.
Definition: java_types.cpp:233
irep_idt require_entry_point_argument_assignment(const irep_idt &argument_name, const std::vector< codet > &entry_point_statements)
Checks that the input argument (of method under test) with given name is assigned a single non-null o...
std::vector< code_function_callt > find_function_calls(const std::vector< codet > &statements, const irep_idt &function_call_identifier)
Verify that a collection of statements contains a function call to a function whose symbol identifier...
std::vector< codet > get_all_statements(const exprt &function_value)
Expand value of a function to include all child codets.
pointer_assignment_locationt find_struct_component_assignments(const std::vector< codet > &statements, const irep_idt &structure_name, const std::optional< irep_idt > &superclass_name, const irep_idt &component_name, const symbol_table_baset &symbol_table)
Find assignment statements of the form:
pointer_assignment_locationt find_pointer_assignments(const irep_idt &pointer_name, const std::vector< codet > &instructions)
For a given variable name, gets the assignments to it in the provided instructions.
const std::vector< codet > require_entry_point_statements(const symbol_table_baset &symbol_table)
pointer_assignment_locationt find_this_component_assignment(const std::vector< codet > &statements, const irep_idt &component_name)
Find assignment statements that set this->{component_name}.
irep_idt require_struct_component_assignment(const irep_idt &structure_name, const std::optional< irep_idt > &superclass_name, const irep_idt &component_name, const irep_idt &component_type_name, const std::optional< irep_idt > &typecast_name, const std::vector< codet > &entry_point_instructions, const symbol_table_baset &symbol_table)
Checks that the component of the structure (possibly inherited from the superclass) is assigned an ob...
const code_declt & require_declaration_of_name(const irep_idt &variable_name, const std::vector< codet > &entry_point_instructions)
Find the declaration of the specific variable.
const irep_idt & require_struct_array_component_assignment(const irep_idt &structure_name, const std::optional< irep_idt > &superclass_name, const irep_idt &array_component_name, const irep_idt &array_type_name, const std::vector< codet > &entry_point_instructions, const symbol_table_baset &symbol_table)
Checks that the array component of the structure (possibly inherited from the superclass) is assigned...
API to expression classes for Pointers.
const dereference_exprt & to_dereference_expr(const exprt &expr)
Cast an exprt to a dereference_exprt.
Definition: pointer_expr.h:890
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
const symbol_exprt * try_get_unique_symbol_assigned_to_symbol(const std::vector< codet > &entry_point_instructions, const irep_idt &symbol_identifier)
Get the unique symbol assigned to a symbol, if one exists.
static const irep_idt & get_ultimate_source_symbol(const std::vector< codet > &entry_point_instructions, const irep_idt &input_symbol_identifier)
Follow the chain of non-null assignments until we find a symbol that hasn't ever had another symbol a...
const exprt & get_unique_non_null_expression_assigned_to_symbol(const std::vector< codet > &entry_point_instructions, const irep_idt &symbol_identifier)
Get the unique non-null expression assigned to a symbol.
Utilties for inspecting goto functions.
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
side_effect_exprt & to_side_effect_expr(exprt &expr)
Definition: std_code.h:1506
const codet & to_code(const exprt &expr)
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition: std_expr.h:2107
const member_exprt & to_member_expr(const exprt &expr)
Cast an exprt to a member_exprt.
Definition: std_expr.h:2946
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:518
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:17
Author: Diffblue Ltd.