CBMC
contracts_wrangler.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Parse and annotate contracts from configuration files
4 
5 Author: Qinheping Hu
6 
7 Date: June 2023
8 
9 \*******************************************************************/
10 
13 
14 #include "contracts_wrangler.h"
15 
16 #include <util/c_types.h>
17 #include <util/expr_iterator.h>
18 #include <util/format_expr.h>
19 #include <util/pointer_expr.h>
20 #include <util/simplify_expr.h>
21 #include <util/std_types.h>
22 
24 
25 #include <ansi-c/ansi_c_parser.h>
28 
30  goto_modelt &goto_model,
31  const std::string &file_name,
32  message_handlert &message_handler)
33  : ns(goto_model.symbol_table),
34  goto_model(goto_model),
35  symbol_table(goto_model.symbol_table),
36  goto_functions(goto_model.goto_functions),
37  message_handler(message_handler)
38 {
39  jsont configuration;
40 
41  if(parse_json(file_name, message_handler, configuration))
42  throw deserialization_exceptiont(file_name + " is not a valid JSON file");
43 
44  configure_functions(configuration);
45 
46  // Mangle loop contracts function by function.
47  // We match function with a regex. There should one and only one function
48  // with name matched by the regex.
49  for(const auto &function_entry : this->functions)
50  {
51  bool function_found = false;
52 
53  for(const auto &function : goto_model.goto_functions.function_map)
54  {
55  // We find a matched function.
56  if(std::regex_match(function.first.c_str(), function_entry.first))
57  {
58  if(function_found)
60  "function regex " + function_entry.second.regex_str +
61  " matches more than one function");
62 
63  function_found = true;
64 
65  // Mangle all loop contracts in the function.
66  for(const auto &loop_entry : function_entry.second.loop_contracts)
67  {
68  mangle(loop_entry, function.first);
69  }
70  }
71  }
72 
73  if(!function_found)
75  "function regex " + function_entry.second.regex_str +
76  " matches no function");
77  }
78 }
79 
81  std::string function_name,
82  const std::size_t num_of_args)
83 {
84  // Already exist.
85  if(symbol_table.has_symbol(CPROVER_PREFIX + function_name))
86  return;
87 
88  code_typet::parameterst parameters;
89  for(unsigned i = 0; i < num_of_args; i++)
90  {
91  parameters.emplace_back(pointer_type(void_type()));
92  }
93  symbolt fun_symbol(
94  CPROVER_PREFIX + function_name,
95  code_typet(parameters, empty_typet()),
96  ID_C);
97  symbol_table.add(fun_symbol);
98 }
99 
101  const loop_contracts_clauset &loop_contracts,
102  const irep_idt &function_id)
103 {
105  // Loop contracts mangling consists of four steps.
106  //
107  // 1. Construct a fake function with a fake loop that contains the loop
108  // contracts for parsing the contracts.
109  // 2. Parse the fake function and extract the contracts as exprt.
110  // 3. Replace symbols in the extracted exprt with the symbols in the
111  // symbol table of the goto model according to the symbol map provided by
112  // the user.
113  // 4. Typecheck all contracts exprt.
114  // 5. Annotate all contracts exprt to the corresponding loop.
115 
116  // Construct a fake function to parse.
117  // void function_id()
118  // {
119  // while(1==1)
120  // __CPROVER_assigns(assigns_str) // optional
121  // __CPROVER_loop_invariant(invariants_str)
122  // __CPROVER_decreases(decreases_str) // optional
123  // }
124  std::ostringstream pr;
125  pr << "void _fn() { while(1==1)";
126  if(!loop_contracts.assigns.empty())
127  {
128  pr << CPROVER_PREFIX << "assigns(" << loop_contracts.assigns << ") ";
129  }
130  pr << CPROVER_PREFIX << "loop_invariant(" << loop_contracts.invariants
131  << ") ";
132  if(!loop_contracts.decreases.empty())
133  {
134  pr << CPROVER_PREFIX << "decreases(" << loop_contracts.decreases << ") ";
135  }
136  pr << " {}}" << std::endl;
137 
138  log.debug() << "Constructing fake function:\n" << pr.str() << log.eom;
139 
140  // Parse the fake function.
141  std::istringstream is(pr.str());
142  ansi_c_parsert ansi_c_parser{message_handler};
143  ansi_c_parser.set_line_no(0);
144  ansi_c_parser.set_file("<predicate>");
145  ansi_c_parser.in = &is;
146  ansi_c_parser.for_has_scope = config.ansi_c.for_has_scope;
147  ansi_c_parser.ts_18661_3_Floatn_types = config.ansi_c.ts_18661_3_Floatn_types;
148  ansi_c_parser.__float128_is_keyword = config.ansi_c.__float128_is_keyword;
149  ansi_c_parser.float16_type = config.ansi_c.float16_type;
150  ansi_c_parser.bf16_type = config.ansi_c.bf16_type;
151  ansi_c_parser.fp16_type = config.ansi_c.fp16_type;
152  ansi_c_parser.cpp98 = false; // it's not C++
153  ansi_c_parser.cpp11 = false; // it's not C++
154  ansi_c_parser.mode = config.ansi_c.mode;
155  ansi_c_scanner_init(ansi_c_parser);
156  ansi_c_parser.parse();
157 
158  // Extract the invariants from prase_tree.
159  exprt &inv_expr(static_cast<exprt &>(ansi_c_parser.parse_tree.items.front()
160  .declarator()
161  .value()
162  .operands()[0]
163  .add(ID_C_spec_loop_invariant))
164  .operands()[0]);
165 
166  log.debug() << "Extracted loop invariants: " << inv_expr.pretty() << "\n"
167  << log.eom;
168 
169  // Extract the assigns from parse_tree.
170  std::optional<exprt> assigns_expr;
171  if(!loop_contracts.assigns.empty())
172  {
173  assigns_expr = static_cast<exprt &>(ansi_c_parser.parse_tree.items.front()
174  .declarator()
175  .value()
176  .operands()[0]
177  .add(ID_C_spec_assigns))
178  .operands()[0];
179  }
180 
181  // Extract the decreases from parse_tree.
182  exprt::operandst decreases_exprs;
183  if(!loop_contracts.decreases.empty())
184  {
185  for(auto op : static_cast<exprt &>(ansi_c_parser.parse_tree.items.front()
186  .declarator()
187  .value()
188  .operands()[0]
189  .add(ID_C_spec_decreases))
190  .operands())
191  {
192  decreases_exprs.emplace_back(op);
193  }
194  }
195 
196  // Replace symbols in the clauses with the symbols in the symbol table.
197  log.debug() << "Start to replace symbols" << log.eom;
198  loop_contracts.replace_symbol(inv_expr);
199  if(assigns_expr.has_value())
200  {
201  loop_contracts.replace_symbol(assigns_expr.value());
202  }
203  for(exprt &decrease_expr : decreases_exprs)
204  {
205  loop_contracts.replace_symbol(decrease_expr);
206  }
207 
208  // Add builtin functions that might be used in contracts to the symbol table.
209  add_builtin_pointer_function_symbol("loop_entry", 1);
210  add_builtin_pointer_function_symbol("same_object", 2);
211  add_builtin_pointer_function_symbol("OBJECT_SIZE", 1);
212  add_builtin_pointer_function_symbol("POINTER_OBJECT", 1);
213  add_builtin_pointer_function_symbol("POINTER_OFFSET", 1);
214 
215  // Typecheck clauses.
217  ansi_c_parser.parse_tree,
218  symbol_table,
219  symbol_table.lookup_ref(function_id).module.c_str(),
220  log.get_message_handler());
221 
222  // Typecheck and annotate invariants.
223  {
224  log.debug() << "Start to typecheck invariants." << log.eom;
225  ansi_c_typecheck.typecheck_expr(inv_expr);
226  if(has_subexpr(inv_expr, ID_old))
228  "old is not allowed in loop invariants.");
229 
230  invariant_mapt inv_map;
231  inv_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] =
232  and_exprt(simplify_expr(inv_expr, ns), true_exprt());
234  }
235 
236  // Typecheck and annotate assigns.
237  log.debug() << "Start to typecheck assigns." << log.eom;
238  if(assigns_expr.has_value())
239  {
240  ansi_c_typecheck.typecheck_spec_assigns(assigns_expr.value().operands());
241 
242  invariant_mapt assigns_map;
243  assigns_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] =
244  assigns_expr.value();
245  annotate_assigns(assigns_map, goto_model);
246  }
247 
248  // Typecheck an annotate decreases.
249  log.debug() << "Start to typecheck decreases." << log.eom;
250  if(!decreases_exprs.empty())
251  {
252  std::map<loop_idt, std::vector<exprt>> decreases_map;
253  decreases_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] =
254  std::vector<exprt>();
255  for(exprt &decrease_expr : decreases_exprs)
256  {
257  ansi_c_typecheck.typecheck_expr(decrease_expr);
258  decreases_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))]
259  .emplace_back(decrease_expr);
260  }
261  annotate_decreases(decreases_map, goto_model);
262  }
263 
264  log.debug() << "Mangling finished." << log.eom;
265  // Remove added symbol.
266  symbol_table.remove(CPROVER_PREFIX "loop_entry");
267  symbol_table.remove(CPROVER_PREFIX "same_object");
268  symbol_table.remove(CPROVER_PREFIX "OBJECT_SIZE");
269  symbol_table.remove(CPROVER_PREFIX "POINTER_OBJECT");
270  symbol_table.remove(CPROVER_PREFIX "POINTER_OFFSET");
271 }
272 
274 {
275  auto functions = config["functions"];
276 
277  if(functions.is_null())
278  return;
279 
280  if(!functions.is_array())
281  throw deserialization_exceptiont("functions entry must be sequence");
282 
283  for(const auto &function : to_json_array(functions))
284  {
285  if(!function.is_object())
286  throw deserialization_exceptiont("function entry must be object");
287 
288  for(const auto &function_entry : to_json_object(function))
289  {
290  const auto function_name = function_entry.first;
291  const auto &items = function_entry.second;
292 
293  if(!items.is_array())
294  throw deserialization_exceptiont("loops entry must be sequence");
295 
296  this->functions.emplace_back(function_name, functiont{});
297  functiont &function_config = this->functions.back().second;
298  function_config.regex_str = function_name;
299 
300  for(const auto &function_item : to_json_array(items))
301  {
302  if(!function_item.is_object())
303  throw deserialization_exceptiont("loop entry must be object");
304 
305  std::string loop_id = "";
306  std::string invariants_str = "";
307  std::string assigns_str = "";
308  std::string decreases_str = "";
309  unchecked_replace_symbolt replace_symbol;
310 
311  for(const auto &loop_item : to_json_object(function_item))
312  {
313  if(!loop_item.second.is_string())
314  throw deserialization_exceptiont("loop item must be string");
315 
316  if(loop_item.first == "loop_id")
317  {
318  loop_id = loop_item.second.value;
319  continue;
320  }
321 
322  if(loop_item.first == "assigns")
323  {
324  assigns_str = loop_item.second.value;
325  continue;
326  }
327 
328  if(loop_item.first == "decreases")
329  {
330  decreases_str = loop_item.second.value;
331  continue;
332  }
333 
334  if(loop_item.first == "invariants")
335  {
336  invariants_str = loop_item.second.value;
337  continue;
338  }
339 
340  if(loop_item.first == "symbol_map")
341  {
342  std::string symbol_map_str = loop_item.second.value;
343 
344  // Remove all space in symbol_map_str
345  symbol_map_str.erase(
346  std::remove_if(
347  symbol_map_str.begin(), symbol_map_str.end(), isspace),
348  symbol_map_str.end());
349 
350  for(const auto &symbol_map_entry :
351  split_string(symbol_map_str, ';'))
352  {
353  const auto symbol_map_pair = split_string(symbol_map_entry, ',');
354  const auto &symbol = symbol_table.lookup(symbol_map_pair.back());
355  if(symbol == nullptr)
357  "symbol with id \"" + symbol_map_pair.front() +
358  "\" does not exist in the symbol table");
359  // Create a dummy symbol. The type will be discarded when inserted
360  // into replace_symbol.
361  const symbol_exprt old_expr(
362  symbol_map_pair.front(), bool_typet());
363  replace_symbol.insert(old_expr, symbol->symbol_expr());
364  }
365 
366  continue;
367  }
368 
369  throw deserialization_exceptiont("unexpected loop item");
370  }
371 
372  if(loop_id.empty())
373  {
375  "loop entry must have one identifier");
376  }
377 
378  if(invariants_str.empty())
379  {
381  "loop entry must have one invariant string");
382  }
383 
384  function_config.loop_contracts.emplace_back(
385  loop_id, invariants_str, assigns_str, decreases_str, replace_symbol);
386  }
387  }
388  }
389 }
void ansi_c_scanner_init(ansi_c_parsert &)
bool ansi_c_typecheck(ansi_c_parse_treet &ansi_c_parse_tree, symbol_table_baset &symbol_table, const std::string &module, message_handlert &message_handler)
ANSI-C Language Type Checking.
configt config
Definition: config.cpp:25
empty_typet void_type()
Definition: c_types.cpp:245
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
Boolean AND.
Definition: std_expr.h:2120
The Boolean type.
Definition: std_types.h:36
Base type of functions.
Definition: std_types.h:583
std::vector< parametert > parameterst
Definition: std_types.h:585
struct configt::ansi_ct ansi_c
goto_modelt & goto_model
symbol_tablet & symbol_table
message_handlert & message_handler
void configure_functions(const jsont &)
void add_builtin_pointer_function_symbol(std::string function_name, const std::size_t num_of_args)
Add builtin function symbol with function_name into symbol table.
contracts_wranglert(goto_modelt &goto_model, const std::string &file_name, message_handlert &message_handler)
void mangle(const loop_contracts_clauset &loop_contracts, const irep_idt &function_id)
Mangle loop_contracts in the function with function_id
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
const char * c_str() const
Definition: dstring.h:116
The empty type.
Definition: std_types.h:51
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
operandst & operands()
Definition: expr.h:94
function_mapt function_map
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:34
Thrown when user-provided input cannot be processed.
std::string pretty(unsigned indent=0, unsigned max_indent=0) const
Definition: irep.cpp:482
Definition: json.h:27
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:154
Expression to hold a symbol (variable)
Definition: std_expr.h:131
bool remove(const irep_idt &name)
Remove a symbol from the symbol table.
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Symbol table entry.
Definition: symbol.h:28
irep_idt module
Name of module the symbol belongs to.
Definition: symbol.h:43
The Boolean constant true.
Definition: std_expr.h:3063
void insert(const symbol_exprt &old_expr, const exprt &new_expr)
Parse and annotate contracts.
#define CPROVER_PREFIX
int isspace(int c)
Definition: ctype.c:80
Forward depth-first search iterators These iterators' copy operations are expensive,...
bool has_subexpr(const exprt &expr, const std::function< bool(const exprt &)> &pred)
returns true if the expression has a subexpression that satisfies pred
Definition: expr_util.cpp:115
json_arrayt & to_json_array(jsont &json)
Definition: json.h:424
json_objectt & to_json_object(jsont &json)
Definition: json.h:442
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:27
double log(double x)
Definition: math.c:2776
API to expression classes for Pointers.
Unused function removal.
exprt simplify_expr(exprt src, const namespacet &ns)
Pre-defined types.
void split_string(const std::string &s, char delim, std::vector< std::string > &result, bool strip, bool remove_empty)
bool for_has_scope
Definition: config.h:151
bool float16_type
Definition: config.h:155
bool bf16_type
Definition: config.h:156
bool ts_18661_3_Floatn_types
Definition: config.h:152
bool __float128_is_keyword
Definition: config.h:154
bool fp16_type
Definition: config.h:157
flavourt mode
Definition: config.h:256
std::string regex_str
std::vector< loop_contracts_clauset > loop_contracts
unchecked_replace_symbolt replace_symbol
Loop id used to identify loops.
Definition: loop_ids.h:28
void annotate_decreases(const std::map< loop_idt, std::vector< exprt >> &decreases_map, goto_modelt &goto_model)
Annotate the decreases in decreases_map to their corresponding loops.
Definition: utils.cpp:766
void annotate_invariants(const invariant_mapt &invariant_map, goto_modelt &goto_model)
Annotate the invariants in invariant_map to their corresponding loops.
Definition: utils.cpp:705
void annotate_assigns(const std::map< loop_idt, std::set< exprt >> &assigns_map, goto_modelt &goto_model)
Annotate the assigns in assigns_map to their corresponding loops.
Definition: utils.cpp:725
std::map< loop_idt, exprt > invariant_mapt
Definition: utils.h:31