CBMC
Loading...
Searching...
No Matches
contracts_wrangler.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Parse and annotate contracts from configuration files
4
5Author: Qinheping Hu
6
7Date: June 2023
8
9\*******************************************************************/
10
13
14#include "contracts_wrangler.h"
15
16#include <util/c_types.h>
17#include <util/expr_iterator.h>
18#include <util/format_expr.h>
19#include <util/pointer_expr.h>
20#include <util/simplify_expr.h>
21#include <util/std_types.h>
22
24
28
30 goto_modelt &goto_model,
31 const std::string &file_name,
32 message_handlert &message_handler)
33 : ns(goto_model.symbol_table),
34 goto_model(goto_model),
35 symbol_table(goto_model.symbol_table),
36 goto_functions(goto_model.goto_functions),
37 message_handler(message_handler)
38{
39 jsont configuration;
40
41 if(parse_json(file_name, message_handler, configuration))
42 throw deserialization_exceptiont(file_name + " is not a valid JSON file");
43
44 configure_functions(configuration);
45
46 // Mangle loop contracts function by function.
47 // We match function with a regex. There should one and only one function
48 // with name matched by the regex.
49 for(const auto &function_entry : this->functions)
50 {
51 bool function_found = false;
52
53 for(const auto &function : goto_model.goto_functions.function_map)
54 {
55 // We find a matched function.
56 if(std::regex_match(function.first.c_str(), function_entry.first))
57 {
60 "function regex " + function_entry.second.regex_str +
61 " matches more than one function");
62
63 function_found = true;
64
65 // Mangle all loop contracts in the function.
66 for(const auto &loop_entry : function_entry.second.loop_contracts)
67 {
68 mangle(loop_entry, function.first);
69 }
70 }
71 }
72
75 "function regex " + function_entry.second.regex_str +
76 " matches no function");
77 }
78}
79
81 std::string function_name,
82 const std::size_t num_of_args)
83{
84 // Already exist.
85 if(symbol_table.has_symbol(CPROVER_PREFIX + function_name))
86 return;
87
88 code_typet::parameterst parameters;
89 for(unsigned i = 0; i < num_of_args; i++)
90 {
91 parameters.emplace_back(pointer_type(void_type()));
92 }
94 CPROVER_PREFIX + function_name,
95 code_typet(parameters, empty_typet()),
96 ID_C);
98}
99
101 const loop_contracts_clauset &loop_contracts,
102 const irep_idt &function_id)
103{
105 // Loop contracts mangling consists of four steps.
106 //
107 // 1. Construct a fake function with a fake loop that contains the loop
108 // contracts for parsing the contracts.
109 // 2. Parse the fake function and extract the contracts as exprt.
110 // 3. Replace symbols in the extracted exprt with the symbols in the
111 // symbol table of the goto model according to the symbol map provided by
112 // the user.
113 // 4. Typecheck all contracts exprt.
114 // 5. Annotate all contracts exprt to the corresponding loop.
115
116 // Construct a fake function to parse.
117 // void function_id()
118 // {
119 // while(1==1)
120 // __CPROVER_assigns(assigns_str) // optional
121 // __CPROVER_loop_invariant(invariants_str)
122 // __CPROVER_decreases(decreases_str) // optional
123 // }
124 std::ostringstream pr;
125 pr << "void _fn() { while(1==1)";
126 if(!loop_contracts.assigns.empty())
127 {
128 pr << CPROVER_PREFIX << "assigns(" << loop_contracts.assigns << ") ";
129 }
130 pr << CPROVER_PREFIX << "loop_invariant(" << loop_contracts.invariants
131 << ") ";
132 if(!loop_contracts.decreases.empty())
133 {
134 pr << CPROVER_PREFIX << "decreases(" << loop_contracts.decreases << ") ";
135 }
136 pr << " {}}" << std::endl;
137
138 log.debug() << "Constructing fake function:\n" << pr.str() << log.eom;
139
140 // Parse the fake function.
141 std::istringstream is(pr.str());
142 ansi_c_parsert ansi_c_parser{message_handler};
143 ansi_c_parser.set_line_no(0);
144 ansi_c_parser.set_file("<predicate>");
145 ansi_c_parser.in = &is;
146 ansi_c_parser.for_has_scope = config.ansi_c.for_has_scope;
147 ansi_c_parser.ts_18661_3_Floatn_types = config.ansi_c.ts_18661_3_Floatn_types;
148 ansi_c_parser.__float128_is_keyword = config.ansi_c.__float128_is_keyword;
149 ansi_c_parser.float16_type = config.ansi_c.float16_type;
150 ansi_c_parser.bf16_type = config.ansi_c.bf16_type;
151 ansi_c_parser.fp16_type = config.ansi_c.fp16_type;
152 ansi_c_parser.cpp98 = false; // it's not C++
153 ansi_c_parser.cpp11 = false; // it's not C++
154 ansi_c_parser.mode = config.ansi_c.mode;
155 ansi_c_scanner_init(ansi_c_parser);
156 ansi_c_parser.parse();
157
158 // Extract the invariants from prase_tree.
159 exprt &inv_expr(static_cast<exprt &>(ansi_c_parser.parse_tree.items.front()
160 .declarator()
161 .value()
162 .operands()[0]
164 .operands()[0]);
165
166 log.debug() << "Extracted loop invariants: " << inv_expr.pretty() << "\n"
167 << log.eom;
168
169 // Extract the assigns from parse_tree.
170 std::optional<exprt> assigns_expr;
171 if(!loop_contracts.assigns.empty())
172 {
173 assigns_expr = static_cast<exprt &>(ansi_c_parser.parse_tree.items.front()
174 .declarator()
175 .value()
176 .operands()[0]
177 .add(ID_C_spec_assigns))
178 .operands()[0];
179 }
180
181 // Extract the decreases from parse_tree.
183 if(!loop_contracts.decreases.empty())
184 {
185 for(auto op : static_cast<exprt &>(ansi_c_parser.parse_tree.items.front()
186 .declarator()
187 .value()
188 .operands()[0]
190 .operands())
191 {
192 decreases_exprs.emplace_back(op);
193 }
194 }
195
196 // Replace symbols in the clauses with the symbols in the symbol table.
197 log.debug() << "Start to replace symbols" << log.eom;
198 loop_contracts.replace_symbol(inv_expr);
199 if(assigns_expr.has_value())
200 {
201 loop_contracts.replace_symbol(assigns_expr.value());
202 }
204 {
205 loop_contracts.replace_symbol(decrease_expr);
206 }
207
208 // Add builtin functions that might be used in contracts to the symbol table.
210 add_builtin_pointer_function_symbol("same_object", 2);
211 add_builtin_pointer_function_symbol("OBJECT_SIZE", 1);
212 add_builtin_pointer_function_symbol("POINTER_OBJECT", 1);
213 add_builtin_pointer_function_symbol("POINTER_OFFSET", 1);
214
215 // Typecheck clauses.
217 ansi_c_parser.parse_tree,
219 symbol_table.lookup_ref(function_id).module.c_str(),
220 log.get_message_handler());
221
222 // Typecheck and annotate invariants.
223 {
224 log.debug() << "Start to typecheck invariants." << log.eom;
225 ansi_c_typecheck.typecheck_expr(inv_expr);
228 "old is not allowed in loop invariants.");
229
231 inv_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] =
234 }
235
236 // Typecheck and annotate assigns.
237 log.debug() << "Start to typecheck assigns." << log.eom;
238 if(assigns_expr.has_value())
239 {
240 ansi_c_typecheck.typecheck_spec_assigns(assigns_expr.value().operands());
241
242 invariant_mapt assigns_map;
243 assigns_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] =
244 assigns_expr.value();
245 annotate_assigns(assigns_map, goto_model);
246 }
247
248 // Typecheck an annotate decreases.
249 log.debug() << "Start to typecheck decreases." << log.eom;
250 if(!decreases_exprs.empty())
251 {
252 std::map<loop_idt, std::vector<exprt>> decreases_map;
253 decreases_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] =
254 std::vector<exprt>();
256 {
257 ansi_c_typecheck.typecheck_expr(decrease_expr);
258 decreases_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))]
259 .emplace_back(decrease_expr);
260 }
262 }
263
264 log.debug() << "Mangling finished." << log.eom;
265 // Remove added symbol.
266 symbol_table.remove(CPROVER_PREFIX "loop_entry");
267 symbol_table.remove(CPROVER_PREFIX "same_object");
268 symbol_table.remove(CPROVER_PREFIX "OBJECT_SIZE");
269 symbol_table.remove(CPROVER_PREFIX "POINTER_OBJECT");
270 symbol_table.remove(CPROVER_PREFIX "POINTER_OFFSET");
271}
272
274{
275 auto functions = config["functions"];
276
277 if(functions.is_null())
278 return;
279
280 if(!functions.is_array())
281 throw deserialization_exceptiont("functions entry must be sequence");
282
283 for(const auto &function : to_json_array(functions))
284 {
285 if(!function.is_object())
286 throw deserialization_exceptiont("function entry must be object");
287
288 for(const auto &function_entry : to_json_object(function))
289 {
290 const auto function_name = function_entry.first;
291 const auto &items = function_entry.second;
292
293 if(!items.is_array())
294 throw deserialization_exceptiont("loops entry must be sequence");
295
296 this->functions.emplace_back(function_name, functiont{});
297 functiont &function_config = this->functions.back().second;
298 function_config.regex_str = function_name;
299
300 for(const auto &function_item : to_json_array(items))
301 {
302 if(!function_item.is_object())
303 throw deserialization_exceptiont("loop entry must be object");
304
305 std::string loop_id = "";
306 std::string invariants_str = "";
307 std::string assigns_str = "";
308 std::string decreases_str = "";
309 unchecked_replace_symbolt replace_symbol;
310
311 for(const auto &loop_item : to_json_object(function_item))
312 {
313 if(!loop_item.second.is_string())
314 throw deserialization_exceptiont("loop item must be string");
315
316 if(loop_item.first == "loop_id")
317 {
318 loop_id = loop_item.second.value;
319 continue;
320 }
321
322 if(loop_item.first == "assigns")
323 {
324 assigns_str = loop_item.second.value;
325 continue;
326 }
327
328 if(loop_item.first == "decreases")
329 {
330 decreases_str = loop_item.second.value;
331 continue;
332 }
333
334 if(loop_item.first == "invariants")
335 {
336 invariants_str = loop_item.second.value;
337 continue;
338 }
339
340 if(loop_item.first == "symbol_map")
341 {
342 std::string symbol_map_str = loop_item.second.value;
343
344 // Remove all space in symbol_map_str
345 symbol_map_str.erase(
346 std::remove_if(
347 symbol_map_str.begin(), symbol_map_str.end(), isspace),
348 symbol_map_str.end());
349
350 for(const auto &symbol_map_entry :
352 {
354 const auto &symbol = symbol_table.lookup(symbol_map_pair.back());
355 if(symbol == nullptr)
357 "symbol with id \"" + symbol_map_pair.front() +
358 "\" does not exist in the symbol table");
359 // Create a dummy symbol. The type will be discarded when inserted
360 // into replace_symbol.
362 symbol_map_pair.front(), bool_typet());
363 replace_symbol.insert(old_expr, symbol->symbol_expr());
364 }
365
366 continue;
367 }
368
369 throw deserialization_exceptiont("unexpected loop item");
370 }
371
372 if(loop_id.empty())
373 {
375 "loop entry must have one identifier");
376 }
377
378 if(invariants_str.empty())
379 {
381 "loop entry must have one invariant string");
382 }
383
384 function_config.loop_contracts.emplace_back(
385 loop_id, invariants_str, assigns_str, decreases_str, replace_symbol);
386 }
387 }
388 }
389}
void ansi_c_scanner_init(ansi_c_parsert &)
bool ansi_c_typecheck(ansi_c_parse_treet &ansi_c_parse_tree, symbol_table_baset &symbol_table, const std::string &module, message_handlert &message_handler)
ANSI-C Language Type Checking.
configt config
Definition config.cpp:25
empty_typet void_type()
Definition c_types.cpp:245
pointer_typet pointer_type(const typet &subtype)
Definition c_types.cpp:235
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Boolean AND.
Definition std_expr.h:2125
The Boolean type.
Definition std_types.h:36
Base type of functions.
Definition std_types.h:583
std::vector< parametert > parameterst
Definition std_types.h:586
struct configt::ansi_ct ansi_c
symbol_tablet & symbol_table
message_handlert & message_handler
void configure_functions(const jsont &)
void add_builtin_pointer_function_symbol(std::string function_name, const std::size_t num_of_args)
Add builtin function symbol with function_name into symbol table.
contracts_wranglert(goto_modelt &goto_model, const std::string &file_name, message_handlert &message_handler)
void mangle(const loop_contracts_clauset &loop_contracts, const irep_idt &function_id)
Mangle loop_contracts in the function with function_id
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
The empty type.
Definition std_types.h:51
Base class for all expressions.
Definition expr.h:56
std::vector< exprt > operandst
Definition expr.h:58
operandst & operands()
Definition expr.h:94
function_mapt function_map
goto_functionst goto_functions
GOTO functions.
Definition goto_model.h:34
Thrown when user-provided input cannot be processed.
Definition json.h:27
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:154
Expression to hold a symbol (variable)
Definition std_expr.h:131
bool remove(const irep_idt &name)
Remove a symbol from the symbol table.
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
Symbol table entry.
Definition symbol.h:28
The Boolean constant true.
Definition std_expr.h:3190
void insert(const symbol_exprt &old_expr, const exprt &new_expr)
Parse and annotate contracts.
#define CPROVER_PREFIX
int isspace(int c)
Definition ctype.c:80
Forward depth-first search iterators These iterators' copy operations are expensive,...
bool has_subexpr(const exprt &expr, const std::function< bool(const exprt &)> &pred)
returns true if the expression has a subexpression that satisfies pred
json_objectt & to_json_object(jsont &json)
Definition json.h:442
json_arrayt & to_json_array(jsont &json)
Definition json.h:424
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
double log(double x)
Definition math.c:2449
API to expression classes for Pointers.
Unused function removal.
exprt simplify_expr(exprt src, const namespacet &ns)
Pre-defined types.
void split_string(const std::string &s, char delim, std::vector< std::string > &result, bool strip, bool remove_empty)
std::string regex_str
unchecked_replace_symbolt replace_symbol
Loop id used to identify loops.
Definition loop_ids.h:28
void annotate_decreases(const std::map< loop_idt, std::vector< exprt > > &decreases_map, goto_modelt &goto_model)
Annotate the decreases in decreases_map to their corresponding loops.
Definition utils.cpp:766
void annotate_assigns(const std::map< loop_idt, std::set< exprt > > &assigns_map, goto_modelt &goto_model)
Annotate the assigns in assigns_map to their corresponding loops.
Definition utils.cpp:725
void annotate_invariants(const invariant_mapt &invariant_map, goto_modelt &goto_model)
Annotate the invariants in invariant_map to their corresponding loops.
Definition utils.cpp:705
std::map< loop_idt, exprt > invariant_mapt
Definition utils.h:32