CBMC
Loading...
Searching...
No Matches
enumerative_loop_contracts_synthesizer.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Enumerative Loop Contracts Synthesizer
4
5Author: Qinheping Hu
6
7\*******************************************************************/
8
11
13
14#include <util/arith_tools.h>
15#include <util/c_types.h>
16#include <util/find_symbols.h>
17#include <util/format_expr.h>
19#include <util/prefix.h>
20#include <util/replace_symbol.h>
21#include <util/simplify_expr.h>
22
29
30#include "cegis_evaluator.h"
31#include "expr_enumerator.h"
32
33// substitute all tmp_post variables with their origins in `expr`
35 exprt &dest,
36 const std::unordered_map<exprt, exprt, irep_hash> &tmp_post_map)
37{
39 for(const auto &tmp_post_entry : tmp_post_map)
40 {
43 "tmp_post variables must be symbol expression.");
44 const auto &tmp_post_symbol =
46 r.insert(tmp_post_symbol, tmp_post_entry.second);
47 }
48 r.replace(dest);
49}
50
51std::vector<exprt> construct_terminals(const std::set<symbol_exprt> &symbols)
52{
53 // Construct a vector of terminal expressions.
54 // Terminals include:
55 // 1: scalar type variables and their loop_entry version.
56 // 2: offsets of pointers and loop_entry of pointers.
57 // 3: constants 0 and 1.
58
59 std::vector<exprt> result;
60 for(const auto &e : symbols)
61 {
62 if(e.type().id() == ID_unsignedbv)
63 {
64 // For a variable v with primitive type, we add
65 // v, __CPROVER_loop_entry(v)
66 // into the result.
67 result.push_back(typecast_exprt(e, size_type()));
68 result.push_back(
70 }
71 if((e.type().id() == ID_signedbv))
72 {
73 result.push_back(e);
74 result.push_back(unary_exprt(ID_loop_entry, e, e.type()));
75 }
76 if((e.type().id() == ID_pointer))
77 {
78 // For a variable v with pointer type, we add
79 // __CPROVER_pointer_offset(v),
80 // __CPROVER_pointer_offset(__CPROVER_loop_entry(v))
81 // into the result.
82 result.push_back(pointer_offset_exprt(e, size_type()));
83 result.push_back(pointer_offset_exprt(
84 unary_exprt(ID_loop_entry, e, e.type()), size_type()));
85 }
86 }
87 result.push_back(from_integer(1, signed_short_int_type()));
88 result.push_back(from_integer(0, signed_short_int_type()));
89 return result;
90}
91
93{
95 {
96 natural_loops_mutablet natural_loops;
97 natural_loops(function_p.second.body);
98
99 // TODO: use global may alias instead.
100 local_may_aliast local_may_alias(function_p.second);
101
102 // Initialize invariants for unannotated loops as true
103 for(const auto &loop_head_and_content : natural_loops.loop_map)
104 {
105 // Ignore empty loops and self-looped node.
106 if(loop_head_and_content.second.size() <= 1)
107 continue;
108
112
113 loop_idt new_id(function_p.first, loop_end->loop_number);
114 loop_cfg_infot cfg_info(function_p.second, loop_head_and_content.second);
115
116 log.progress() << "Initialize candidates for the loop at "
117 << loop_end->source_location() << messaget::eom;
118
119 // Turn do while loops of form
120 //
121 // do
122 // { loop body }
123 // while (0)
124 //
125 // into non-loop block
126 //
127 // { loop body }
128 // skip
129 //
130 if(
131 loop_end->is_goto() &&
132 simplify_expr(loop_end->condition(), ns) == false_exprt())
133 {
134 loop_end->turn_into_skip();
135 continue;
136 }
137
138 // we only synthesize invariants and assigns for unannotated loops
139 if(loop_end->condition().find(ID_C_spec_loop_invariant).is_nil())
140 {
141 // Store the loop guard if exists.
143 loop_end->loop_number,
145
146 if(loop_head->has_condition())
147 neg_guards[new_id] = loop_head->condition();
148
149 // Initialize invariant clauses as `true`.
152
153 // Initialize assigns clauses.
154 if(loop_end->condition().find(ID_C_spec_assigns).is_nil())
155 {
156 assigns_map[new_id] = {};
157
158 // Infer loop assigns using alias analysis.
160 local_may_alias, loop_head_and_content.second, assigns_map[new_id]);
161
162 // Don't check assignable for CPROVER symbol
163 for(auto it = assigns_map[new_id].begin();
164 it != assigns_map[new_id].end();) // no ++it
165 {
166 if(auto symbol_expr = expr_try_dynamic_cast<symbol_exprt>(*it))
167 {
168 if(has_prefix(
169 id2string(symbol_expr->get_identifier()), CPROVER_PREFIX))
170 {
171 it = assigns_map[new_id].erase(it);
172 continue;
173 }
174 }
175
176 ++it;
177 };
178
179 // remove loop-local symbols from the inferred set
181
182 // If the set contains pairs (i, a[i]),
183 // we widen them to (i, __CPROVER_POINTER_OBJECT(a))
185 }
186 }
187 }
188 }
189 log.debug() << "Finished candidates initialization." << messaget::eom;
190}
191
193 const exprt &checked_pointer,
194 const std::list<loop_idt> cause_loop_ids)
195{
196 auto new_assign = checked_pointer;
197
198 // Add the new assigns target to the most-inner loop that doesn't contain
199 // the new assigns target yet.
200 for(const auto &loop_id : cause_loop_ids)
201 {
202 // Widen index and dereference to whole object.
203 if(new_assign.id() == ID_index || new_assign.id() == ID_dereference)
204 {
208 {
210 }
211 }
212
213 const auto &source_location =
215 loop_id.loop_number,
216 goto_model.goto_functions.function_map[loop_id.function_id])
217 ->source_location();
218
219 // Simplify expr to avoid offset that is out of scope.
220 // In the case of nested loops, After widening, pointer_object(ptr + i)
221 // can contain the pointer ptr in the scope of both loops, and the offset
222 // i which is only in the scope of the inner loop.
223 // After simplification, pointer_object(ptr + i) -> pointer_object(ptr).
225 new_assign.add_source_location() = source_location;
226
227 // Avoid adding same target.
228 if(assigns_map[loop_id].insert(new_assign).second)
229 return;
230 }
231 INVARIANT(false, "Failed to synthesize a new assigns target.");
232}
233
235{
236 for(auto &goto_function : goto_model.goto_functions.function_map)
237 {
238 for(const auto &instruction : goto_function.second.body.instructions)
239 {
240 // tmp_post variables are symbol lhs of ASSIGN.
241 if(!instruction.is_assign() || instruction.assign_lhs().id() != ID_symbol)
242 continue;
243
244 const auto symbol_lhs =
245 expr_try_dynamic_cast<symbol_exprt>(instruction.assign_lhs());
246
247 // tmp_post variables have identifiers with the prefix tmp::tmp_post.
248 if(
249 id2string(symbol_lhs->get_identifier()).find("tmp::tmp_post") !=
250 std::string::npos)
251 {
252 tmp_post_map[instruction.assign_lhs()] = instruction.assign_rhs();
253 }
254 }
255 }
256}
257
258std::set<symbol_exprt>
260 const loop_idt &cause_loop_id,
261 const exprt &new_clause,
262 const std::set<exprt> &live_vars)
263{
264 // We overapproximate dependent symbols as all symbols in live variables.
265 // TODO: using flow-dependency analysis to rule out not dependent symbols.
266
267 std::set<symbol_exprt> result;
268 for(const auto &e : live_vars)
269 find_symbols(e, result);
270
271 // Erase all variables added during loop transformations---they are not in
272 // the original symbol table.
273 for(auto it = result.begin(); it != result.end();)
274 {
275 if(original_symbol_table.lookup(it->get_identifier()) == nullptr)
276 {
277 it = result.erase(it);
278 }
279 else
280 it++;
281 }
282
283 return result;
284}
285
287 const exprt &violated_predicate)
288{
289 // For the case where the violated predicate is dependent on no instruction
290 // other than loop havocing, the violated_predicate is
291 // WLP(loop_body_before_violation, violated_predicate).
292 // TODO: implement a more complete WLP algorithm.
293 return violated_predicate;
294}
295
297 const exprt &checked_pointer)
298{
299 // The same object predicate says that the checked pointer points to the
300 // same object as it pointed before entering the loop.
301 // It works for the array-manipulating loops where only offset of pointer
302 // are modified but not the object pointers point to.
303 return same_object(
304 checked_pointer, unary_exprt(ID_loop_entry, checked_pointer));
305}
306
308 const std::vector<exprt> terminal_symbols,
309 const loop_idt &cause_loop_id,
310 const irep_idt &violation_id,
311 const std::vector<cext> &cexs)
312{
313 // Synthesis of strengthening clauses is a enumerate-and-check process.
314 // We first construct the enumerator for the following grammar.
315 // And then enumerate clause and check that if it can make the invariant
316 // inductive.
317
318 // Initialize factory representing grammar
319 // StartBool -> StartBool && StartBool | Start == Start
320 // | StartBool <= StartBool | StartBool < StartBool
321 // Start -> Start + Start | terminal_symbols
322 // where a0, and a1 are symbol expressions.
326
327 // terminals
329
330 // rules for Start
332 // Start -> terminals
334 start_args.push_back(&leaf_g);
335
336 // Start -> Start + Start
338 ID_plus,
339 start_ph,
340 start_ph,
341 [](const partitiont &partition) {
342 if(partition.size() <= 1)
343 return true;
344 return partition.front() == 1;
345 },
346 ns);
347 start_args.push_back(&plus_g);
348
349 // rules for StartBool
351 // StartBool -> StartBool && StartBool
353 start_bool_args.push_back(&and_g);
354 // StartBool -> Start == Start
356 start_bool_args.push_back(&equal_g);
357 // StartBool -> Start <= Start
359 start_bool_args.push_back(&le_g);
360 // StartBool -> Start < Start
362 start_bool_args.push_back(&lt_g);
363
364 // add the two nonterminals to the factory
365 factory.attach_productions("Start", start_args);
366 factory.attach_productions("StartBool", start_bool_args);
367
368 // size of candidates we are searching now,
369 // starting from 0
370 size_t size_bound = 0;
371
372 // Count how many candidates are filtered out by the quick filter.
373 size_t count_all = 0;
374 size_t count_filtered = 0;
375
376 // Start to enumerate and check.
377 while(true)
378 {
379 size_bound++;
380
381 // generate candidate and verify
383 {
384 log.progress() << "Verifying candidate: "
394
395 // Quick filter:
396 // Rule out a candidate if its evaluation is inconsistent with examples.
398 count_all++;
399 if(!evaluator.evaluate())
400 {
402 continue;
403 }
404
405 // The verifier we use to check current invariant candidates.
408
409 // A good strengthening clause if
410 // 1. all checks pass, or
411 // 2. the loop invariant is inductive and hold upon the entry.
412 const auto &return_cex = verifier.verify();
413 if(
414 !return_cex.has_value() ||
415 (verifier.properties.at(violation_id).status !=
417 return_cex->violation_type !=
419 return_cex->violation_type !=
421 {
422 log.progress() << "Quick filter: " << count_filtered << " out of "
423 << count_all << " candidates were filtered out.\n";
425 }
426 }
427 }
429}
430
432{
435
436 // The invariants we are going to synthesize and verify are the combined
437 // invariants from in- and post- invariant clauses.
440
441 // The verifier we use to check current invariant candidates.
444
445 // Set of symbols the violation may be dependent on.
446 // We enumerate strengthening clauses built from symbols from the set.
447 std::set<symbol_exprt> dependent_symbols;
448 // Set of symbols we used to enumerate strengthening clauses.
449 std::vector<exprt> terminal_symbols;
450
451 log.debug() << "Start the first synthesis iteration." << messaget::eom;
452 auto return_cex = verifier.verify();
453
454 // Counterexamples we have seen.
455 std::vector<cext> cexs;
456
457 while(return_cex.has_value())
458 {
459 cexs.push_back(return_cex.value());
461 // Synthesize the new_clause
462 // We use difference strategies for different type of violations.
463 switch(return_cex->violation_type)
464 {
467 synthesize_range_predicate(return_cex->violated_predicate);
468 break;
469
470 case cext::violation_typet ::cex_null_pointer:
473 break;
474
477 return_cex->checked_pointer, return_cex->cause_loop_ids);
478 break;
479
481 // Update the dependent symbols.
483 return_cex->cause_loop_ids.front(),
485 return_cex->live_variables);
490 return_cex->cause_loop_ids.front(),
491 verifier.target_violation_id,
492 cexs);
493 break;
494
496 INVARIANT(false, "unsupported violation type");
497 break;
498 }
499
500 // Assigns map has already been updated in the switch block.
501 // Update invariants map for other types of violations.
503 {
504 INVARIANT(!return_cex->cause_loop_ids.empty(), "No cause loop found!");
505 INVARIANT(
507 "failed to synthesized meaningful clause");
508
509 // There could be tmp_post variables in the synthesized clause.
510 // We substitute them with their original variables.
512
513 const auto &cause_loop_id = return_cex->cause_loop_ids.front();
514 // Update the dependent symbols.
517
518 // add the new clause to the candidate invariants.
519 if(
520 return_cex->violation_location ==
522 {
523 // When the violation happens in the loop guard, the new clause
524 // should hold for the both cases of
525 // 1. loop guard holds --- loop_guard -> in_invariant
526 // 2. loop guard doesn't hold --- !loop_guard -> pos_invariant
531 }
532 else if(
533 return_cex->violation_location == cext::violation_locationt::in_loop)
534 {
535 // When the violation happens in the loop body, the new clause
536 // should hold for the case of
537 // loop guard holds --- loop_guard -> in_invariant
540 }
541 else
542 {
543 // When the violation happens after the loop body, the new clause
544 // should hold for the case of
545 // loop guard doesn't hold --- !loop_guard -> pos_invariant
548 }
549
550 // Re-combine invariant clauses and update the candidate map.
553 }
554
555 return_cex = verifier.verify();
556 }
557
558 log.result() << "result : " << log.green << "PASS" << log.reset
559 << messaget::eom;
560
561 return combined_invariant;
562}
563
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
signedbv_typet signed_short_int_type()
Definition c_types.cpp:29
Evaluate if an expression is consistent with examples.
Classes providing CFG-based information about symbols to guide simplifications in function and loop a...
Operator to return the address of an object.
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Boolean AND.
Definition std_expr.h:2125
Enumerator that enumerates binary functional expressions.
Evaluator for checking if an expression is consistent with a given set of test cases (positive exampl...
Verifier that take a goto program as input, and output formatted counterexamples for counterexample-g...
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
std::map< loop_idt, std::set< exprt > > assigns_map
Synthesized assigns clauses.
invariant_mapt synthesize_all() override
This synthesizer guarantees that, with the synthesized loop contracts, all checks in the annotated GO...
exprt synthesize(loop_idt loop_id) override
Synthesize loop invariant for a specified loop in the goto_model.
exprt synthesize_range_predicate(const exprt &violated_predicate)
Synthesize range predicate for OOB violation with violated_predicate.
const symbol_tablet original_symbol_table
Symbol table of the input goto model.
std::set< symbol_exprt > compute_dependent_symbols(const loop_idt &cause_loop_id, const exprt &new_clause, const std::set< exprt > &live_vars)
Compute the dependent symbols for a loop with invariant-not-preserved violation which happen after ne...
invariant_mapt in_invariant_clause_map
Synthesize invariant of form (in_inv || !guard) && (!guard -> pos_inv)
void build_tmp_post_map()
Scan all ASSIGN instructions to build the map from tmp_post variables to their original variables.
std::unordered_map< exprt, exprt, irep_hash > tmp_post_map
Map from tmp_post variables to their original variables.
void init_candidates()
Initialize invariants as true for all unannotated loops.
void synthesize_assigns(const exprt &checked_pointer, const std::list< loop_idt > cause_loop_ids)
Synthesize assigns target and update assigns_map.
exprt synthesize_strengthening_clause(const std::vector< exprt > terminal_symbols, const loop_idt &cause_loop_id, const irep_idt &violation_id, const std::vector< cext > &cexs)
Synthesize strengthening clause for invariant-not-preserved violation.
exprt synthesize_same_object_predicate(const exprt &checked_pointer)
Synthesize same object predicate for null-pointer violation for checked_pointer.
Factory for enumerator that can be used to represent a tree grammar.
void attach_productions(const std::string &id, const enumeratorst &enumerators)
Attach enumerators to the placeholder with id.
Base class for all expressions.
Definition expr.h:56
The Boolean constant false.
Definition std_expr.h:3199
function_mapt function_map
goto_functionst goto_functions
GOTO functions.
Definition goto_model.h:34
instructionst::iterator targett
A class containing utility functions for havocing expressions.
Definition havoc_utils.h:28
Enumerator that enumerates leaf expressions from a given list.
loop_mapt loop_map
void erase_locals(std::set< exprt > &exprs)
Definition cfg_info.h:171
static const commandt reset
return to default formatting, as defined by the terminal
Definition message.h:335
mstreamt & debug() const
Definition message.h:421
static const commandt green
render text with green foreground color
Definition message.h:341
mstreamt & progress() const
Definition message.h:416
mstreamt & result() const
Definition message.h:401
static eomt eom
Definition message.h:289
The offset (in bytes) of a pointer relative to the object.
Placeholders for actual enumerators, which represent nonterminals.
Replace a symbol expression by a given expression.
The Boolean constant true.
Definition std_expr.h:3190
Semantic type conversion.
Definition std_expr.h:2073
Generic base class for unary expressions.
Definition std_expr.h:361
std::unordered_set< exprt, irep_hash > expr_sett
bool has_prefix(const std::string &s, const std::string &prefix)
Definition converter.cpp:13
static exprt evaluator(const std::unordered_map< exprt, exprt, irep_hash > &memory, const decision_proceduret &solver, exprt src, const namespacet &ns)
#define CPROVER_PREFIX
std::vector< exprt > construct_terminals(const std::set< symbol_exprt > &symbols)
void replace_tmp_post(exprt &dest, const std::unordered_map< exprt, exprt, irep_hash > &tmp_post_map)
Enumerative Loop Contracts Synthesizer.
Enumerator Interface.
std::list< const enumerator_baset * > enumeratorst
std::list< std::size_t > partitiont
static bool find_symbols(symbol_kindt, const typet &, std::function< bool(const symbol_exprt &)>, std::unordered_set< irep_idt > &bindings, const std::vector< irep_idt > &subs_to_find)
Find identifiers with id ID_symbol of the sub expressions and the subs with ID in subs_to_find consid...
static format_containert< T > format(const T &o)
Definition format.h:37
Utilities for building havoc code for expressions.
const std::string & id2string(const irep_idt &d)
Definition irep.h:44
static int8_t r
Definition irep_hash.h:60
Field-insensitive, location-sensitive may-alias analysis.
void get_assigns(const local_may_aliast &local_may_alias, const loopt &loop, assignst &assigns)
Helper functions for k-induction and loop invariants.
Compute natural loops in a goto_function.
exprt same_object(const exprt &p1, const exprt &p2)
exprt pointer_object(const exprt &p)
Various predicates over pointers in programs.
@ FAIL
The property was violated.
exprt simplify_expr(exprt src, const namespacet &ns)
#define UNREACHABLE
This should be used to mark dead code.
Definition invariant.h:525
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
bool is_constant(const typet &type)
This method tests, if the given typet is a constant.
Definition std_types.h:29
Loop id used to identify loops.
Definition loop_ids.h:28
invariant_mapt combine_in_and_post_invariant_clauses(const invariant_mapt &in_clauses, const invariant_mapt &post_clauses, const invariant_mapt &neg_guards)
Combine invariant of form (in_inv || !guard) && (!guard -> pos_inv)
#define size_type
Definition unistd.c:186
goto_programt::targett get_loop_head(const unsigned int target_loop_number, goto_functiont &function)
Find and return the first instruction of the natural loop with loop_number in function.
Definition utils.cpp:640
void widen_assigns(assignst &assigns, const namespacet &ns)
Widen expressions in assigns with the following strategy.
Definition utils.cpp:360
goto_programt::targett get_loop_end_from_loop_head_and_content_mutable(const goto_programt::targett &loop_head, const loop_templatet< goto_programt::targett, goto_programt::target_less_than > &loop)
Find the goto instruction of loop that jumps to loop_head
Definition utils.cpp:584
std::map< loop_idt, exprt > invariant_mapt
Definition utils.h:32