CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
symex_dereference.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Symbolic Execution of ANSI-C
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
11
12#include "goto_symex.h"
13
14#include <util/arith_tools.h>
15#include <util/byte_operators.h>
16#include <util/c_types.h>
18#include <util/expr_util.h>
19#include <util/fresh_symbol.h>
20#include <util/invariant.h>
22
24
25#include "expr_skeleton.h"
26#include "path_storage.h"
27#include "symex_assign.h"
29
44 const exprt &expr,
45 statet &state,
46 bool keep_array)
47{
48 exprt result;
49
52 {
53 // address_of(byte_extract(op, offset, t)) is
54 // address_of(op) + offset with adjustments for arrays
55
57
58 // recursive call
59 result = address_arithmetic(be.op(), state, keep_array);
60
61 if(be.op().type().id() == ID_array && result.id() == ID_address_of)
62 {
64
65 // turn &a of type T[i][j] into &(a[0][0])
66 for(const typet *t = &(to_type_with_subtype(a.type()).subtype());
67 t->id() == ID_array && expr.type() != *t;
68 t = &(to_array_type(*t).element_type()))
69 a.object() = index_exprt(a.object(), from_integer(0, c_index_type()));
70 }
71
72 // do (expr.type() *)(((char *)op)+offset)
73 result=typecast_exprt(result, pointer_type(char_type()));
74
75 // there could be further dereferencing in the offset
76 exprt offset=be.offset();
77 dereference_rec(offset, state, false, false);
78
79 result=plus_exprt(result, offset);
80
81 // treat &array as &array[0]
82 const typet &expr_type = expr.type();
84
85 if(expr_type.id()==ID_array && !keep_array)
87 else
89
91 }
92 else if(expr.id()==ID_index ||
93 expr.id()==ID_member)
94 {
96 ode.build(expr, ns);
97
99 make_byte_extract(ode.root_object(), ode.offset(), expr.type());
100
101 // recursive call
102 result = address_arithmetic(be, state, keep_array);
103
104 do_simplify(result);
105 }
106 else if(expr.id()==ID_dereference)
107 {
108 // ANSI-C guarantees &*p == p no matter what p is,
109 // even if it's complete garbage
110 // just grab the pointer, but be wary of further dereferencing
111 // in the pointer itself
112 result=to_dereference_expr(expr).pointer();
113 dereference_rec(result, state, false, false);
114 }
115 else if(expr.id()==ID_if)
116 {
118
119 // the condition is not an address
120 dereference_rec(if_expr.cond(), state, false, false);
121
122 // recursive call
123 if_expr.true_case() =
124 address_arithmetic(if_expr.true_case(), state, keep_array);
125 if_expr.false_case() =
126 address_arithmetic(if_expr.false_case(), state, keep_array);
127
128 result=if_expr;
129 }
130 else if(expr.id()==ID_symbol ||
131 expr.id()==ID_string_constant ||
132 expr.id()==ID_label ||
133 expr.id()==ID_array)
134 {
135 // give up, just dereference
136 result=expr;
137 dereference_rec(result, state, false, false);
138
139 // turn &array into &array[0]
140 if(result.type().id() == ID_array && !keep_array)
141 result = index_exprt(result, from_integer(0, c_index_type()));
142
143 // handle field-sensitive SSA symbol
144 mp_integer offset=0;
145 if(is_ssa_expr(expr))
146 {
148 PRECONDITION(offset_opt.has_value());
149 offset = *offset_opt;
150 }
151
152 if(offset>0)
153 {
155 to_ssa_expr(expr).get_l1_object(),
156 from_integer(offset, c_index_type()),
157 expr.type());
158
159 result = address_arithmetic(be, state, keep_array);
160
161 do_simplify(result);
162 }
163 else
164 result=address_of_exprt(result);
165 }
166 else if(expr.id() == ID_typecast)
167 {
169
170 result = address_arithmetic(tc_expr.op(), state, keep_array);
171
172 // treat &array as &array[0]
173 const typet &expr_type = expr.type();
175
176 if(expr_type.id() == ID_array && !keep_array)
177 dest_type_subtype = to_array_type(expr_type).element_type();
178 else
180
182 }
183 else
185 "goto_symext::address_arithmetic does not handle " + expr.id_string());
186
187 const typet &expr_type = expr.type();
188 INVARIANT(
189 (expr_type.id() == ID_array && !keep_array) ||
190 pointer_type(expr_type) == result.type(),
191 "either non-persistent array or pointer to result");
192
193 return result;
194}
195
198{
199 auto const cache_key = [&] {
200 auto cache_key =
201 state.field_sensitivity.apply(ns, state, dereference_result, false);
203 {
204 let_expr->value() = state.rename<L2>(let_expr->value(), ns).get();
205 }
206 else
207 {
208 cache_key = state.rename<L2>(cache_key, ns).get();
209 }
210 return cache_key;
211 }();
212
213 if(auto cached = state.dereference_cache.find(cache_key))
214 {
215 return *cached;
216 }
217
219 cache_key.type(),
220 "symex",
221 "dereference_cache",
222 dereference_result.source_location(),
224 ns,
225 state.symbol_table)
226 .symbol_expr();
227
228 // we need to lift possible lets
229 // (come from the value set to avoid repeating complex pointer comparisons)
230 auto cache_value = cache_key;
231 lift_lets(state, cache_value);
232
233 auto assign = symex_assignt{
235 state,
237 ns,
239 target};
240
241 assign.assign_symbol(
242 to_ssa_expr(state.rename<L1>(cache_symbol_expr, ns).get()),
245 {});
246
248 return cache_symbol_expr;
249}
250
263 exprt &expr,
264 statet &state,
265 bool write,
267{
268 if(expr.id()==ID_dereference)
269 {
270 bool expr_is_not_null = false;
271
272 if(state.threads.size() == 1)
273 {
275 if(!expr_function.empty())
276 {
277 const dereference_exprt to_check =
279
281 .is_safe_dereference(to_check, state.source.pc);
282 }
283 }
284
285 exprt tmp1;
286 tmp1.swap(to_dereference_expr(expr).pointer());
287
288 // first make sure there are no dereferences in there
290
291 // Depending on the nature of the pointer expression, the recursive deref
292 // operation might have introduced a construct such as
293 // (x == &o1 ? o1 : o2).field, in which case we should simplify to push the
294 // member operator inside the if, then apply field-sensitivity to yield
295 // (x == &o1 ? o1..field : o2..field). value_set_dereferencet can then
296 // apply the dereference operation to each of o1..field and o2..field
297 // independently, as it special-cases the ternary conditional operator.
298 // There may also be index operators in tmp1 which can now be resolved to
299 // constant array cell references, so we replace symbols with constants
300 // first, hoping for a transformation such as
301 // (x == &o1 ? o1 : o2)[idx] =>
302 // (x == &o1 ? o1 : o2)[2] =>
303 // (x == &o1 ? o1[[2]] : o2[[2]])
304 // Note we don't L2 rename non-constant symbols at this point, because the
305 // value-set works in terms of L1 names and we don't want to ask it to
306 // dereference an L2 pointer, which it would not have an entry for.
307
309
311
312 if(symex_config.run_validation_checks)
313 {
314 // make sure simplify has not re-introduced any dereferencing that
315 // had previously been cleaned away
316 INVARIANT(
318 "simplify re-introduced dereferencing");
319 }
320
321 tmp1 = state.field_sensitivity.apply(ns, state, std::move(tmp1), false);
322
323 // we need to set up some elaborate call-backs
325
327 ns,
328 state.symbol_table,
333
334 // std::cout << "**** " << format(tmp1) << '\n';
335 exprt tmp2 =
336 dereference.dereference(tmp1, symex_config.show_points_to_sets);
337 // std::cout << "**** " << format(tmp2) << '\n';
338
339
340 // this may yield a new auto-object
342
343 // Check various conditions for when we should try to cache the expression.
344 // 1. Caching dereferences must be enabled.
345 // 2. Do not cache inside LHS of writes.
346 // 3. Do not cache inside quantifiers (references variables outside their
347 // scope).
348 // 4. Only cache "complicated" expressions, i.e. of the form
349 // [let p = <expr> in ]
350 // (p == &something ? something : ...))
351 // Otherwise we should just return it unchanged.
352 if(
353 symex_config.cache_dereferences && !write && !is_in_binding_expression &&
354 (tmp2.id() == ID_if || tmp2.id() == ID_let))
355 {
356 expr = cache_dereference(tmp2, state);
357 }
358 else
359 {
360 expr = std::move(tmp2);
361 }
362 }
363 else if(
364 expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member &&
365 to_array_type(to_index_expr(expr).array().type()).size().is_zero())
366 {
367 // This is an expression of the form x.a[i],
368 // where a is a zero-sized array. This gets
369 // re-written into *(&x.a+i)
370
372
374 address_of_expr.type()=pointer_type(expr.type());
375
377 tmp.add_source_location()=expr.source_location();
378
379 // recursive call
381
382 expr.swap(tmp);
383 }
384 else if(expr.id()==ID_index &&
385 to_index_expr(expr).array().type().id()==ID_pointer)
386 {
387 // old stuff, will go away
389 }
390 else if(expr.id()==ID_address_of)
391 {
393
394 exprt &object=address_of_expr.object();
395
396 expr = address_arithmetic(
397 object, state, to_pointer_type(expr.type()).base_type().id() == ID_array);
398 }
399 else if(expr.id()==ID_typecast)
400 {
401 exprt &tc_op=to_typecast_expr(expr).op();
402
403 // turn &array into &array[0] when casting to pointer-to-element-type
404 if(
405 tc_op.id() == ID_address_of &&
406 to_address_of_expr(tc_op).object().type().id() == ID_array &&
407 expr.type() ==
409 .element_type()))
410 {
413
415 }
416 else
417 {
419 }
420 }
421 else
422 {
425 Forall_operands(it, expr)
426 {
429 }
430 }
431}
432
433static exprt
435{
437 {
438 deref->op() = f(std::move(deref->op()));
439 return *deref;
440 }
441
442 for(auto &sub : e.operands())
443 sub = apply_to_objects_in_dereference(std::move(sub), f);
444 return e;
445}
446
485{
486 PRECONDITION(!state.call_stack().empty());
487
488 // Symbols whose address is taken need to be renamed to level 1
489 // in order to distinguish addresses of local variables
490 // from different frames.
491 expr = apply_to_objects_in_dereference(std::move(expr), [&](exprt e) {
492 return state.field_sensitivity.apply(
493 ns, state, state.rename<L1>(std::move(e), ns).get(), false);
494 });
495
496 // start the recursion!
497 dereference_rec(expr, state, write, false);
498 // dereferencing may introduce new symbol_exprt
499 // (like __CPROVER_memory)
500 expr = state.rename<L1>(std::move(expr), ns).get();
501
502 // Dereferencing is likely to introduce new member-of-if constructs --
503 // for example, "x->field" may have become "(x == &o1 ? o1 : o2).field."
504 // Run expression simplification, which converts that to
505 // (x == &o1 ? o1.field : o2.field))
506 // before applying field sensitivity. Field sensitivity can then turn such
507 // field-of-symbol expressions into atomic SSA expressions instead of having
508 // to rewrite all of 'o1' otherwise.
509 // Even without field sensitivity this can be beneficial: for example,
510 // "(b ? s1 : s2).member := X" results in
511 // (b ? s1 : s2) := (b ? s1 : s2) with (member := X)
512 // and then
513 // s1 := b ? ((b ? s1 : s2) with (member := X)) : s1
514 // when all we need is
515 // s1 := s1 with (member := X) [and guard b]
516 // s2 := s2 with (member := X) [and guard !b]
517 do_simplify(expr);
518
519 if(symex_config.run_validation_checks)
520 {
521 // make sure simplify has not re-introduced any dereferencing that
522 // had previously been cleaned away
523 INVARIANT(
525 "simplify re-introduced dereferencing");
526 }
527
528 expr = state.field_sensitivity.apply(ns, state, std::move(expr), write);
529}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
byte_extract_exprt make_byte_extract(const exprt &_op, const exprt &_offset, const typet &_type)
Construct a byte_extract_exprt with endianness and byte width matching the current configuration.
Expression classes for byte-level operators.
const byte_extract_exprt & to_byte_extract_expr(const exprt &expr)
pointer_typet pointer_type(const typet &subtype)
Definition c_types.cpp:235
bitvector_typet char_type()
Definition c_types.cpp:106
bitvector_typet c_index_type()
Definition c_types.cpp:16
Operator to return the address of an object.
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Expression of type type extracted from some object op starting at position offset (given in number of...
Operator to dereference a pointer.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
Expression in which some part is missing and can be substituted for another expression.
Base class for all expressions.
Definition expr.h:56
typet & type()
Return the type of the expression.
Definition expr.h:84
operandst & operands()
Definition expr.h:94
const source_locationt & source_location() const
Definition expr.h:231
exprt apply(const namespacet &ns, goto_symex_statet &state, exprt expr, bool write) const
Turn an expression expr into a field-sensitive SSA expression.
sharing_mapt< exprt, symbol_exprt, false, irep_hash > dereference_cache
This is used for eliminating repeated complicated dereferences.
Definition goto_state.h:43
Central data structure: state.
call_stackt & call_stack()
renamedt< exprt, level > rename(exprt expr, const namespacet &ns)
Rewrites symbol expressions in exprt, applying a suffix to each symbol reflecting its most recent ver...
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc.
field_sensitivityt field_sensitivity
symex_targett::sourcet source
std::vector< threadt > threads
irep_idt language_mode
language_mode: ID_java, ID_C or another language identifier if we know the source language in use,...
Definition goto_symex.h:241
path_storaget & path_storage
Symbolic execution paths to be resumed later.
Definition goto_symex.h:810
symex_target_equationt & target
The equation that this execution is building up.
Definition goto_symex.h:266
shadow_memoryt shadow_memory
Shadow memory instrumentation API.
Definition goto_symex.h:839
namespacet ns
Initialized just before symbolic execution begins, to point to both outer_symbol_table and the symbol...
Definition goto_symex.h:258
void trigger_auto_object(const exprt &, statet &)
void lift_lets(statet &, exprt &)
Execute any let expressions in expr using symex_assignt::assign_symbol.
exprt address_arithmetic(const exprt &, statet &, bool keep_array)
Transforms an lvalue expression by replacing any dereference operations it contains with explicit ref...
virtual void do_simplify(exprt &expr)
symbol_exprt cache_dereference(exprt &dereference_result, statet &state)
messaget log
The messaget to write log messages to.
Definition goto_symex.h:278
const symex_configt symex_config
The configuration to use for this symbolic execution.
Definition goto_symex.h:185
virtual void dereference(exprt &, statet &, bool write)
Replace all dereference operations within expr with explicit references to the objects they may refer...
void dereference_rec(exprt &expr, statet &state, bool write, bool is_in_quantifier)
If expr is a dereference_exprt, replace it with explicit references to the objects it may point to.
The trinary if-then-else operator.
Definition std_expr.h:2497
Array index operator.
Definition std_expr.h:1470
const std::string & id_string() const
Definition irep.h:391
void swap(irept &irep)
Definition irep.h:434
const irep_idt & id() const
Definition irep.h:388
message_handlert & get_message_handler()
Definition message.h:183
Split an expression into a base object and a (byte) offset.
std::unordered_map< irep_idt, local_safe_pointerst > safe_pointers
Map function identifiers to local_safe_pointerst instances.
The plus expression Associativity is not specified.
Definition std_expr.h:1002
void insert(const key_type &k, valueU &&m)
Insert element, element must not exist in map.
std::optional< std::reference_wrapper< const mapped_type > > find(const key_type &k) const
Find element.
Expression to hold a symbol (variable)
Definition std_expr.h:131
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition symbol.cpp:121
Functor for symex assignment.
Callback object that goto_symext::dereference_rec provides to value_set_dereferencet to provide value...
Semantic type conversion.
Definition std_expr.h:2073
The type of an expression, extends irept.
Definition type.h:29
Thrown when we encounter an instruction, parameters to an instruction etc.
Wrapper for a function dereferencing pointer expressions using a value set.
#define Forall_operands(it, expr)
Definition expr.h:27
Expression skeleton.
bool has_subexpr(const exprt &expr, const std::function< bool(const exprt &)> &pred)
returns true if the expression has a subexpression that satisfies pred
Deprecated expression utility functions.
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, const namespacet &ns, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with respect to the given namespace ns with the requested name pattern ...
Fresh auxiliary symbol creation.
Symbolic Execution.
Storage of symbolic execution paths to resume.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
const dereference_exprt & to_dereference_expr(const exprt &expr)
Cast an exprt to a dereference_exprt.
std::optional< mp_integer > compute_pointer_offset(const exprt &expr, const namespacet &ns)
Pointer Logic.
@ L2
Definition renamed.h:26
@ L1_WITH_CONSTANT_PROPAGATION
Definition renamed.h:25
@ L1
Definition renamed.h:24
exprt get_original_name(exprt expr)
Undo all levels of renaming.
#define UNREACHABLE
This should be used to mark dead code.
Definition invariant.h:525
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
bool is_ssa_expr(const exprt &expr)
Definition ssa_expr.h:125
const ssa_exprt & to_ssa_expr(const exprt &expr)
Cast a generic exprt to an ssa_exprt.
Definition ssa_expr.h:145
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition std_expr.h:1538
bool can_cast_expr< binding_exprt >(const exprt &base)
Definition std_expr.h:3285
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition std_expr.h:2107
bool can_cast_expr< let_exprt >(const exprt &base)
Definition std_expr.h:3439
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition std_expr.h:2577
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition std_types.h:888
goto_programt::const_targett pc
Symbolic Execution of assignments.
static exprt apply_to_objects_in_dereference(exprt e, const std::function< exprt(exprt)> &f)
Symbolic Execution of ANSI-C.
const type_with_subtypet & to_type_with_subtype(const typet &type)
Definition type.h:208
ssize_t write(int fildes, const void *buf, size_t nbyte)
Definition unistd.c:195
Pointer Dereferencing.