CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
shadow_memory.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Symex Shadow Memory Instrumentation
4
5Author: Peter Schrammel
6
7\*******************************************************************/
8
11
12#include "shadow_memory.h"
13
16#include <util/format_expr.h>
17#include <util/format_type.h>
18#include <util/fresh_symbol.h>
19#include <util/pointer_expr.h>
21
24
25#include "goto_symex_state.h"
26#include "shadow_memory_util.h"
27
// initialize_shadow_memory: allocates and initializes a shadow memory field
// for the given original memory `expr`.
//
// For every (field name, initial value expression) pair in `fields`, a shadow
// symbol is registered through add_field and then given its initial value by
// a symex_assign.
//
// state  - symex state passed on to add_field and symex_assign.
// expr   - the original memory; taken by value.
// fields - the field definitions to iterate over.
//
// NOTE(review): this listing has dropped some lines of the definition (the
// line opening it, the `fields` parameter, a statement just before the loop,
// the initializer call in the else-branch and the end of the log statement).
// The reference list in this file gives the signature as
// `void initialize_shadow_memory(goto_symex_statet &state, exprt expr,
// const shadow_memory_field_definitionst::field_definitiont &fields)`.
29 goto_symex_statet &state,
30 exprt expr,
32{
33 // clean_pointer_expr may change the type
// The type is therefore captured up front; it is used below both as the
// type of the shadow field and as the type of its initial value.
// NOTE(review): presumably the dropped line after this one calls
// clean_pointer_expr(expr) — confirm against the original file.
34 typet type = expr.type();
36 for(const auto &field_pair : fields)
37 {
// field_pair.first is the field name, field_pair.second its initial value
// expression.
38 const symbol_exprt &shadow = add_field(state, expr, field_pair.first, type);
39
// An initial value that is a typecast of a zero operand is handled by
// zero-initializing the whole shadow object.
40 if(
41 field_pair.second.id() == ID_typecast &&
42 to_typecast_expr(field_pair.second).op().is_zero())
43 {
44 const auto zero_value =
45 zero_initializer(type, expr.source_location(), ns);
46 CHECK_RETURN(zero_value.has_value());
47
48 symex_assign(state, shadow, *zero_value);
49 }
50 else
51 {
// Any other initial value goes through init_value instead.
// NOTE(review): the expression that computes init_value is on a dropped
// line; presumably an expr_initializer(...) call fed with init_expr —
// confirm.
52 exprt init_expr = field_pair.second;
53 const auto init_value =
55 CHECK_RETURN(init_value.has_value());
56
57 symex_assign(state, shadow, *init_value);
58 }
59
60 log.debug() << "Shadow memory: initialize field "
61 << id2string(shadow.get_identifier()) << " for " << format(expr)
62 << " with initial value " << format(field_pair.second)
64 }
65}
66
// add_field: registers a shadow memory field for the given original memory.
//
// state      - symex state; supplies the current function id, the current
//              program counter (for the source location) and the symbol table.
// expr       - the original memory the field belongs to.
// field_name - name of the field.
// field_type - type given to the new symbol.
//
// Returns a reference to the `shadow` member of the entry appended last to
// `addresses`.
//
// NOTE(review): the line opening the definition, the head of the
// symbol-creating call, the declaration of `addresses` and the argument of
// push_back are missing from this listing. The arguments that are visible
// line up with get_fresh_aux_symbol as listed in this file — confirm.
68 goto_symex_statet &state,
69 const exprt &expr,
70 const irep_idt &field_name,
71 const typet &field_type)
72{
// The symbol of the function currently being executed is looked up so that
// its mode can be passed on to the symbol creation below.
73 const auto &function_symbol = ns.lookup(state.source.function_id);
76 field_type,
79 state.source.pc->source_location(),
80 function_symbol.mode,
81 state.symbol_table)
82 .symbol_expr();
83
85 addresses.push_back(
87
88 return addresses.back().shadow;
89}
90
// symex_set_field: symbolically executes a __CPROVER_set_field call.
//
// state     - symex state; provides the value set and the registered shadow
//             memory addresses per field.
// arguments - exactly three operands: arguments[0] is the pointer whose
//             shadow memory is written and arguments[2] is the value to
//             store. NOTE(review): `field_name` is defined on a line missing
//             from this listing; presumably it is extracted from
//             arguments[1], as symex_get_field does — confirm.
//
// Throws invalid_input_exceptiont when the resolved shadow location has the
// empty type (access through void*). Logs a warning and does nothing when the
// value set contains only the null pointer, or when no shadow memory location
// can be determined for the pointer.
//
// NOTE(review): several lines of this definition (its opening line, the
// heads of two checks, some calls and the final assignment) are missing from
// this listing.
92 goto_symex_statet &state,
93 const exprt::operandst &arguments)
94{
95 // parse set_field call
97 arguments.size() == 3, CPROVER_PREFIX "set_field requires 3 arguments");
99
100 exprt expr = arguments[0];
101 typet expr_type = expr.type();
// The first argument must be a pointer; the offending type is attached as a
// diagnostic otherwise.
103 expr_type.id() == ID_pointer,
104 "shadow memory requires a pointer expression",
105 irep_pretty_diagnosticst{expr_type});
106
107 exprt value = arguments[2];
// The field must have exactly one entry in address_fields before it can be
// written.
109 INVARIANT(
110 state.shadow_memory.address_fields.count(field_name) == 1,
111 id2string(field_name) + " should exist");
112 const auto &addresses = state.shadow_memory.address_fields.at(field_name);
113
114 // get value set
116
118
119 std::vector<exprt> value_set = state.value_set.get_value_set(expr, ns);
// Nothing can be written if the pointer can only be NULL.
121 if(check_value_set_contains_only_null_ptr(ns, log, value_set, expr))
122 {
123 log.warning() << "Shadow memory: cannot set shadow memory of NULL"
124 << messaget::eom;
125 return;
126 }
127
128 // build lhs
// get_shadow_memory returns the shadow location for `expr` (if any) and
// reports the size of the mux it had to build through `mux_size`.
129 const exprt &rhs = value;
130 size_t mux_size = 0;
131 std::optional<exprt> maybe_lhs =
132 get_shadow_memory(expr, value_set, addresses, ns, log, mux_size);
133
134 // add to equation
135 if(maybe_lhs.has_value())
136 {
// A mux of 10 or more entries is reported at warning level, smaller ones
// only at debug level.
137 if(mux_size >= 10)
138 {
139 log.warning() << "Shadow memory: mux size set_field: " << mux_size
140 << messaget::eom;
141 }
142 else
143 {
144 log.debug() << "Shadow memory: mux size set_field: " << mux_size
145 << messaget::eom;
146 }
147 const exprt lhs = deref_expr(*maybe_lhs);
148
150
// A shadow location of empty type means the access went through void*; this
// is rejected as invalid user input.
151 if(lhs.type().id() == ID_empty)
152 {
153 std::stringstream s;
154 s << "Shadow memory: cannot set shadow memory via type void* for "
155 << format(expr)
156 << ". Insert a cast to the type that you want to access.";
157 throw invalid_input_exceptiont(s.str());
158 }
159
160 // Get the type of the shadow memory for this field
162 // Add a conditional cast to the shadow memory field type if `rhs` is not of
163 // the expected type
164 const exprt casted_rhs =
166 // We replicate the rhs value on each byte of the value that we set.
167 // This allows the get_field or/max semantics to operate correctly
168 // on unions.
169 const auto per_byte_rhs =
171 CHECK_RETURN(per_byte_rhs.has_value());
172
// NOTE(review): the head and the last argument of the final call are
// missing from this listing; presumably a symex_assign of the per-byte
// value to `lhs` — confirm.
175 state,
176 lhs,
178 }
179 else
180 {
181 log.warning() << "Shadow memory: cannot set_field for " << format(expr)
182 << messaget::eom;
183 }
184}
185
186// Function synopsis
187// value_set = get_value_set(expr)
188// foreach object in value_set:
189// if(object invalid) continue;
190// get_field(&exact_match)
191// if(exact_match)
192// return;
193// return;
// symex_get_field: symbolically executes a __CPROVER_get_field call.
//
// state     - symex state; provides the value set and the registered shadow
//             memory addresses per field.
// lhs       - the expression that receives the value read; the value is
//             conditionally cast to lhs.type() before being assigned.
// arguments - exactly two operands: arguments[0] is the pointer whose shadow
//             memory is read, arguments[1] yields the field name.
//
// The function collects (condition, value) pairs for the objects in the value
// set of the pointer, combines them into one expression and assigns that to
// `lhs`. If no pair could be collected, a warning is logged and a fall-back
// assignment is made instead.
//
// NOTE(review): several lines of this definition (its opening line, the
// heads of some checks and calls, and a few declarations such as
// `field_init_expr` and `rhs`) are missing from this listing.
195 goto_symex_statet &state,
196 const exprt &lhs,
197 const exprt::operandst &arguments)
198{
199 INVARIANT(
200 arguments.size() == 2, CPROVER_PREFIX "get_field requires 2 arguments");
201 irep_idt field_name = extract_field_name(arguments[1]);
202
203 exprt expr = arguments[0];
204 typet expr_type = expr.type();
// The first argument must be a pointer.
206 expr_type.id() == ID_pointer,
207 "shadow memory requires a pointer expression");
209
// The field must have exactly one entry in address_fields before it can be
// read.
210 INVARIANT(
211 state.shadow_memory.address_fields.count(field_name) == 1,
212 id2string(field_name) + " should exist");
213 const auto &addresses = state.shadow_memory.address_fields.at(field_name);
214
215 // Return null (invalid object) instead of auto-object or invalid-object.
216 // This will "polish" expr from invalid and auto-obj
218
219 std::vector<exprt> value_set = state.value_set.get_value_set(expr, ns);
221
// Accumulates the (read condition, value) pairs from which the final rhs is
// built.
222 std::vector<std::pair<exprt, exprt>> rhs_conds_values;
224 // Used to give a default value for invalid pointers and other usages
226
// NOTE(review): the condition guarding the next block is on a missing line;
// going by the comment inside it, it tests for a null/invalid pointer in the
// value set — confirm.
228 {
230 // If we have an invalid pointer, then return the default value of the
231 // shadow memory as dereferencing it would fail
232 rhs_conds_values.emplace_back(
233 true_exprt(),
235 }
236
237 for(const auto &matched_object : value_set)
238 {
239 // Ignore "unknown"
241 {
242 log.warning() << "Shadow memory: value set contains unknown for "
243 << format(expr) << messaget::eom;
244 continue;
245 }
246 // Ignore "integer_address"
247 if(
248 to_object_descriptor_expr(matched_object).root_object().id() ==
250 {
251 log.warning() << "Shadow memory: value set contains integer_address for "
252 << format(expr) << messaget::eom;
253 continue;
254 }
255 // Ignore "ID_C_is_failed_symbol" (another incarnation of invalid pointer)
256 // TODO: check if this is obsolete (or removed by
257 // replace_invalid_object_by_null) or add default value
258 if(matched_object.type().get_bool(ID_C_is_failed_symbol))
259 {
260 log.warning() << "Shadow memory: value set contains failed symbol for "
261 << format(expr) << messaget::eom;
262 continue;
263 }
264
// Set by the candidate lookup below (it is passed by reference according to
// the get_shadow_dereference_candidates signature listed in this file).
265 bool exact_match = false;
266
267 // List of condition ==> value (read condition implies values)
269 ns,
270 log,
272 addresses,
273 field_init_expr.type(),
274 expr,
275 lhs.type(),
277
278 // If we have an exact match we discard all the previous conditions and
279 // create an assignment. Then we'll break
280 if(exact_match)
281 {
283 }
284 // Process this match (exact will contain only one set of conditions).
285 rhs_conds_values.insert(
286 rhs_conds_values.end(),
// An exact match makes the remaining objects of the value set irrelevant.
289 if(exact_match)
290 {
291 break;
292 }
293 }
294
295 // If we have at least a condition ==> value
296 if(!rhs_conds_values.empty())
297 {
298 // Build the rhs expression from the shadow memory (big switch for all
299 // condition ==> value)
// The subtraction cannot wrap around: this branch is only entered when
// rhs_conds_values is non-empty.
302 const size_t mux_size = rhs_conds_values.size() - 1;
303 // Don't debug if the switch is too big
// That is: a mux of 10 or more entries is reported at warning level,
// smaller ones only at debug level.
304 if(mux_size >= 10)
305 {
306 log.warning() << "Shadow memory: mux size get_field: " << mux_size
307 << messaget::eom;
308 }
309 else
310 {
311 log.debug() << "Shadow memory: mux size get_field: " << mux_size
312 << messaget::eom;
313 }
314
316
317 // create the assignment of __CPROVER_shadow_memory_get_field
318 symex_assign(state, lhs, typecast_exprt::conditional_cast(rhs, lhs.type()));
319 }
320 else
321 {
322 // We don't have any condition ==> value for the pointer, so we fall-back to
323 // the initialization value of the shadow-memory.
324 log.warning() << "Shadow memory: cannot get_field for " << format(expr)
325 << messaget::eom;
// NOTE(review): the head and the last argument of the fall-back call are
// missing from this listing; presumably a symex_assign of the field's
// initial value to `lhs` — confirm.
327 state,
328 lhs,
330 }
331}
332
333// TODO: the following 4 functions (`symex_field_static_init`,
334// `symex_field_static_init_string_constant`,
335// `symex_field_local_init`,
336// `symex_field_dynamic_init`) do filtering on
337// the input symbol name and then call `initialize_shadow_memory` accordingly.
338// We want to refactor and improve the way the filtering is done, but given
339// that we don't have an easy mechanism to validate that we haven't changed the
340// behaviour, we want to postpone changing this until the full shadow memory
341// functionalities are integrated and we have good regression/unit testing.
342
// symex_field_static_init: initializes global-scope shadow memory for
// global/static variables.
//
// state - symex state; supplies the global field definitions.
// lhs   - SSA expression of the variable being initialized.
//
// The function returns without doing anything when any of the following
// holds:
//  - the original expression of `lhs` is not a symbol;
//  - a further condition that is on a line missing from this listing;
//  - the identifier starts with CPROVER_PREFIX but not with
//    CPROVER_PREFIX "errno";
//  - the symbol is auxiliary and its name does not contain "::return_value";
//  - the symbol does not have static lifetime;
//  - the symbol name contains "__cs_".
//
// NOTE(review): the line opening the definition, one filter condition and the
// head of the final call are missing from this listing; the final call is
// presumably initialize_shadow_memory — confirm.
344 goto_symex_statet &state,
345 const ssa_exprt &lhs)
346{
347 if(lhs.get_original_expr().id() != ID_symbol)
348 return;
349
350 const irep_idt &identifier =
351 to_symbol_expr(lhs.get_original_expr()).get_identifier();
352
354 return;
355
// CPROVER-internal symbols are skipped, with the exception of those whose
// name starts with CPROVER_PREFIX "errno".
356 if(
357 identifier.starts_with(CPROVER_PREFIX) &&
358 !identifier.starts_with(CPROVER_PREFIX "errno"))
359 {
360 return;
361 }
362
363 const symbolt &symbol = ns.lookup(identifier);
364
// Auxiliary symbols are skipped unless they are return values; symbols
// without static lifetime are always skipped.
365 if(
366 (id2string(symbol.name).find("::return_value") == std::string::npos &&
367 symbol.is_auxiliary) ||
368 !symbol.is_static_lifetime)
369 return;
370 if(id2string(symbol.name).find("__cs_") != std::string::npos)
371 return;
372
373 const typet &type = symbol.type;
374 log.debug() << "Shadow memory: global memory " << id2string(identifier)
375 << " of type " << from_type(ns, "", type) << messaget::eom;
376
378 state, lhs, state.shadow_memory.fields.global_fields);
379}
380
// symex_field_static_init_string_constant: initializes global-scope shadow
// memory for string constants.
//
// state - symex state; supplies the global field definitions.
// expr  - SSA expression on the receiving side; only used for filtering.
// rhs   - the assigned value; it is converted with to_address_of_expr and its
//         object with to_index_expr, so it is expected to be the address of
//         an index expression. The indexed array is additionally converted
//         with to_string_constant for the log message.
//
// Nothing is done when the original expression of `expr` is a symbol whose
// identifier starts with CPROVER_PREFIX.
//
// NOTE(review): the line opening the definition, one line of the guard
// condition and the head of the final call are missing from this listing; the
// final call is presumably initialize_shadow_memory — confirm.
382 goto_symex_statet &state,
383 const ssa_exprt &expr,
384 const exprt &rhs)
385{
386 if(
387 expr.get_original_expr().id() == ID_symbol &&
389 .get_identifier()
390 .starts_with(CPROVER_PREFIX))
391 {
392 return;
393 }
394 const index_exprt &index_expr =
395 to_index_expr(to_address_of_expr(rhs).object());
396
397 const typet &type = index_expr.array().type();
398 log.debug() << "Shadow memory: global memory "
399 << id2string(to_string_constant(index_expr.array()).value())
400 << " of type " << from_type(ns, "", type) << messaget::eom;
401
// The shadow memory is attached to the string constant itself (the indexed
// array), not to `expr`.
403 state, index_expr.array(), state.shadow_memory.fields.global_fields);
404}
405
// symex_field_local_init: initializes local-scope shadow memory for local
// variables and parameters.
//
// state - symex state.
// expr  - SSA expression of the local; its original expression is converted
//         with to_symbol_expr without a preceding check in this function.
//
// The function returns without doing anything when the symbol
//  - is auxiliary and its name does not contain "::return_value";
//  - has a name containing "malloc::" together with one of "malloc_size",
//    "malloc_res", "record_malloc" or "record_may_leak";
//  - has a name containing "__builtin_alloca::" together with one of
//    "alloca_size", "record_malloc", "record_alloca" or "res";
//  - has a name containing "__cs_".
//
// NOTE(review): the line opening the definition, the definition of `expr_l1`
// and the final call are missing from this listing. `expr_l1` is presumably
// obtained with remove_level_2 and the final call is presumably
// initialize_shadow_memory with the local field definitions — confirm.
407 goto_symex_statet &state,
408 const ssa_exprt &expr)
409{
410 const symbolt &symbol =
411 ns.lookup(to_symbol_expr(expr.get_original_expr()).get_identifier());
412
413 const std::string symbol_name = id2string(symbol.name);
414 if(
415 symbol.is_auxiliary &&
416 symbol_name.find("::return_value") == std::string::npos)
417 return;
418 if(
419 symbol_name.find("malloc::") != std::string::npos &&
420 (symbol_name.find("malloc_size") != std::string::npos ||
421 symbol_name.find("malloc_res") != std::string::npos ||
422 symbol_name.find("record_malloc") != std::string::npos ||
423 symbol_name.find("record_may_leak") != std::string::npos))
424 {
425 return;
426 }
// NOTE(review): the last alternative below is a plain substring search for
// "res", so it matches any name under "__builtin_alloca::" that contains
// those three letters anywhere — check that this breadth is intended.
427 if(
428 symbol_name.find("__builtin_alloca::") != std::string::npos &&
429 (symbol_name.find("alloca_size") != std::string::npos ||
430 symbol_name.find("record_malloc") != std::string::npos ||
431 symbol_name.find("record_alloca") != std::string::npos ||
432 symbol_name.find("res") != std::string::npos))
433 {
434 return;
435 }
436 if(symbol_name.find("__cs_") != std::string::npos)
437 return;
438
439 const typet &type = expr.type();
441 log.debug() << "Shadow memory: local memory "
442 << id2string(expr_l1.get_identifier()) << " of type "
443 << from_type(ns, "", type) << messaget::eom;
444
447}
448
// symex_field_dynamic_init: initializes global-scope shadow memory for
// dynamically allocated memory.
//
// state - symex state; supplies the global field definitions.
// expr  - the dynamically allocated object; its type is logged.
// code  - the side effect expression; not referenced in the lines visible
//         here.
//
// NOTE(review): the line opening the definition and the head of the final
// call are missing from this listing; the final call is presumably
// initialize_shadow_memory — confirm.
450 goto_symex_statet &state,
451 const exprt &expr,
452 const side_effect_exprt &code)
453{
454 log.debug() << "Shadow memory: dynamic memory of type "
455 << from_type(ns, "", expr.type()) << messaget::eom;
456
458 state, expr, state.shadow_memory.fields.global_fields);
459}
460
// gather_field_declarations: gathers the available shadow memory field
// definitions from the goto model.
//
// goto_model      - the model whose functions are scanned.
// message_handler - passed on when a declaration is converted.
//
// Returns `field_definitions`, which has a global_fields and a local_fields
// member; each is filled from the matching kind of declaration call.
//
// NOTE(review): the line opening the definition, the declaration of
// `field_definitions`, the constants the callee identifier is compared with
// and the heads of the two conversion calls are missing from this listing.
// The conversion calls are presumably convert_field_declaration — confirm.
462 const abstract_goto_modelt &goto_model,
463 message_handlert &message_handler)
464{
466
467 // Gather shadow memory declarations from goto model
468 for(const auto &goto_function : goto_model.get_goto_functions().function_map)
469 {
470 const auto &goto_program = goto_function.second.body;
471 forall_goto_program_instructions(target, goto_program)
472 {
// Only function calls are of interest ...
473 if(!target->is_function_call())
474 continue;
475
476 const auto &code_function_call = to_code_function_call(target->code());
477 const exprt &function = code_function_call.function();
478
// ... and only those whose callee is a plain symbol.
479 if(function.id() != ID_symbol)
480 continue;
481
482 const irep_idt &identifier = to_symbol_expr(function).get_identifier();
483
// First case: the declaration goes into global_fields (is_global = true).
484 if(
485 identifier ==
487 {
490 field_definitions.global_fields,
491 true,
492 message_handler);
493 }
// Second case: the declaration goes into local_fields (is_global = false).
494 else if(
495 identifier ==
497 {
500 field_definitions.local_fields,
501 false,
502 message_handler);
503 }
504 }
505 }
506 return field_definitions;
507}
508
// convert_field_declaration: converts a field declaration call into an entry
// of `fields`.
//
// The reference list in this file gives the signature as
// `static void convert_field_declaration(const code_function_callt
// &code_function_call, shadow_memory_field_definitionst::field_definitiont
// &fields, bool is_global, message_handlert &message_handler)`.
//
// code_function_call - the declaration call; must have exactly two arguments.
//                      The second argument is the field's initial value, and
//                      its type is the field's type.
// fields             - map from field name to initial value that receives the
//                      entry; an existing entry with the same name is
//                      overwritten by the final assignment.
// is_global          - only selects the wording ("global " / "local ") of the
//                      debug message in the lines visible here.
// message_handler    - used to construct the local messaget for logging.
//
// NOTE(review): the line opening the definition, the first two parameters,
// the definition of `field_name`, the head of the first type check and the
// statements carrying the two error messages are missing from this listing.
// `field_name` is presumably extracted from the first call argument — confirm.
512 bool is_global,
513 message_handlert &message_handler)
514{
515 INVARIANT(
516 code_function_call.arguments().size() == 2,
519 " requires 2 arguments");
521
522 exprt expr = code_function_call.arguments()[1];
523
524 messaget log(message_handler);
525 log.debug() << "Shadow memory: declare " << (is_global ? "global " : "local ")
526 << "field " << id2string(field_name) << " of type "
527 << format(expr.type()) << messaget::eom;
// The field type has to be a bitvector type ...
529 {
531 "A shadow memory field must be of a bitvector type.");
532 }
// ... that is at most 8 bits wide.
533 if(to_bitvector_type(expr.type()).get_width() > 8)
534 {
536 "A shadow memory field must not be larger than 8 bits.");
537 }
538
539 // record the field's initial value (and type)
540 fields[field_name] = expr;
541}
Pre-defined bitvector types.
const bitvector_typet & to_bitvector_type(const typet &type)
Cast a typet to a bitvector_typet.
Abstract interface to eager or lazy GOTO models.
virtual const goto_functionst & get_goto_functions() const =0
Accessor to get a raw goto_functionst.
virtual void clear()
Reset the abstract state.
Definition ai.h:265
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
goto_instruction_codet representation of a function call statement.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
bool starts_with(const char *s) const
equivalent of as_string().starts_with(s)
Definition dstring.h:95
Base class for all expressions.
Definition expr.h:56
std::vector< exprt > operandst
Definition expr.h:58
typet & type()
Return the type of the expression.
Definition expr.h:84
const source_locationt & source_location() const
Definition expr.h:231
value_sett value_set
Uses level 1 names, and is used to do dereferencing.
Definition goto_state.h:51
Central data structure: state.
shadow_memory_statet shadow_memory
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc.
symex_targett::sourcet source
Array index operator.
Definition std_expr.h:1470
Thrown when user-provided input cannot be processed.
const irep_idt & id() const
Definition irep.h:388
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:154
mstreamt & debug() const
Definition message.h:421
mstreamt & warning() const
Definition message.h:396
static eomt eom
Definition message.h:289
The null pointer constant.
The shadow memory field definitions.
field_definitiont global_fields
Field definitions for global-scope fields.
field_definitiont local_fields
Field definitions for local-scope fields.
std::map< irep_idt, exprt > field_definitiont
A field definition mapping a field name to its initial value.
shadow_memory_field_definitionst fields
The available shadow memory field definitions.
std::map< irep_idt, std::vector< shadowed_addresst > > address_fields
const symbol_exprt & add_field(goto_symex_statet &state, const exprt &expr, const irep_idt &field_name, const typet &field_type)
Registers a shadow memory field for the given original memory.
void symex_get_field(goto_symex_statet &state, const exprt &lhs, const exprt::operandst &arguments)
Symbolically executes a __CPROVER_get_field call.
static void convert_field_declaration(const code_function_callt &code_function_call, shadow_memory_field_definitionst::field_definitiont &fields, bool is_global, message_handlert &message_handler)
Converts a field declaration.
void symex_field_dynamic_init(goto_symex_statet &state, const exprt &expr, const side_effect_exprt &code)
Initialize global-scope shadow memory for dynamically allocated memory.
void symex_field_static_init(goto_symex_statet &state, const ssa_exprt &lhs)
Initialize global-scope shadow memory for global/static variables.
const std::function< void(goto_symex_statet &, const exprt &, const exprt)> symex_assign
void symex_field_static_init_string_constant(goto_symex_statet &state, const ssa_exprt &expr, const exprt &rhs)
Initialize global-scope shadow memory for string constants.
const namespacet & ns
static shadow_memory_field_definitionst gather_field_declarations(const abstract_goto_modelt &goto_model, message_handlert &message_handler)
Gathers the available shadow memory field definitions (__CPROVER_field_decl calls) from the goto mode...
void symex_field_local_init(goto_symex_statet &state, const ssa_exprt &expr)
Initialize local-scope shadow memory for local variables and parameters.
void initialize_shadow_memory(goto_symex_statet &state, exprt expr, const shadow_memory_field_definitionst::field_definitiont &fields)
Allocates and initializes a shadow memory field for the given original memory.
void symex_set_field(goto_symex_statet &state, const exprt::operandst &arguments)
Symbolically executes a __CPROVER_set_field call.
An expression containing a side effect.
Definition std_code.h:1450
Expression providing an SSA-renamed symbol of expressions.
Definition ssa_expr.h:17
const exprt & get_original_expr() const
Definition ssa_expr.h:33
Expression to hold a symbol (variable)
Definition std_expr.h:131
const irep_idt & get_identifier() const
Definition std_expr.h:160
Symbol table entry.
Definition symbol.h:28
bool is_auxiliary
Definition symbol.h:77
bool is_static_lifetime
Definition symbol.h:70
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition symbol.cpp:121
typet type
Type of symbol.
Definition symbol.h:31
irep_idt name
The unique identifier.
Definition symbol.h:40
The Boolean constant true.
Definition std_expr.h:3190
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition std_expr.h:2081
The type of an expression, extends irept.
Definition type.h:29
Thrown when we encounter an instruction, parameters to an instruction etc.
std::vector< exprt > get_value_set(exprt expr, const namespacet &ns) const
Gets values pointed to by expr, including following dereference operators (i.e.
#define CPROVER_PREFIX
std::optional< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
std::optional< exprt > expr_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns, const exprt &init_byte_expr)
Create a value for type type, with all subtype bytes initialized to the given value.
Expression Initialization.
static format_containert< T > format(const T &o)
Definition format.h:37
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, const namespacet &ns, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with respect to the given namespace ns with the requested name pattern ...
Fresh auxiliary symbol creation.
const code_function_callt & to_code_function_call(const goto_instruction_codet &code)
#define forall_goto_program_instructions(it, program)
Symbolic Execution.
const std::string & id2string(const irep_idt &d)
Definition irep.h:44
std::string from_expr(const namespacet &ns, const irep_idt &identifier, const exprt &expr)
std::string from_type(const namespacet &ns, const irep_idt &identifier, const typet &type)
API to expression classes for Pointers.
const object_descriptor_exprt & to_object_descriptor_expr(const exprt &expr)
Cast an exprt to an object_descriptor_exprt.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Symex Shadow Memory Instrumentation.
#define SHADOW_MEMORY_GLOBAL_SCOPE
#define SHADOW_MEMORY_LOCAL_SCOPE
#define SHADOW_MEMORY_PREFIX
#define SHADOW_MEMORY_FIELD_DECL
void replace_invalid_object_by_null(exprt &expr)
Replace an invalid object by a null pointer.
void shadow_memory_log_value_set_match(const namespacet &ns, const messaget &log, const exprt &address, const exprt &expr)
Logs a successful match between an address and a value within the value set.
std::optional< exprt > get_shadow_memory(const exprt &expr, const std::vector< exprt > &value_set, const std::vector< shadow_memory_statet::shadowed_addresst > &addresses, const namespacet &ns, const messaget &log, size_t &mux_size)
Get shadow memory values for a given expression within a specified value set.
exprt deref_expr(const exprt &expr)
Wraps a given expression into a dereference_exprt unless it is an address_of_exprt in which case it j...
bool contains_null_or_invalid(const std::vector< exprt > &value_set, const exprt &address)
Given a pointer expression check to see if it can be a null pointer or an invalid object within value...
const exprt & get_field_init_expr(const irep_idt &field_name, const goto_symex_statet &state)
Retrieve the expression that a field was initialised with within a given symex state.
bool check_value_set_contains_only_null_ptr(const namespacet &ns, const messaget &log, const std::vector< exprt > &value_set, const exprt &expr)
Checks if value_set contains only a NULL pointer expression of the same type of expr.
const typet & get_field_init_type(const irep_idt &field_name, const goto_symex_statet &state)
Retrieves the type of the shadow memory by returning the type of the shadow memory initializer value.
void shadow_memory_log_set_field(const namespacet &ns, const messaget &log, const irep_idt &field_name, const exprt &expr, const exprt &value)
Logs setting a value to a given shadow field.
void clean_pointer_expr(exprt &expr)
Clean the given pointer expression so that it has the right shape for being used for identifying shad...
std::vector< std::pair< exprt, exprt > > get_shadow_dereference_candidates(const namespacet &ns, const messaget &log, const exprt &matched_object, const std::vector< shadow_memory_statet::shadowed_addresst > &addresses, const typet &field_type, const exprt &expr, const typet &lhs_type, bool &exact_match)
Get a list of (condition, value) pairs for a certain pointer from the shadow memory,...
exprt build_if_else_expr(const std::vector< std::pair< exprt, exprt > > &conds_values)
Build an if-then-else chain from a vector containing pairs of expressions.
void shadow_memory_log_get_field(const namespacet &ns, const messaget &log, const irep_idt &field_name, const exprt &expr)
Logs getting a value corresponding to a shadow memory field.
irep_idt extract_field_name(const exprt &string_expr)
Extracts the field name identifier from a string expression, e.g.
void shadow_memory_log_value_set(const namespacet &ns, const messaget &log, const std::vector< exprt > &value_set)
Logs the retrieval of the value associated with a given shadow memory field.
void shadow_memory_log_text_and_expr(const namespacet &ns, const messaget &log, const char *text, const exprt &expr)
Generic logging function that will log depending on the configured verbosity.
Symex Shadow Memory Instrumentation Utilities.
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:534
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
#define DATA_INVARIANT_WITH_DIAGNOSTICS(CONDITION, REASON,...)
Definition invariant.h:535
ssa_exprt remove_level_2(ssa_exprt ssa)
Definition ssa_expr.cpp:219
#define INITIALIZE_FUNCTION
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition std_expr.h:1538
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition std_expr.h:2107
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition std_expr.h:272
const string_constantt & to_string_constant(const exprt &expr)
goto_programt::const_targett pc