38 auto original_goto_model =
41 if(!original_goto_model.has_value())
43 log.
error() <<
"Unable to read goto binary for linker script merging"
49 std::list<irep_idt> linker_defined_symbols;
51 linker_defined_symbols,
52 original_goto_model->symbol_table,
54 linker_def_outfile());
79 original_goto_model->symbol_table,
83 log.
error() <<
"Could not add linkerscript defs to symbol table"
105 log.
error() <<
"Could not pointerize all linker-defined expressions"
112 *original_goto_model,
118 log.
error() <<
"Could not write linkerscript-augmented binary"
126 const std::string &elf_binary,
127 const std::string &goto_binary,
130 : elf_binary(elf_binary),
131 goto_binary(goto_binary),
133 log(message_handler),
134 replacement_predicates(
136 "address of array's first member",
141 [](
const exprt &expr) {
142 return expr.
id() == ID_address_of &&
143 expr.
type().
id() == ID_pointer &&
151 .
id() == ID_symbol &&
163 .
id() == ID_signedbv;
170 [](
const exprt &expr) {
171 return expr.
id() == ID_address_of &&
172 expr.
type().
id() == ID_pointer &&
182 [](
const exprt &expr) {
183 return expr.
id() == ID_address_of &&
184 expr.
type().
id() == ID_pointer &&
196 [](
const exprt &expr) {
197 return expr.
id() == ID_symbol && expr.
type().
id() == ID_array;
200 "pointer (does not need pointerizing)",
204 [](
const exprt &expr) {
205 return expr.
id() == ID_symbol && expr.
type().
id() == ID_pointer;
223 std::list<symbol_exprt> to_pointerize;
226 if(to_pointerize.empty())
228 log.
debug() <<
"Pointerizing the symbol-table value of symbol " << it->first
231 it.get_writeable_symbol().value, to_pointerize, linker_values);
232 if(to_pointerize.empty() && fail==0)
235 for(
const auto &sym : to_pointerize)
237 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
238 <<
"' in symbol table entry " << it->first <<
". Pretty:\n"
239 << sym.pretty() <<
"\n";
251 std::list<exprt *> expressions = {&instruction.code_nonconst()};
252 if(instruction.has_condition())
253 expressions.push_back(&instruction.condition_nonconst());
255 for(
exprt *insts : expressions)
257 std::list<symbol_exprt> to_pointerize;
259 if(to_pointerize.empty())
263 if(to_pointerize.empty() && fail==0)
266 for(
const auto &sym : to_pointerize)
268 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
269 <<
"' in function " << gf.first <<
". Pretty:\n"
270 << sym.pretty() <<
"\n";
285 const std::string &shape)
287 auto it=linker_values.find(ident);
288 if(it==linker_values.end())
290 log.
error() <<
"Could not find a new expression for linker script-defined "
296 log.
debug() <<
"Pointerizing linker-defined symbol '" << ident
304 std::list<symbol_exprt> &to_pointerize,
308 for(
auto const &pair : linker_values)
311 if(!pattern.match(expr))
314 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
317 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
318 pattern.description());
320 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
322 if(result==to_pointerize.end())
329 to_pointerize.erase(result);
348 std::list<symbol_exprt> &to_pointerize)
const
350 for(
const auto &op : expr.
operands())
352 if(op.id()!=ID_symbol)
356 if(pair!=linker_values.end())
357 to_pointerize.push_back(sym_exp);
359 for(
const auto &op : expr.
operands())
364 The current implementation of
this function is less precise than the
365 commented-out version below. To understand the difference between these
366 implementations, consider the following example:
368 Suppose we have a section in the linker script, 100 bytes long, where the
369 address of the symbol sec_start is the start of the section (value 4096) and the
370 address of sec_end is the end of that section (value 4196).
372 The current implementation synthesizes the
goto-version of the following C:
374 char __sec_array[100];
375 char *sec_start=(&__sec_array[0]);
376 char *sec_end=((&__sec_array[0])+100);
380 This is imprecise
for the following reason: the actual address of the array and
381 the pointers shall be some
random CBMC-
internal address, instead of being 4096
382 and 4196. The linker script, on the other hand, would have specified the exact
383 position of the section, and we even know what the actual values of sec_start
384 and sec_end are from the
object file (these values are in the `addresses` list
385 of the `data` argument to
this function). If the correctness of the code depends
386 on these actual values, then CBMCs model of the code is too imprecise to verify
389 The commented-out version of
this function below synthesizes the following:
391 char *sec_start=4096;
395 This code has both the actual addresses of the start and end of the section and
396 tells CBMC that the intermediate region is valid. However, the allocated_memory
397 macro does not currently allocate an actual
object at the address 4096, so
398 symbolic execution can fail. In particular, the
'allocated memory' is part of
399 __CPROVER_memory, which does not have a bounded size;
this means that (
for
400 example) calls to
memcpy or
memset fail, because the first and third arguments
401 do not have know n size. The commented-out implementation should be reinstated
404 In either
case,
no other changes to the rest of the code (outside
this function)
405 should be necessary. The rest of
this file converts expressions containing the
406 linker-defined symbol into pointer types
if they were not already, and
this is
407 the right behaviour
for both implementations.
411 const std::string &linker_script,
416 std::map<irep_idt, std::size_t> truncated_symbols;
419 bool has_end=d[
"has-end-symbol"].is_true();
423 << d[
"start-symbol"].value;
432 log.warning() <<
"Object section '" << d[
"section"].value <<
"' of size "
433 << array_size <<
" is too large to model. Truncating to "
445 std::ostringstream array_comment;
446 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size "
447 << array_size <<
" bytes";
456 array_sym.is_static_lifetime =
true;
457 array_sym.is_lvalue =
true;
458 array_sym.is_state_var =
true;
459 array_sym.value = *zi;
460 array_sym.location = array_loc;
462 bool failed = symbol_table.
add(array_sym);
471 start_sym.is_static_lifetime =
true;
472 start_sym.is_lvalue =
true;
473 start_sym.is_state_var =
true;
474 start_sym.value = array_start;
475 linker_values.emplace(
476 d[
"start-symbol"].value,
477 std::make_pair(start_sym.symbol_expr(), array_start));
481 auto it = std::find_if(
484 [&d](
const jsont &add) {
485 return add[
"sym"].
value == d[
"start-symbol"].value;
489 log.error() <<
"Start: Could not find address corresponding to symbol '"
490 << d[
"start-symbol"].value <<
"' (start of section)"
496 std::ostringstream start_comment;
497 start_comment <<
"Pointer to beginning of object section '"
498 << d[
"section"].value <<
"'. Original address in object file"
499 <<
" is " << (*it)[
"val"].value;
501 start_sym.location = start_loc;
503 auto start_sym_entry = symbol_table.
insert(start_sym);
504 if(!start_sym_entry.second)
505 start_sym_entry.first = start_sym;
509 plus_exprt array_end(array_start, array_size_expr);
512 end_sym.is_static_lifetime =
true;
513 end_sym.is_lvalue =
true;
514 end_sym.is_state_var =
true;
515 end_sym.value = array_end;
516 linker_values.emplace(
517 d[
"end-symbol"].value,
518 std::make_pair(end_sym.symbol_expr(), array_end));
520 auto entry = std::find_if(
523 [&d](
const jsont &add) {
524 return add[
"sym"].
value == d[
"end-symbol"].value;
528 log.debug() <<
"Could not find address corresponding to symbol '"
529 << d[
"end-symbol"].value <<
"' (end of section)"
535 std::ostringstream end_comment;
536 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
537 <<
"'. Original address in object file"
538 <<
" is " << (*entry)[
"val"].value;
540 end_sym.location = end_loc;
542 auto end_sym_entry = symbol_table.
insert(end_sym);
543 if(!end_sym_entry.second)
544 end_sym_entry.first = end_sym;
555 auto it=linker_values.find(
irep_idt(d[
"sym"].value));
556 if(it!=linker_values.end())
563 const auto &pair=truncated_symbols.find(d[
"sym"].value);
564 if(pair==truncated_symbols.end())
565 symbol_value=d[
"val"].value;
569 <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from "
571 <<
" because it corresponds to the size of a too-large section."
578 loc.
set_comment(
"linker script-defined symbol: char *"+
579 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
594 symbol.
value = rhs_tc;
596 linker_values.emplace(
616 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
618 f.add_source_location()=loc;
621 i.make_function_call(f);
622 initialize_instructions.push_front(i);
631 sym.is_lvalue=sym.is_static_lifetime=
true;
632 symbol_table.
add(sym);
639 loc.
set_comment(
"linker script-defined symbol: char *"+
640 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
652 linker_values.emplace(
653 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
656 assign.add_source_location()=loc;
658 assign_i.make_assignment(assign);
659 initialize_instructions.push_front(assign_i);
666 std::list<irep_idt> &linker_defined_symbols,
668 const std::string &out_file,
669 const std::string &def_out_file)
671 for(
auto const &pair : symbol_table.
symbols)
674 pair.second.is_extern && pair.second.value.is_nil() &&
677 linker_defined_symbols.push_back(pair.second.name);
681 std::ostringstream linker_def_str;
683 linker_defined_symbols.begin(),
684 linker_defined_symbols.end(),
685 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
686 log.
debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n"
690 std::ofstream linker_def_file(linker_def_infile());
691 linker_def_file << linker_def_str.str();
692 linker_def_file.close();
694 std::vector<std::string> argv=
698 "--object", out_file,
699 "--sym-file", linker_def_infile(),
700 "--out-file", def_out_file
704 argv.push_back(
"--very-verbose");
706 argv.push_back(
"--verbose");
709 for(std::size_t i=0; i<argv.size(); i++)
713 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
721 const std::list<irep_idt> &linker_defined_symbols,
725 for(
const auto &sym : linker_defined_symbols)
726 if(linker_values.find(sym)==linker_values.end())
729 <<
"' was declared extern but never given a value, even in "
734 linker_values.emplace(sym, std::make_pair(null_sym, null_pointer));
737 for(
const auto &pair : linker_values)
739 auto it=std::find(linker_defined_symbols.begin(),
740 linker_defined_symbols.end(), pair.first);
741 if(it==linker_defined_symbols.end())
745 <<
"Linker script-defined symbol '" << pair.first <<
"' was "
746 <<
"either defined in the C source code, not declared extern in "
747 <<
"the C source code, or does not appear in the C source code"
761 !(data_object.
find(
"regions") != data_object.
end() &&
762 data_object.
find(
"addresses") != data_object.
end() &&
772 return address.
find(
"val") != address.end() &&
773 address.find(
"sym") != address.end() &&
774 address[
"val"].is_number() && address[
"sym"].is_string();
784 return region.
find(
"start") != region.end() &&
785 region.find(
"size") != region.end() &&
786 region.find(
"annot") != region.end() &&
787 region.find(
"commt") != region.end() &&
788 region.find(
"start-symbol") != region.end() &&
789 region.find(
"has-end-symbol") != region.end() &&
790 region.find(
"section") != region.end() &&
791 region[
"start"].is_number() && region[
"size"].is_number() &&
792 region[
"annot"].is_string() &&
793 region[
"start-symbol"].is_string() &&
794 region[
"section"].is_string() && region[
"commt"].is_string() &&
795 ((region[
"has-end-symbol"].is_true() &&
796 region.find(
"end-symbol") != region.end() &&
797 region[
"end-symbol"].is_string()) ||
798 (region[
"has-end-symbol"].is_false() &&
799 region.find(
"size-symbol") != region.end() &&
800 region.find(
"end-symbol") == region.end() &&
801 region[
"size-symbol"].is_string()));
std::string array_name(const namespacet &ns, const exprt &expr)
unsignedbv_typet unsigned_int_type()
pointer_typet pointer_type(const typet &subtype)
bitvector_typet char_type()
Operator to return the address of an object.
std::string get_value(char option) const
virtual bool isset(char option) const
A goto_instruction_codet representing an assignment in the program.
goto_instruction_codet representation of a function call statement.
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
A constant literal expression.
void set_value(const irep_idt &value)
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Base class for all expressions.
source_locationt & add_source_location()
const source_locationt & source_location() const
typet & type()
Return the type of the expression.
bool is_constant() const
Return whether the expression is a constant.
function_mapt function_map
symbol_tablet symbol_table
Symbol table.
goto_functionst goto_functions
GOTO functions.
This class represents an instruction in the GOTO intermediate representation.
A generic container class for the GOTO intermediate representation of one function.
instructionst instructions
The list of instructions in the goto program.
std::list< instructiont > instructionst
const irep_idt & id() const
iterator find(const std::string &key)
void output(std::ostream &out) const
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
const std::string & elf_binary
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
int ls_data2instructions(jsont &data, const std::string &linker_script, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into the symbol_table.
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
const std::string & goto_binary
unsigned get_verbosity() const
source_locationt source_location
mstreamt & warning() const
message_handlert & get_message_handler()
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
The null pointer constant.
The plus expression Associativity is not specified.
Patterns of expressions that should be replaced.
void set_comment(const irep_idt &comment)
void set_file(const irep_idt &file)
Expression to hold a symbol (variable)
const irep_idt & get_identifier() const
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
virtual iteratort begin() override
virtual iteratort end() override
virtual std::pair< symbolt &, bool > insert(symbolt symbol) override
Author: Diffblue Ltd.
source_locationt location
Source code location of definition of symbol.
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
typet type
Type of symbol.
exprt value
Initial value of symbol.
Semantic type conversion.
static exprt conditional_cast(const exprt &expr, const typet &type)
Compile and link source and object files.
std::optional< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
Goto Programs with Functions.
const std::string & id2string(const irep_idt &d)
json_arrayt & to_json_array(jsont &json)
json_objectt & to_json_object(jsont &json)
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Merge linker script-defined symbols into a goto-program.
Magic numbers used throughout the codebase.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
const mp_integer string2integer(const std::string &n, unsigned base)
API to expression classes for Pointers.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
int run(const std::string &what, const std::vector< std::string > &argv)
#define CHECK_RETURN(CONDITION)
void recreate_initialize_function(goto_modelt &goto_model, message_handlert &message_handler)
Regenerates the CPROVER_INITIALIZE function, which initializes all non-function symbols of the goto m...
#define INITIALIZE_FUNCTION
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
unsigned safe_string2unsigned(const std::string &str, int base)
void * memcpy(void *dst, const void *src, size_t n)
void * memset(void *s, int c, size_t n)
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
static bool failed(bool error_indicator)