CBMC
linker_script_merge.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Linker Script Merging
4 
5 Author: Kareem Khazem <karkhaz@karkhaz.com>, 2017
6 
7 \*******************************************************************/
8 
9 #include "linker_script_merge.h"
10 
11 #include <util/arith_tools.h>
12 #include <util/c_types.h>
13 #include <util/cmdline.h>
14 #include <util/expr_initializer.h>
15 #include <util/magic.h>
16 #include <util/pointer_expr.h>
17 #include <util/run.h>
18 #include <util/tempfile.h>
19 
23 
24 #include <json/json_parser.h>
26 
27 #include "compile.h"
28 
29 #include <algorithm>
30 #include <fstream> // IWYU pragma: keep
31 #include <iterator>
32 
34 {
35  if(!cmdline.isset('T'))
36  return 0;
37 
38  auto original_goto_model =
40 
41  if(!original_goto_model.has_value())
42  {
43  log.error() << "Unable to read goto binary for linker script merging"
44  << messaget::eom;
45  return 1;
46  }
47 
48  temporary_filet linker_def_outfile("goto-cc-linker-info", ".json");
49  std::list<irep_idt> linker_defined_symbols;
50  int fail = get_linker_script_data(
51  linker_defined_symbols,
52  original_goto_model->symbol_table,
53  elf_binary,
54  linker_def_outfile());
55  // ignore linker script parsing failures until the code is tested more widely
56  if(fail!=0)
57  return 0;
58 
59  jsont data;
60  fail = parse_json(linker_def_outfile(), log.get_message_handler(), data);
61  if(fail!=0)
62  {
63  log.error() << "Problem parsing linker script JSON data" << messaget::eom;
64  return fail;
65  }
66 
67  fail=linker_data_is_malformed(data);
68  if(fail!=0)
69  {
70  log.error() << "Malformed linker-script JSON document" << messaget::eom;
71  data.output(log.error());
72  return fail;
73  }
74 
75  linker_valuest linker_values;
76  fail = ls_data2instructions(
77  data,
78  cmdline.get_value('T'),
79  original_goto_model->symbol_table,
80  linker_values);
81  if(fail!=0)
82  {
83  log.error() << "Could not add linkerscript defs to symbol table"
84  << messaget::eom;
85  return fail;
86  }
87 
88  if(original_goto_model->can_produce_function(INITIALIZE_FUNCTION))
89  {
91  *original_goto_model, log.get_message_handler());
92  }
93 
94  fail=goto_and_object_mismatch(linker_defined_symbols, linker_values);
95  if(fail!=0)
96  return fail;
97 
98  // The keys of linker_values are exactly the elements of
99  // linker_defined_symbols, so iterate over linker_values from now on.
100 
101  fail = pointerize_linker_defined_symbols(*original_goto_model, linker_values);
102 
103  if(fail!=0)
104  {
105  log.error() << "Could not pointerize all linker-defined expressions"
106  << messaget::eom;
107  return fail;
108  }
109 
111  goto_binary,
112  *original_goto_model,
113  cmdline.isset("validate-goto-model"),
115 
116  if(fail!=0)
117  {
118  log.error() << "Could not write linkerscript-augmented binary"
119  << messaget::eom;
120  }
121 
122  return fail;
123 }
124 
126  const std::string &elf_binary,
127  const std::string &goto_binary,
128  const cmdlinet &cmdline,
129  message_handlert &message_handler)
130  : elf_binary(elf_binary),
131  goto_binary(goto_binary),
132  cmdline(cmdline),
133  log(message_handler),
134  replacement_predicates(
136  "address of array's first member",
137  [](const exprt &expr) -> const symbol_exprt & {
138  return to_symbol_expr(
139  to_index_expr(to_address_of_expr(expr).object()).index());
140  },
141  [](const exprt &expr) {
142  return expr.id() == ID_address_of &&
143  expr.type().id() == ID_pointer &&
144 
145  to_address_of_expr(expr).object().id() == ID_index &&
146  to_address_of_expr(expr).object().type().id() ==
147  ID_unsignedbv &&
148 
149  to_index_expr(to_address_of_expr(expr).object())
150  .array()
151  .id() == ID_symbol &&
152  to_index_expr(to_address_of_expr(expr).object())
153  .array()
154  .type()
155  .id() == ID_array &&
156 
157  to_index_expr(to_address_of_expr(expr).object())
158  .index()
159  .is_constant() &&
160  to_index_expr(to_address_of_expr(expr).object())
161  .index()
162  .type()
163  .id() == ID_signedbv;
164  }),
166  "address of array",
167  [](const exprt &expr) -> const symbol_exprt & {
168  return to_symbol_expr(to_address_of_expr(expr).object());
169  },
170  [](const exprt &expr) {
171  return expr.id() == ID_address_of &&
172  expr.type().id() == ID_pointer &&
173 
174  to_address_of_expr(expr).object().id() == ID_symbol &&
175  to_address_of_expr(expr).object().type().id() == ID_array;
176  }),
178  "address of struct",
179  [](const exprt &expr) -> const symbol_exprt & {
180  return to_symbol_expr(to_address_of_expr(expr).object());
181  },
182  [](const exprt &expr) {
183  return expr.id() == ID_address_of &&
184  expr.type().id() == ID_pointer &&
185 
186  to_address_of_expr(expr).object().id() == ID_symbol &&
187  (to_address_of_expr(expr).object().type().id() == ID_struct ||
188  to_address_of_expr(expr).object().type().id() ==
189  ID_struct_tag);
190  }),
192  "array variable",
193  [](const exprt &expr) -> const symbol_exprt & {
194  return to_symbol_expr(expr);
195  },
196  [](const exprt &expr) {
197  return expr.id() == ID_symbol && expr.type().id() == ID_array;
198  }),
200  "pointer (does not need pointerizing)",
201  [](const exprt &expr) -> const symbol_exprt & {
202  return to_symbol_expr(expr);
203  },
204  [](const exprt &expr) {
205  return expr.id() == ID_symbol && expr.type().id() == ID_pointer;
206  })})
207 {}
208 
210  goto_modelt &goto_model,
211  const linker_valuest &linker_values)
212 {
213  const namespacet ns(goto_model.symbol_table);
214 
215  int ret=0;
216 
217  // Next, find all occurrences of linker-defined symbols that are _values_
218  // of some symbol in the symbol table, and pointerize them too
219  for(auto it = goto_model.symbol_table.begin();
220  it != goto_model.symbol_table.end();
221  ++it)
222  {
223  std::list<symbol_exprt> to_pointerize;
224  symbols_to_pointerize(linker_values, it->second.value, to_pointerize);
225 
226  if(to_pointerize.empty())
227  continue;
228  log.debug() << "Pointerizing the symbol-table value of symbol " << it->first
229  << messaget::eom;
230  int fail = pointerize_subexprs_of(
231  it.get_writeable_symbol().value, to_pointerize, linker_values);
232  if(to_pointerize.empty() && fail==0)
233  continue;
234  ret=1;
235  for(const auto &sym : to_pointerize)
236  {
237  log.error() << " Could not pointerize '" << sym.get_identifier()
238  << "' in symbol table entry " << it->first << ". Pretty:\n"
239  << sym.pretty() << "\n";
240  }
241  log.error() << messaget::eom;
242  }
243 
244  // Finally, pointerize all occurrences of linker-defined symbols in the
245  // goto program
246  for(auto &gf : goto_model.goto_functions.function_map)
247  {
248  goto_programt &program=gf.second.body;
249  for(auto &instruction : program.instructions)
250  {
251  std::list<exprt *> expressions = {&instruction.code_nonconst()};
252  if(instruction.has_condition())
253  expressions.push_back(&instruction.condition_nonconst());
254 
255  for(exprt *insts : expressions)
256  {
257  std::list<symbol_exprt> to_pointerize;
258  symbols_to_pointerize(linker_values, *insts, to_pointerize);
259  if(to_pointerize.empty())
260  continue;
261  log.debug() << "Pointerizing a program expression..." << messaget::eom;
262  int fail = pointerize_subexprs_of(*insts, to_pointerize, linker_values);
263  if(to_pointerize.empty() && fail==0)
264  continue;
265  ret=1;
266  for(const auto &sym : to_pointerize)
267  {
268  log.error() << " Could not pointerize '" << sym.get_identifier()
269  << "' in function " << gf.first << ". Pretty:\n"
270  << sym.pretty() << "\n";
271  log.error().source_location = instruction.source_location();
272  }
273  log.error() << messaget::eom;
274  }
275  }
276  }
277  return ret;
278 }
279 
281  exprt &old_expr,
282  const linker_valuest &linker_values,
283  const symbol_exprt &old_symbol,
284  const irep_idt &ident,
285  const std::string &shape)
286 {
287  auto it=linker_values.find(ident);
288  if(it==linker_values.end())
289  {
290  log.error() << "Could not find a new expression for linker script-defined "
291  << "symbol '" << ident << "'" << messaget::eom;
292  return 1;
293  }
294  symbol_exprt new_expr=it->second.first;
295  new_expr.add_source_location()=old_symbol.source_location();
296  log.debug() << "Pointerizing linker-defined symbol '" << ident
297  << "' of shape <" << shape << ">." << messaget::eom;
298  old_expr=new_expr;
299  return 0;
300 }
301 
303  exprt &expr,
304  std::list<symbol_exprt> &to_pointerize,
305  const linker_valuest &linker_values)
306 {
307  int fail=0, tmp=0;
308  for(auto const &pair : linker_values)
309  for(auto const &pattern : replacement_predicates)
310  {
311  if(!pattern.match(expr))
312  continue;
313  // take a copy, expr will be changed below
314  const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
315  if(pair.first!=inner_symbol.get_identifier())
316  continue;
317  tmp=replace_expr(expr, linker_values, inner_symbol, pair.first,
318  pattern.description());
319  fail=tmp?tmp:fail;
320  auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
321  inner_symbol);
322  if(result==to_pointerize.end())
323  {
324  fail=1;
325  log.error() << "Too many removals of '" << inner_symbol.get_identifier()
326  << "'" << messaget::eom;
327  }
328  else
329  to_pointerize.erase(result);
330  // If we get here, we've just pointerized this expression. That expression
331  // will be a symbol possibly wrapped in some unary junk, but won't contain
332  // other symbols as subexpressions that might need to be pointerized. So
333  // it's safe to bail out here.
334  return fail;
335  }
336 
337  for(auto &op : expr.operands())
338  {
339  tmp = pointerize_subexprs_of(op, to_pointerize, linker_values);
340  fail=tmp?tmp:fail;
341  }
342  return fail;
343 }
344 
346  const linker_valuest &linker_values,
347  const exprt &expr,
348  std::list<symbol_exprt> &to_pointerize) const
349 {
350  for(const auto &op : expr.operands())
351  {
352  if(op.id()!=ID_symbol)
353  continue;
354  const symbol_exprt &sym_exp=to_symbol_expr(op);
355  const auto &pair=linker_values.find(sym_exp.get_identifier());
356  if(pair!=linker_values.end())
357  to_pointerize.push_back(sym_exp);
358  }
359  for(const auto &op : expr.operands())
360  symbols_to_pointerize(linker_values, op, to_pointerize);
361 }
362 
363 #if 0
364 The current implementation of this function is less precise than the
365  commented-out version below. To understand the difference between these
366  implementations, consider the following example:
367 
368 Suppose we have a section in the linker script, 100 bytes long, where the
369 address of the symbol sec_start is the start of the section (value 4096) and the
370 address of sec_end is the end of that section (value 4196).
371 
372 The current implementation synthesizes the goto-version of the following C:
373 
374  char __sec_array[100];
375  char *sec_start=(&__sec_array[0]);
376  char *sec_end=((&__sec_array[0])+100);
377  // Yes, it is 100 not 99. We're pointing to the end of the memory occupied
378  // by __sec_array, not the last element of __sec_array.
379 
380 This is imprecise for the following reason: the actual address of the array and
381 the pointers shall be some random CBMC-internal address, instead of being 4096
382 and 4196. The linker script, on the other hand, would have specified the exact
383 position of the section, and we even know what the actual values of sec_start
384 and sec_end are from the object file (these values are in the `addresses` list
385 of the `data` argument to this function). If the correctness of the code depends
386 on these actual values, then CBMCs model of the code is too imprecise to verify
387 this.
388 
389 The commented-out version of this function below synthesizes the following:
390 
391  char *sec_start=4096;
392  char *sec_end=4196;
393  __CPROVER_allocated_memory(4096, 100);
394 
395 This code has both the actual addresses of the start and end of the section and
396 tells CBMC that the intermediate region is valid. However, the allocated_memory
397 macro does not currently allocate an actual object at the address 4096, so
398 symbolic execution can fail. In particular, the 'allocated memory' is part of
399 __CPROVER_memory, which does not have a bounded size; this means that (for
400 example) calls to memcpy or memset fail, because the first and third arguments
401 do not have know n size. The commented-out implementation should be reinstated
402 once this limitation of __CPROVER_allocated_memory has been fixed.
403 
404 In either case, no other changes to the rest of the code (outside this function)
405 should be necessary. The rest of this file converts expressions containing the
406 linker-defined symbol into pointer types if they were not already, and this is
407 the right behaviour for both implementations.
408 #endif
410  jsont &data,
411  const std::string &linker_script,
412  symbol_tablet &symbol_table,
413  linker_valuest &linker_values)
414 #if 1
415 {
416  std::map<irep_idt, std::size_t> truncated_symbols;
417  for(auto &d : to_json_array(data["regions"]))
418  {
419  bool has_end=d["has-end-symbol"].is_true();
420 
421  std::ostringstream array_name;
422  array_name << CPROVER_PREFIX << "linkerscript-array_"
423  << d["start-symbol"].value;
424  if(has_end)
425  array_name << "-" << d["end-symbol"].value;
426 
427 
428  // Array symbol_exprt
429  mp_integer array_size = string2integer(d["size"].value);
430  if(array_size > MAX_FLATTENED_ARRAY_SIZE)
431  {
432  log.warning() << "Object section '" << d["section"].value << "' of size "
433  << array_size << " is too large to model. Truncating to "
434  << MAX_FLATTENED_ARRAY_SIZE << " bytes" << messaget::eom;
435  array_size=MAX_FLATTENED_ARRAY_SIZE;
436  if(!has_end)
437  truncated_symbols[d["size-symbol"].value]=MAX_FLATTENED_ARRAY_SIZE;
438  }
439 
440  constant_exprt array_size_expr=from_integer(array_size, size_type());
441  array_typet array_type(char_type(), array_size_expr);
442 
443  source_locationt array_loc;
444  array_loc.set_file(linker_script);
445  std::ostringstream array_comment;
446  array_comment << "Object section '" << d["section"].value << "' of size "
447  << array_size << " bytes";
448  array_loc.set_comment(array_comment.str());
449 
450  namespacet ns(symbol_table);
451  const auto zi = zero_initializer(array_type, array_loc, ns);
452  CHECK_RETURN(zi.has_value());
453 
454  // Add the array to the symbol table.
455  symbolt array_sym{array_name.str(), array_type, ID_C};
456  array_sym.is_static_lifetime = true;
457  array_sym.is_lvalue = true;
458  array_sym.is_state_var = true;
459  array_sym.value = *zi;
460  array_sym.location = array_loc;
461 
462  bool failed = symbol_table.add(array_sym);
464 
465  // Array start address
466  index_exprt zero_idx{array_sym.symbol_expr(), from_integer(0, size_type())};
467  address_of_exprt array_start(zero_idx);
468 
469  // Linker-defined symbol_exprt pointing to start address
470  symbolt start_sym{d["start-symbol"].value, pointer_type(char_type()), ID_C};
471  start_sym.is_static_lifetime = true;
472  start_sym.is_lvalue = true;
473  start_sym.is_state_var = true;
474  start_sym.value = array_start;
475  linker_values.emplace(
476  d["start-symbol"].value,
477  std::make_pair(start_sym.symbol_expr(), array_start));
478 
479  // Since the value of the pointer will be a random CBMC address, write a
480  // note about the real address in the object file
481  auto it = std::find_if(
482  to_json_array(data["addresses"]).begin(),
483  to_json_array(data["addresses"]).end(),
484  [&d](const jsont &add) {
485  return add["sym"].value == d["start-symbol"].value;
486  });
487  if(it == to_json_array(data["addresses"]).end())
488  {
489  log.error() << "Start: Could not find address corresponding to symbol '"
490  << d["start-symbol"].value << "' (start of section)"
491  << messaget::eom;
492  return 1;
493  }
494  source_locationt start_loc;
495  start_loc.set_file(linker_script);
496  std::ostringstream start_comment;
497  start_comment << "Pointer to beginning of object section '"
498  << d["section"].value << "'. Original address in object file"
499  << " is " << (*it)["val"].value;
500  start_loc.set_comment(start_comment.str());
501  start_sym.location = start_loc;
502 
503  auto start_sym_entry = symbol_table.insert(start_sym);
504  if(!start_sym_entry.second)
505  start_sym_entry.first = start_sym;
506 
507  if(has_end) // Same for pointer to end of array
508  {
509  plus_exprt array_end(array_start, array_size_expr);
510 
511  symbolt end_sym{d["end-symbol"].value, pointer_type(char_type()), ID_C};
512  end_sym.is_static_lifetime = true;
513  end_sym.is_lvalue = true;
514  end_sym.is_state_var = true;
515  end_sym.value = array_end;
516  linker_values.emplace(
517  d["end-symbol"].value,
518  std::make_pair(end_sym.symbol_expr(), array_end));
519 
520  auto entry = std::find_if(
521  to_json_array(data["addresses"]).begin(),
522  to_json_array(data["addresses"]).end(),
523  [&d](const jsont &add) {
524  return add["sym"].value == d["end-symbol"].value;
525  });
526  if(entry == to_json_array(data["addresses"]).end())
527  {
528  log.debug() << "Could not find address corresponding to symbol '"
529  << d["end-symbol"].value << "' (end of section)"
530  << messaget::eom;
531  return 1;
532  }
533  source_locationt end_loc;
534  end_loc.set_file(linker_script);
535  std::ostringstream end_comment;
536  end_comment << "Pointer to end of object section '" << d["section"].value
537  << "'. Original address in object file"
538  << " is " << (*entry)["val"].value;
539  end_loc.set_comment(end_comment.str());
540  end_sym.location = end_loc;
541 
542  auto end_sym_entry = symbol_table.insert(end_sym);
543  if(!end_sym_entry.second)
544  end_sym_entry.first = end_sym;
545  }
546  }
547 
548  // We've added every linker-defined symbol that marks the start or end of a
549  // region. But there might be other linker-defined symbols with some random
550  // address. These will have been declared extern too, so we need to give them
551  // a value also. Here, we give them the actual value that they have in the
552  // object file, since we're not assigning any object to them.
553  for(const auto &d : to_json_array(data["addresses"]))
554  {
555  auto it=linker_values.find(irep_idt(d["sym"].value));
556  if(it!=linker_values.end())
557  // sym marks the start or end of a region; already dealt with.
558  continue;
559 
560  // Perhaps this is a size symbol for a section whose size we truncated
561  // earlier.
562  irep_idt symbol_value;
563  const auto &pair=truncated_symbols.find(d["sym"].value);
564  if(pair==truncated_symbols.end())
565  symbol_value=d["val"].value;
566  else
567  {
568  log.debug()
569  << "Truncating the value of symbol " << d["sym"].value << " from "
570  << d["val"].value << " to " << MAX_FLATTENED_ARRAY_SIZE
571  << " because it corresponds to the size of a too-large section."
572  << messaget::eom;
574  }
575 
576  source_locationt loc;
577  loc.set_file(linker_script);
578  loc.set_comment("linker script-defined symbol: char *"+
579  d["sym"].value+"="+"(char *)"+id2string(symbol_value)+"u;");
580 
581  constant_exprt rhs(
583  string2integer(id2string(symbol_value)),
584  unsigned_int_type().get_width()),
586 
587  typecast_exprt rhs_tc(rhs, pointer_type(char_type()));
588 
589  symbolt &symbol = symbol_table.get_writeable_ref(d["sym"].value);
590  symbol.is_extern = false;
591  symbol.is_static_lifetime = true;
592  symbol.location = loc;
593  symbol.type = pointer_type(char_type());
594  symbol.value = rhs_tc;
595 
596  linker_values.emplace(
597  irep_idt(d["sym"].value), std::make_pair(symbol.symbol_expr(), rhs_tc));
598  }
599  return 0;
600 }
601 #else
602 {
603  goto_programt::instructionst initialize_instructions=gp.instructions;
604  for(const auto &d : to_json_array(data["regions"]))
605  {
606  unsigned start=safe_string2unsigned(d["start"].value);
607  unsigned size=safe_string2unsigned(d["size"].value);
608  constant_exprt first=from_integer(start, size_type());
609  constant_exprt second=from_integer(size, size_type());
610  const code_typet void_t({}, empty_typet());
612  symbol_exprt(CPROVER_PREFIX "allocated_memory", void_t), {first, second});
613 
614  source_locationt loc;
615  loc.set_file(linker_script);
616  loc.set_comment("linker script-defined region:\n"+d["commt"].value+":\n"+
617  d["annot"].value);
618  f.add_source_location()=loc;
619 
621  i.make_function_call(f);
622  initialize_instructions.push_front(i);
623  }
624 
625  if(!symbol_table.has_symbol(CPROVER_PREFIX "allocated_memory"))
626  {
627  symbolt sym{
628  CPROVER_PREFIX "allocated_memory",
629  code_typet({}, empty_typet()),
630  ID_C} sym.pretty_name = CPROVER_PREFIX "allocated_memory";
631  sym.is_lvalue=sym.is_static_lifetime=true;
632  symbol_table.add(sym);
633  }
634 
635  for(const auto &d : to_json_array(data["addresses"]))
636  {
637  source_locationt loc;
638  loc.set_file(linker_script);
639  loc.set_comment("linker script-defined symbol: char *"+
640  d["sym"].value+"="+"(char *)"+d["val"].value+"u;");
641 
642  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
643 
644  constant_exprt rhs;
646  string2integer(d["val"].value), unsigned_int_type().get_width()));
647  rhs.type()=unsigned_int_type();
648 
649  exprt rhs_tc =
651 
652  linker_values.emplace(
653  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
654 
655  code_assignt assign(lhs, rhs_tc);
656  assign.add_source_location()=loc;
658  assign_i.make_assignment(assign);
659  initialize_instructions.push_front(assign_i);
660  }
661  return 0;
662 }
663 #endif
664 
666  std::list<irep_idt> &linker_defined_symbols,
667  const symbol_tablet &symbol_table,
668  const std::string &out_file,
669  const std::string &def_out_file)
670 {
671  for(auto const &pair : symbol_table.symbols)
672  {
673  if(
674  pair.second.is_extern && pair.second.value.is_nil() &&
675  pair.second.name != CPROVER_PREFIX "memory")
676  {
677  linker_defined_symbols.push_back(pair.second.name);
678  }
679  }
680 
681  std::ostringstream linker_def_str;
682  std::copy(
683  linker_defined_symbols.begin(),
684  linker_defined_symbols.end(),
685  std::ostream_iterator<irep_idt>(linker_def_str, "\n"));
686  log.debug() << "Linker-defined symbols: [" << linker_def_str.str() << "]\n"
687  << messaget::eom;
688 
689  temporary_filet linker_def_infile("goto-cc-linker-defs", "");
690  std::ofstream linker_def_file(linker_def_infile());
691  linker_def_file << linker_def_str.str();
692  linker_def_file.close();
693 
694  std::vector<std::string> argv=
695  {
696  "ls_parse.py",
697  "--script", cmdline.get_value('T'),
698  "--object", out_file,
699  "--sym-file", linker_def_infile(),
700  "--out-file", def_out_file
701  };
702 
704  argv.push_back("--very-verbose");
706  argv.push_back("--verbose");
707 
708  log.debug() << "RUN:";
709  for(std::size_t i=0; i<argv.size(); i++)
710  log.debug() << " " << argv[i];
711  log.debug() << messaget::eom;
712 
713  int rc = run(argv[0], argv, linker_def_infile(), def_out_file, "");
714  if(rc!=0)
715  log.warning() << "Problem parsing linker script" << messaget::eom;
716 
717  return rc;
718 }
719 
721  const std::list<irep_idt> &linker_defined_symbols,
722  linker_valuest &linker_values)
723 {
724  int fail=0;
725  for(const auto &sym : linker_defined_symbols)
726  if(linker_values.find(sym)==linker_values.end())
727  {
728  log.warning() << "Variable '" << sym
729  << "' was declared extern but never given a value, even in "
730  << "a linker script" << messaget::eom;
731 
732  null_pointer_exprt null_pointer(pointer_type(char_type()));
733  symbol_exprt null_sym(sym, pointer_type(char_type()));
734  linker_values.emplace(sym, std::make_pair(null_sym, null_pointer));
735  }
736 
737  for(const auto &pair : linker_values)
738  {
739  auto it=std::find(linker_defined_symbols.begin(),
740  linker_defined_symbols.end(), pair.first);
741  if(it==linker_defined_symbols.end())
742  {
743  fail=1;
744  log.error()
745  << "Linker script-defined symbol '" << pair.first << "' was "
746  << "either defined in the C source code, not declared extern in "
747  << "the C source code, or does not appear in the C source code"
748  << messaget::eom;
749  }
750  }
751  return fail;
752 }
753 
755 {
756  if(!data.is_object())
757  return true;
758 
759  const json_objectt &data_object = to_json_object(data);
760  return (
761  !(data_object.find("regions") != data_object.end() &&
762  data_object.find("addresses") != data_object.end() &&
763  data["regions"].is_array() && data["addresses"].is_array() &&
764  std::all_of(
765  to_json_array(data["addresses"]).begin(),
766  to_json_array(data["addresses"]).end(),
767  [](const jsont &j) {
768  if(!j.is_object())
769  return false;
770 
771  const json_objectt &address = to_json_object(j);
772  return address.find("val") != address.end() &&
773  address.find("sym") != address.end() &&
774  address["val"].is_number() && address["sym"].is_string();
775  }) &&
776  std::all_of(
777  to_json_array(data["regions"]).begin(),
778  to_json_array(data["regions"]).end(),
779  [](const jsont &j) {
780  if(!j.is_object())
781  return false;
782 
783  const json_objectt &region = to_json_object(j);
784  return region.find("start") != region.end() &&
785  region.find("size") != region.end() &&
786  region.find("annot") != region.end() &&
787  region.find("commt") != region.end() &&
788  region.find("start-symbol") != region.end() &&
789  region.find("has-end-symbol") != region.end() &&
790  region.find("section") != region.end() &&
791  region["start"].is_number() && region["size"].is_number() &&
792  region["annot"].is_string() &&
793  region["start-symbol"].is_string() &&
794  region["section"].is_string() && region["commt"].is_string() &&
795  ((region["has-end-symbol"].is_true() &&
796  region.find("end-symbol") != region.end() &&
797  region["end-symbol"].is_string()) ||
798  (region["has-end-symbol"].is_false() &&
799  region.find("size-symbol") != region.end() &&
800  region.find("end-symbol") == region.end() &&
801  region["size-symbol"].is_string()));
802  })));
803 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
irep_idt integer2bvrep(const mp_integer &src, std::size_t width)
convert an integer to bit-vector representation with given width This uses two's complement for negat...
std::string array_name(const namespacet &ns, const exprt &expr)
Definition: array_name.cpp:19
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:36
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
bitvector_typet char_type()
Definition: c_types.cpp:106
Operator to return the address of an object.
Definition: pointer_expr.h:540
exprt & object()
Definition: pointer_expr.h:549
Arrays with given size.
Definition: std_types.h:807
std::string get_value(char option) const
Definition: cmdline.cpp:48
virtual bool isset(char option) const
Definition: cmdline.cpp:30
A goto_instruction_codet representing an assignment in the program.
goto_instruction_codet representation of a function call statement.
Base type of functions.
Definition: std_types.h:583
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
Definition: compile.cpp:574
A constant literal expression.
Definition: std_expr.h:2987
void set_value(const irep_idt &value)
Definition: std_expr.h:3000
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
The empty type.
Definition: std_types.h:51
Base class for all expressions.
Definition: expr.h:56
source_locationt & add_source_location()
Definition: expr.h:236
const source_locationt & source_location() const
Definition: expr.h:231
typet & type()
Return the type of the expression.
Definition: expr.h:84
bool is_constant() const
Return whether the expression is a constant.
Definition: expr.h:212
operandst & operands()
Definition: expr.h:94
function_mapt function_map
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:31
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:34
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:181
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
std::list< instructiont > instructionst
Definition: goto_program.h:612
Array index operator.
Definition: std_expr.h:1465
exprt & array()
Definition: std_expr.h:1495
exprt & index()
Definition: std_expr.h:1505
const irep_idt & id() const
Definition: irep.h:384
iterator find(const std::string &key)
Definition: json.h:354
iterator end()
Definition: json.h:384
Definition: json.h:27
bool is_array() const
Definition: json.h:61
std::string value
Definition: json.h:132
void output(std::ostream &out) const
Definition: json.h:90
bool is_object() const
Definition: json.h:56
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
const std::string & elf_binary
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
int ls_data2instructions(jsont &data, const std::string &linker_script, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into the symbol_table.
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
const cmdlinet & cmdline
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
const std::string & goto_binary
unsigned get_verbosity() const
Definition: message.h:54
source_locationt source_location
Definition: message.h:247
mstreamt & error() const
Definition: message.h:399
mstreamt & warning() const
Definition: message.h:404
mstreamt & debug() const
Definition: message.h:429
message_handlert & get_message_handler()
Definition: message.h:184
@ M_DEBUG
Definition: message.h:171
@ M_RESULT
Definition: message.h:170
static eomt eom
Definition: message.h:297
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
The null pointer constant.
Definition: pointer_expr.h:909
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
Patterns of expressions that should be replaced.
void set_comment(const irep_idt &comment)
void set_file(const irep_idt &file)
Expression to hold a symbol (variable)
Definition: std_expr.h:131
const irep_idt & get_identifier() const
Definition: std_expr.h:160
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
The symbol table.
Definition: symbol_table.h:14
virtual iteratort begin() override
Definition: symbol_table.h:108
virtual iteratort end() override
Definition: symbol_table.h:112
virtual std::pair< symbolt &, bool > insert(symbolt symbol) override
Author: Diffblue Ltd.
Symbol table entry.
Definition: symbol.h:28
bool is_extern
Definition: symbol.h:74
bool is_static_lifetime
Definition: symbol.h:70
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
exprt value
Initial value of symbol.
Definition: symbol.h:34
Semantic type conversion.
Definition: std_expr.h:2068
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2076
Compile and link source and object files.
void __CPROVER_allocated_memory(__CPROVER_size_t address, __CPROVER_size_t extent)
#define CPROVER_PREFIX
std::optional< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
Goto Programs with Functions.
Symbol Table + CFG.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:40
json_arrayt & to_json_array(jsont &json)
Definition: json.h:424
json_objectt & to_json_object(jsont &json)
Definition: json.h:442
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:27
Merge linker script-defined symbols into a goto-program.
Magic numbers used throughout the codebase.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
Definition: magic.h:11
double log(double x)
Definition: math.c:2776
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
API to expression classes for Pointers.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: pointer_expr.h:577
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
Read Goto Programs.
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
BigInt mp_integer
Definition: smt_terms.h:17
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
void recreate_initialize_function(goto_modelt &goto_model, message_handlert &message_handler)
Regenerates the CPROVER_INITIALIZE function, which initializes all non-function symbols of the goto m...
#define INITIALIZE_FUNCTION
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1533
long random(void)
Definition: stdlib.c:630
unsigned safe_string2unsigned(const std::string &str, int base)
Definition: string2int.cpp:16
void * memcpy(void *dst, const void *src, size_t n)
Definition: string.c:613
void * memset(void *s, int c, size_t n)
Definition: string.c:713
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
static bool failed(bool error_indicator)
#define size_type
Definition: unistd.c:347
dstringt irep_idt