CBMC
ansi_c_entry_point.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "ansi_c_entry_point.h"
10 
11 #include <util/arith_tools.h>
12 #include <util/c_types.h>
13 #include <util/config.h>
14 #include <util/message.h>
15 #include <util/pointer_expr.h>
16 #include <util/symbol_table_base.h>
17 
19 
21 
23 #include "expr2c.h"
24 
26  const code_typet::parameterst &parameters,
27  code_blockt &init_code,
28  symbol_table_baset &symbol_table,
29  const c_object_factory_parameterst &object_factory_parameters)
30 {
31  exprt::operandst main_arguments;
32  main_arguments.resize(parameters.size());
33 
34  for(std::size_t param_number=0;
35  param_number<parameters.size();
36  param_number++)
37  {
38  const code_typet::parametert &p=parameters[param_number];
39  const irep_idt base_name=p.get_base_name().empty()?
40  ("argument#"+std::to_string(param_number)):p.get_base_name();
41 
42  main_arguments[param_number] = c_nondet_symbol_factory(
43  init_code,
44  symbol_table,
45  base_name,
46  p.type(),
47  p.source_location(),
48  object_factory_parameters,
50  }
51 
52  return main_arguments;
53 }
54 
56  const symbolt &function,
57  code_blockt &init_code,
58  symbol_table_baset &symbol_table)
59 {
60  bool has_return_value =
61  to_code_type(function.type).return_type() != void_type();
62 
63  if(has_return_value)
64  {
65  const symbolt &return_symbol = symbol_table.lookup_ref("return'");
66 
67  // record return value
68  init_code.add(code_outputt{
69  return_symbol.base_name, return_symbol.symbol_expr(), function.location});
70  }
71 
72  #if 0
73  std::size_t i=0;
74 
75  for(const auto &p : parameters)
76  {
77  if(p.get_identifier().empty())
78  continue;
79 
80  irep_idt identifier=p.get_identifier();
81 
82  const symbolt &symbol=symbol_table.lookup(identifier);
83 
84  if(symbol.type.id()==ID_pointer)
85  {
86  codet output(ID_output);
87  output.operands().resize(2);
88 
89  output.op0()=
93  from_integer(0, index_type())));
94  output.op1()=symbol.symbol_expr();
95  output.add_source_location()=p.source_location();
96 
97  init_code.add(std::move(output));
98  }
99 
100  i++;
101  }
102  #endif
103 }
104 
106  symbol_table_baset &symbol_table,
107  message_handlert &message_handler,
108  const c_object_factory_parameterst &object_factory_parameters)
109 {
110  // check if entry point is already there
111  if(symbol_table.symbols.find(goto_functionst::entry_point())!=
112  symbol_table.symbols.end())
113  return false; // silently ignore
114 
115  irep_idt main_symbol;
116 
117  // find main symbol, if any is given
118  if(config.main.has_value())
119  {
120  auto matches = symbol_table.match_name_or_base_name(*config.main);
121 
122  for(auto it = matches.begin(); it != matches.end();) // no ++it
123  {
124  if((*it)->second.type.id() != ID_code || (*it)->second.is_property)
125  it = matches.erase(it);
126  else
127  ++it;
128  }
129 
130  if(matches.empty())
131  {
132  messaget message(message_handler);
133  message.error() << "main symbol '" << config.main.value() << "' not found"
134  << messaget::eom;
135  return true; // give up
136  }
137 
138  if(matches.size()>=2)
139  {
140  messaget message(message_handler);
141  message.error() << "main symbol '" << config.main.value()
142  << "' is ambiguous" << messaget::eom;
143  return true;
144  }
145 
146  main_symbol = matches.front()->first;
147  }
148  else
149  main_symbol=ID_main;
150 
151  // look it up
152  symbol_table_baset::symbolst::const_iterator s_it =
153  symbol_table.symbols.find(main_symbol);
154 
155  if(s_it==symbol_table.symbols.end())
156  return false; // give up silently
157 
158  const symbolt &symbol=s_it->second;
159 
160  // check if it has a body
161  if(symbol.value.is_nil())
162  {
163  messaget message(message_handler);
164  message.error() << "main symbol '" << id2string(main_symbol)
165  << "' has no body" << messaget::eom;
166  return false; // give up
167  }
168 
169  static_lifetime_init(symbol_table, symbol.location);
170 
172  symbol, symbol_table, message_handler, object_factory_parameters);
173 }
174 
185  const symbolt &symbol,
186  symbol_table_baset &symbol_table,
187  message_handlert &message_handler,
188  const c_object_factory_parameterst &object_factory_parameters)
189 {
190  PRECONDITION(!symbol.value.is_nil());
191  code_blockt init_code;
192 
193  // add 'HIDE' label
194  init_code.add(code_labelt(CPROVER_PREFIX "HIDE", code_skipt()));
195 
196  // build call to initialization function
197 
198  {
199  symbol_table_baset::symbolst::const_iterator init_it =
200  symbol_table.symbols.find(INITIALIZE_FUNCTION);
201 
202  if(init_it==symbol_table.symbols.end())
203  {
204  messaget message(message_handler);
205  message.error() << "failed to find " INITIALIZE_FUNCTION " symbol"
206  << messaget::eom;
207  return true;
208  }
209 
210  code_function_callt call_init(init_it->second.symbol_expr());
211  call_init.add_source_location()=symbol.location;
212 
213  init_code.add(std::move(call_init));
214  }
215 
216  // build call to main function
217 
218  code_function_callt call_main(symbol.symbol_expr());
219  call_main.add_source_location()=symbol.location;
220  call_main.function().add_source_location()=symbol.location;
221 
222  if(to_code_type(symbol.type).return_type() != void_type())
223  {
224  auxiliary_symbolt return_symbol;
225  return_symbol.mode=ID_C;
226  return_symbol.is_static_lifetime=false;
227  return_symbol.name="return'";
228  return_symbol.base_name = "return'";
229  return_symbol.type=to_code_type(symbol.type).return_type();
230 
231  symbol_table.add(return_symbol);
232  call_main.lhs()=return_symbol.symbol_expr();
233  }
234 
235  const code_typet::parameterst &parameters=
236  to_code_type(symbol.type).parameters();
237 
238  if(symbol.name==ID_main)
239  {
240  if(parameters.empty())
241  {
242  // ok
243  }
244  // The C standard (and any related architecture descriptions) enforces an
245  // order of parameters. The user, however, may supply arbitrary
246  // (syntactically valid) C code, even one that does not respect the calling
247  // conventions set out in the C standard. If the user does supply such code,
248  // then we can only tell them that they got it wrong, which is what we do
249  // via the error message in the else branch of this code.
250  else if(
251  parameters.size() >= 2 && parameters[1].type().id() == ID_pointer &&
252  (parameters.size() == 2 ||
253  (parameters.size() == 3 && parameters[2].type().id() == ID_pointer)))
254  {
255  namespacet ns(symbol_table);
256 
257  {
258  symbolt argc_symbol{"argc'", signed_int_type(), ID_C};
259  argc_symbol.base_name = "argc'";
260  argc_symbol.is_static_lifetime = true;
261  argc_symbol.is_lvalue = true;
262 
263  auto r = symbol_table.insert(argc_symbol);
264  if(!r.second && r.first != argc_symbol)
265  {
266  messaget message(message_handler);
267  message.error() << "argc already exists but is not usable"
268  << messaget::eom;
269  return true;
270  }
271  }
272 
273  const symbolt &argc_symbol = ns.lookup("argc'");
274 
275  {
276  // we make the type of this thing an array of pointers
277  // need to add one to the size -- the array is terminated
278  // with NULL
279  const exprt one_expr = from_integer(1, argc_symbol.type);
280  const plus_exprt size_expr(argc_symbol.symbol_expr(), one_expr);
281  const array_typet argv_type(pointer_type(char_type()), size_expr);
282 
283  symbolt argv_symbol{"argv'", argv_type, ID_C};
284  argv_symbol.base_name = "argv'";
285  argv_symbol.is_static_lifetime = true;
286  argv_symbol.is_lvalue = true;
287 
288  auto r = symbol_table.insert(argv_symbol);
289  if(!r.second && r.first != argv_symbol)
290  {
291  messaget message(message_handler);
292  message.error() << "argv already exists but is not usable"
293  << messaget::eom;
294  return true;
295  }
296  }
297 
298  const symbolt &argv_symbol=ns.lookup("argv'");
299  const array_typet &argv_array_type = to_array_type(argv_symbol.type);
300 
301  {
302  // Assume argc is at least zero. Note that we don't assume it's
303  // at least one, since this isn't guaranteed, as exemplified by
304  // https://www.qualys.com/2022/01/25/cve-2021-4034/pwnkit.txt
305  // The C standard only guarantees "The value of argc shall be
306  // nonnegative." and "argv[argc] shall be a null pointer."
307  exprt zero = from_integer(0, argc_symbol.type);
308 
310  argc_symbol.symbol_expr(), ID_ge, std::move(zero));
311 
312  init_code.add(code_assumet(std::move(ge)));
313  }
314 
315  if(config.ansi_c.max_argc.has_value())
316  {
317  exprt bound_expr =
318  from_integer(*config.ansi_c.max_argc, argc_symbol.type);
319 
321  argc_symbol.symbol_expr(), ID_le, std::move(bound_expr));
322 
323  init_code.add(code_assumet(std::move(le)));
324  }
325 
326  // record argc as an input
327  init_code.add(code_inputt{"argc", argc_symbol.symbol_expr()});
328 
329  if(parameters.size()==3)
330  {
331  {
332  symbolt envp_size_symbol{"envp_size'", size_type(), ID_C};
333  envp_size_symbol.base_name = "envp_size'";
334  envp_size_symbol.is_static_lifetime = true;
335 
336  if(!symbol_table.insert(std::move(envp_size_symbol)).second)
337  {
338  messaget message(message_handler);
339  message.error()
340  << "failed to insert envp_size symbol" << messaget::eom;
341  return true;
342  }
343  }
344 
345  const symbolt &envp_size_symbol=ns.lookup("envp_size'");
346 
347  {
348  symbolt envp_symbol{
349  "envp'",
350  array_typet(
351  pointer_type(char_type()), envp_size_symbol.symbol_expr()),
352  ID_C};
353  envp_symbol.base_name = "envp'";
354  envp_symbol.is_static_lifetime = true;
355 
356  if(!symbol_table.insert(std::move(envp_symbol)).second)
357  {
358  messaget message(message_handler);
359  message.error() << "failed to insert envp symbol" << messaget::eom;
360  return true;
361  }
362  }
363 
364  // assume envp_size is INTMAX-1
365  const mp_integer max =
366  to_integer_bitvector_type(envp_size_symbol.type).largest();
367 
368  exprt max_minus_one=from_integer(max-1, envp_size_symbol.type);
369 
371  envp_size_symbol.symbol_expr(), ID_le, std::move(max_minus_one));
372 
373  init_code.add(code_assumet(le));
374  }
375 
376  {
377  /* zero_string doesn't work yet */
378 
379  /*
380  exprt zero_string(ID_zero_string, array_typet());
381  zero_string.type().subtype()=char_type();
382  zero_string.type().set(ID_size, "infinity");
383  const index_exprt index(zero_string, from_integer(0, uint_type()));
384  exprt address_of =
385  typecast_exprt::conditional_cast(
386  address_of_exprt(index, pointer_type(char_type())),
387  argv_symbol.type.subtype());
388 
389  // assign argv[*] to the address of a string-object
390  array_of_exprt array_of(address_of, argv_symbol.type);
391 
392  init_code.copy_to_operands(
393  code_assignt(argv_symbol.symbol_expr(), array_of));
394  */
395  }
396 
397  {
398  // assign argv[argc] to NULL
399  const null_pointer_exprt null(
400  to_pointer_type(argv_array_type.element_type()));
401 
402  index_exprt index_expr(
403  argv_symbol.symbol_expr(),
405  argc_symbol.symbol_expr(), argv_array_type.index_type()));
406 
407  init_code.add(code_frontend_assignt(index_expr, null));
408  // disable bounds check on that one
409  init_code.statements().back().add_source_location().add_pragma(
410  "disable:bounds-check");
411  }
412 
413  if(parameters.size()==3)
414  {
415  const symbolt &envp_symbol=ns.lookup("envp'");
416  const array_typet &envp_array_type = to_array_type(envp_symbol.type);
417  const symbolt &envp_size_symbol=ns.lookup("envp_size'");
418 
419  // assume envp[envp_size] is NULL
420  null_pointer_exprt null(
421  to_pointer_type(envp_array_type.element_type()));
422 
423  index_exprt index_expr(
424  envp_symbol.symbol_expr(),
426  envp_size_symbol.symbol_expr(), envp_array_type.index_type()));
427 
428  equal_exprt is_null(std::move(index_expr), std::move(null));
429 
430  init_code.add(code_assumet(is_null));
431  // disable bounds check on that one
432  init_code.statements().back().add_source_location().add_pragma(
433  "disable:bounds-check");
434  }
435 
436  {
437  exprt::operandst &operands=call_main.arguments();
438 
439  if(parameters.size()==3)
440  operands.resize(3);
441  else
442  operands.resize(2);
443 
444  exprt &op0=operands[0];
445  exprt &op1=operands[1];
446 
448  argc_symbol.symbol_expr(), parameters[0].type());
449 
450  {
451  index_exprt index_expr(
452  argv_symbol.symbol_expr(),
453  from_integer(0, argv_array_type.index_type()));
454  // disable bounds check on that one
455  index_expr.add_source_location().add_pragma("disable:bounds-check");
456 
457  const pointer_typet &pointer_type =
458  to_pointer_type(parameters[1].type());
459 
461  address_of_exprt(index_expr), pointer_type);
462  }
463 
464  // do we need envp?
465  if(parameters.size()==3)
466  {
467  const symbolt &envp_symbol=ns.lookup("envp'");
468  const array_typet &envp_array_type = to_array_type(envp_symbol.type);
469 
470  index_exprt index_expr(
471  envp_symbol.symbol_expr(),
472  from_integer(0, envp_array_type.index_type()));
473 
474  const pointer_typet &pointer_type =
475  to_pointer_type(parameters[2].type());
476 
477  operands[2] = typecast_exprt::conditional_cast(
478  address_of_exprt(index_expr), pointer_type);
479  }
480  }
481  }
482  else
483  {
484  const namespacet ns{symbol_table};
485  const std::string main_signature = type2c(symbol.type, ns);
486  messaget message(message_handler);
487  message.error().source_location = symbol.location;
488  message.error() << "'main' with signature '" << main_signature
489  << "' found,"
490  << " but expecting one of:\n"
491  << " int main(void)\n"
492  << " int main(int argc, char *argv[])\n"
493  << " int main(int argc, char *argv[], char *envp[])\n"
494  << "If this is a non-standard main entry point please "
495  "provide a custom\n"
496  << "entry function and use --function instead"
497  << messaget::eom;
498  return true;
499  }
500  }
501  else
502  {
503  // produce nondet arguments
505  parameters, init_code, symbol_table, object_factory_parameters);
506  }
507 
508  init_code.add(std::move(call_main));
509 
510  // TODO: add read/modified (recursively in call graph) globals as INPUT/OUTPUT
511 
512  record_function_outputs(symbol, init_code, symbol_table);
513 
514  // now call destructor functions (a GCC extension)
515 
516  for(const auto &symbol_table_entry : symbol_table.symbols)
517  {
518  const symbolt &symbol = symbol_table_entry.second;
519 
520  if(symbol.type.id() != ID_code)
521  continue;
522 
523  const code_typet &code_type = to_code_type(symbol.type);
524  if(
525  code_type.return_type().id() == ID_destructor &&
526  code_type.parameters().empty())
527  {
528  code_function_callt destructor_call(symbol.symbol_expr());
529  destructor_call.add_source_location() = symbol.location;
530  init_code.add(std::move(destructor_call));
531  }
532  }
533 
534  // add the entry point symbol
535  symbolt new_symbol{
537  new_symbol.base_name = goto_functionst::entry_point();
538  new_symbol.value.swap(init_code);
539 
540  if(!symbol_table.insert(std::move(new_symbol)).second)
541  {
542  messaget message(message_handler);
543  message.error() << "failed to insert main symbol" << messaget::eom;
544  return true;
545  }
546 
547  return false;
548 }
@ AUTOMATIC_LOCAL
Allocate local objects with automatic lifetime.
bool generate_ansi_c_start_function(const symbolt &symbol, symbol_table_baset &symbol_table, message_handlert &message_handler, const c_object_factory_parameterst &object_factory_parameters)
Generate a _start function for a specific function.
exprt::operandst build_function_environment(const code_typet::parameterst &parameters, code_blockt &init_code, symbol_table_baset &symbol_table, const c_object_factory_parameterst &object_factory_parameters)
void record_function_outputs(const symbolt &function, code_blockt &init_code, symbol_table_baset &symbol_table)
bool ansi_c_entry_point(symbol_table_baset &symbol_table, message_handlert &message_handler, const c_object_factory_parameterst &object_factory_parameters)
configt config
Definition: config.cpp:25
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
const integer_bitvector_typet & to_integer_bitvector_type(const typet &type)
Cast a typet to an integer_bitvector_typet.
symbol_exprt c_nondet_symbol_factory(code_blockt &init_code, symbol_table_baset &symbol_table, const irep_idt base_name, const typet &type, const source_locationt &loc, const c_object_factory_parameterst &object_factory_parameters, const lifetimet lifetime)
Creates a symbol and generates code so that it can vary over all possible values for its type.
C Nondet Symbol Factory.
empty_typet void_type()
Definition: c_types.cpp:245
signedbv_typet signed_int_type()
Definition: c_types.cpp:22
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
bitvector_typet char_type()
Definition: c_types.cpp:106
Operator to return the address of an object.
Definition: pointer_expr.h:540
Arrays with given size.
Definition: std_types.h:807
typet index_type() const
The type of the index expressions into any instance of this type.
Definition: std_types.cpp:33
const typet & element_type() const
The type of the elements of the array.
Definition: std_types.h:827
Internally generated symbol table entryThis is a symbol generated as part of translation to or modifi...
Definition: symbol.h:153
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:762
An assumption, which must hold in subsequent code.
Definition: std_code.h:217
A codet representing sequential composition of program statements.
Definition: std_code.h:130
void add(const codet &code)
Definition: std_code.h:168
code_operandst & statements()
Definition: std_code.h:138
A codet representing an assignment in the program.
Definition: std_code.h:24
goto_instruction_codet representation of a function call statement.
A goto_instruction_codet representing the declaration that an input of a particular description has a...
codet representation of a label for branch targets.
Definition: std_code.h:959
A goto_instruction_codet representing the declaration that an output of a particular description has ...
A codet representing a skip statement.
Definition: std_code.h:322
const irep_idt & get_base_name() const
Definition: std_types.h:639
Base type of functions.
Definition: std_types.h:583
std::vector< parametert > parameterst
Definition: std_types.h:585
const typet & return_type() const
Definition: std_types.h:689
const parameterst & parameters() const
Definition: std_types.h:699
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
exprt & op1()
Definition: expr.h:136
exprt & op0()
Definition: expr.h:133
std::optional< std::string > main
Definition: config.h:357
struct configt::ansi_ct ansi_c
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
bool empty() const
Definition: dstring.h:89
Equality.
Definition: std_expr.h:1361
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
source_locationt & add_source_location()
Definition: expr.h:236
const source_locationt & source_location() const
Definition: expr.h:231
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
Array index operator.
Definition: std_expr.h:1465
mp_integer largest() const
Return the largest value that can be represented using this type.
const irep_idt & id() const
Definition: irep.h:384
bool is_nil() const
Definition: irep.h:364
source_locationt source_location
Definition: message.h:247
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
mstreamt & error() const
Definition: message.h:399
static eomt eom
Definition: message.h:297
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:148
The null pointer constant.
Definition: pointer_expr.h:909
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
The pointer type These are both 'bitvector_typet' (they have a width) and 'type_with_subtypet' (they ...
Definition: pointer_expr.h:24
void add_pragma(const irep_idt &pragma)
The symbol table base class interface.
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
std::list< symbolst::const_iterator > match_name_or_base_name(const irep_idt &id) const
Collect all symbols the name of which matches id or the base name of which matches id.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
virtual std::pair< symbolt &, bool > insert(symbolt symbol)=0
Move or copy a new symbol to the symbol table.
Symbol table entry.
Definition: symbol.h:28
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
bool is_static_lifetime
Definition: symbol.h:70
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
exprt value
Initial value of symbol.
Definition: symbol.h:34
irep_idt mode
Language mode.
Definition: symbol.h:49
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2076
#define CPROVER_PREFIX
std::string type2c(const typet &type, const namespacet &ns, const expr2c_configurationt &configuration)
Definition: expr2c.cpp:4165
Goto Programs with Functions.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:40
static int8_t r
Definition: irep_hash.h:60
API to expression classes for Pointers.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
BigInt mp_integer
Definition: smt_terms.h:17
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
static std::optional< codet > static_lifetime_init(const irep_idt &identifier, symbol_table_baset &symbol_table)
#define INITIALIZE_FUNCTION
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:788
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:888
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
static exprt is_null(const array_string_exprt &string, array_poolt &array_pool)
Expression which is true when the string is equal to the literal "null".
std::optional< mp_integer > max_argc
Maximum value of argc, which is operating-systems dependent: Windows limits the number of characters ...
Definition: config.h:296
Author: Diffblue Ltd.
#define size_type
Definition: unistd.c:347