CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ansi_c_entry_point.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
10
11#include <util/arith_tools.h>
12#include <util/c_types.h>
13#include <util/config.h>
14#include <util/message.h>
15#include <util/pointer_expr.h>
17
19
21
23#include "expr2c.h"
24
26 const code_typet::parameterst &parameters,
28 symbol_table_baset &symbol_table,
29 const c_object_factory_parameterst &object_factory_parameters)
30{
32 main_arguments.resize(parameters.size());
33
34 for(std::size_t param_number=0;
35 param_number<parameters.size();
37 {
38 const code_typet::parametert &p=parameters[param_number];
39 const irep_idt base_name=p.get_base_name().empty()?
40 ("argument#"+std::to_string(param_number)):p.get_base_name();
41
44 symbol_table,
45 base_name,
46 p.type(),
48 object_factory_parameters,
50 }
51
52 return main_arguments;
53}
54
56 const symbolt &function,
58 symbol_table_baset &symbol_table)
59{
60 bool has_return_value =
61 to_code_type(function.type).return_type() != void_type();
62
63 if(has_return_value)
64 {
65 const symbolt &return_symbol = symbol_table.lookup_ref("return'");
66
67 // record return value
69 return_symbol.base_name, return_symbol.symbol_expr(), function.location});
70 }
71
72 #if 0
73 std::size_t i=0;
74
75 for(const auto &p : parameters)
76 {
77 if(p.get_identifier().empty())
78 continue;
79
80 irep_idt identifier=p.get_identifier();
81
82 const symbolt &symbol=symbol_table.lookup(identifier);
83
84 if(symbol.type.id()==ID_pointer)
85 {
86 codet output(ID_output);
87 output.operands().resize(2);
88
89 output.op0()=
93 from_integer(0, index_type())));
94 output.op1()=symbol.symbol_expr();
95 output.add_source_location()=p.source_location();
96
97 init_code.add(std::move(output));
98 }
99
100 i++;
101 }
102 #endif
103}
104
106 symbol_table_baset &symbol_table,
107 message_handlert &message_handler,
108 const c_object_factory_parameterst &object_factory_parameters)
109{
110 // check if entry point is already there
111 if(symbol_table.symbols.find(goto_functionst::entry_point())!=
112 symbol_table.symbols.end())
113 return false; // silently ignore
114
115 irep_idt main_symbol;
116
117 // find main symbol, if any is given
118 if(config.main.has_value())
119 {
120 auto matches = symbol_table.match_name_or_base_name(*config.main);
121
122 for(auto it = matches.begin(); it != matches.end();) // no ++it
123 {
124 if((*it)->second.type.id() != ID_code || (*it)->second.is_property)
125 it = matches.erase(it);
126 else
127 ++it;
128 }
129
130 if(matches.empty())
131 {
132 messaget message(message_handler);
133 message.error() << "main symbol '" << config.main.value() << "' not found"
134 << messaget::eom;
135 return true; // give up
136 }
137
138 if(matches.size()>=2)
139 {
140 messaget message(message_handler);
141 message.error() << "main symbol '" << config.main.value()
142 << "' is ambiguous" << messaget::eom;
143 return true;
144 }
145
146 main_symbol = matches.front()->first;
147 }
148 else
149 main_symbol=ID_main;
150
151 // look it up
152 symbol_table_baset::symbolst::const_iterator s_it =
153 symbol_table.symbols.find(main_symbol);
154
155 if(s_it==symbol_table.symbols.end())
156 return false; // give up silently
157
158 const symbolt &symbol=s_it->second;
159
160 // check if it has a body
161 if(symbol.value.is_nil())
162 {
163 messaget message(message_handler);
164 message.error() << "main symbol '" << id2string(main_symbol)
165 << "' has no body" << messaget::eom;
166 return false; // give up
167 }
168
169 static_lifetime_init(symbol_table, symbol.location);
170
172 symbol, symbol_table, message_handler, object_factory_parameters);
173}
174
185 const symbolt &symbol,
186 symbol_table_baset &symbol_table,
187 message_handlert &message_handler,
188 const c_object_factory_parameterst &object_factory_parameters)
189{
190 PRECONDITION(!symbol.value.is_nil());
192
193 // add 'HIDE' label
195
196 // build call to initialization function
197
198 {
199 symbol_table_baset::symbolst::const_iterator init_it =
200 symbol_table.symbols.find(INITIALIZE_FUNCTION);
201
202 if(init_it==symbol_table.symbols.end())
203 {
204 messaget message(message_handler);
205 message.error() << "failed to find " INITIALIZE_FUNCTION " symbol"
206 << messaget::eom;
207 return true;
208 }
209
210 code_function_callt call_init(init_it->second.symbol_expr());
211 call_init.add_source_location()=symbol.location;
212
213 init_code.add(std::move(call_init));
214 }
215
216 // build call to main function
217
219 call_main.add_source_location()=symbol.location;
220 call_main.function().add_source_location()=symbol.location;
221
222 if(to_code_type(symbol.type).return_type() != void_type())
223 {
225 return_symbol.mode=ID_C;
226 return_symbol.is_static_lifetime=false;
227 return_symbol.name="return'";
228 return_symbol.base_name = "return'";
229 return_symbol.type=to_code_type(symbol.type).return_type();
230
231 symbol_table.add(return_symbol);
232 call_main.lhs()=return_symbol.symbol_expr();
233 }
234
235 const code_typet::parameterst &parameters=
236 to_code_type(symbol.type).parameters();
237
238 if(symbol.name==ID_main)
239 {
240 if(parameters.empty())
241 {
242 // ok
243 }
244 // The C standard (and any related architecture descriptions) enforces an
245 // order of parameters. The user, however, may supply arbitrary
246 // (syntactically valid) C code, even one that does not respect the calling
247 // conventions set out in the C standard. If the user does supply such code,
248 // then we can only tell them that they got it wrong, which is what we do
249 // via the error message in the else branch of this code.
250 else if(
251 parameters.size() >= 2 && parameters[1].type().id() == ID_pointer &&
252 (parameters.size() == 2 ||
253 (parameters.size() == 3 && parameters[2].type().id() == ID_pointer)))
254 {
255 namespacet ns(symbol_table);
256
257 {
259 argc_symbol.base_name = "argc'";
260 argc_symbol.is_static_lifetime = true;
261 argc_symbol.is_lvalue = true;
262
263 auto r = symbol_table.insert(argc_symbol);
264 if(!r.second && r.first != argc_symbol)
265 {
266 messaget message(message_handler);
267 message.error() << "argc already exists but is not usable"
268 << messaget::eom;
269 return true;
270 }
271 }
272
273 const symbolt &argc_symbol = ns.lookup("argc'");
274
275 {
276 // we make the type of this thing an array of pointers
277 // need to add one to the size -- the array is terminated
278 // with NULL
279 const exprt one_expr = from_integer(1, argc_symbol.type);
280 const plus_exprt size_expr(argc_symbol.symbol_expr(), one_expr);
282
284 argv_symbol.base_name = "argv'";
285 argv_symbol.is_static_lifetime = true;
286 argv_symbol.is_lvalue = true;
287
288 auto r = symbol_table.insert(argv_symbol);
289 if(!r.second && r.first != argv_symbol)
290 {
291 messaget message(message_handler);
292 message.error() << "argv already exists but is not usable"
293 << messaget::eom;
294 return true;
295 }
296 }
297
298 const symbolt &argv_symbol=ns.lookup("argv'");
300
301 {
302 // Assume argc is at least zero. Note that we don't assume it's
303 // at least one, since this isn't guaranteed, as exemplified by
304 // https://www.qualys.com/2022/01/25/cve-2021-4034/pwnkit.txt
305 // The C standard only guarantees "The value of argc shall be
306 // nonnegative." and "argv[argc] shall be a null pointer."
307 exprt zero = from_integer(0, argc_symbol.type);
308
310 argc_symbol.symbol_expr(), ID_ge, std::move(zero));
311
312 init_code.add(code_assumet(std::move(ge)));
313 }
314
315 if(config.ansi_c.max_argc.has_value())
316 {
318 from_integer(*config.ansi_c.max_argc, argc_symbol.type);
319
321 argc_symbol.symbol_expr(), ID_le, std::move(bound_expr));
322
323 init_code.add(code_assumet(std::move(le)));
324 }
325
326 // record argc as an input
327 init_code.add(code_inputt{"argc", argc_symbol.symbol_expr()});
328
329 if(parameters.size()==3)
330 {
331 {
332 symbolt envp_size_symbol{"envp_size'", size_type(), ID_C};
333 envp_size_symbol.base_name = "envp_size'";
334 envp_size_symbol.is_static_lifetime = true;
335
336 if(!symbol_table.insert(std::move(envp_size_symbol)).second)
337 {
338 messaget message(message_handler);
339 message.error()
340 << "failed to insert envp_size symbol" << messaget::eom;
341 return true;
342 }
343 }
344
345 const symbolt &envp_size_symbol=ns.lookup("envp_size'");
346
347 {
349 "envp'",
351 pointer_type(char_type()), envp_size_symbol.symbol_expr()),
352 ID_C};
353 envp_symbol.base_name = "envp'";
354 envp_symbol.is_static_lifetime = true;
355
356 if(!symbol_table.insert(std::move(envp_symbol)).second)
357 {
358 messaget message(message_handler);
359 message.error() << "failed to insert envp symbol" << messaget::eom;
360 return true;
361 }
362 }
363
364 // assume envp_size is INTMAX-1
365 const mp_integer max =
367
369
371 envp_size_symbol.symbol_expr(), ID_le, std::move(max_minus_one));
372
374 }
375
376 {
377 /* zero_string doesn't work yet */
378
379 /*
380 exprt zero_string(ID_zero_string, array_typet());
381 zero_string.type().subtype()=char_type();
382 zero_string.type().set(ID_size, "infinity");
383 const index_exprt index(zero_string, from_integer(0, uint_type()));
384 exprt address_of =
385 typecast_exprt::conditional_cast(
386 address_of_exprt(index, pointer_type(char_type())),
387 argv_symbol.type.subtype());
388
389 // assign argv[*] to the address of a string-object
390 array_of_exprt array_of(address_of, argv_symbol.type);
391
392 init_code.copy_to_operands(
393 code_assignt(argv_symbol.symbol_expr(), array_of));
394 */
395 }
396
397 {
398 // assign argv[argc] to NULL
400 to_pointer_type(argv_array_type.element_type()));
401
403 argv_symbol.symbol_expr(),
405 argc_symbol.symbol_expr(), argv_array_type.index_type()));
406
408 // disable bounds check on that one
409 init_code.statements().back().add_source_location().add_pragma(
410 "disable:bounds-check");
411 }
412
413 if(parameters.size()==3)
414 {
415 const symbolt &envp_symbol=ns.lookup("envp'");
417 const symbolt &envp_size_symbol=ns.lookup("envp_size'");
418
419 // assume envp[envp_size] is NULL
421 to_pointer_type(envp_array_type.element_type()));
422
424 envp_symbol.symbol_expr(),
426 envp_size_symbol.symbol_expr(), envp_array_type.index_type()));
427
428 equal_exprt is_null(std::move(index_expr), std::move(null));
429
431 // disable bounds check on that one
432 init_code.statements().back().add_source_location().add_pragma(
433 "disable:bounds-check");
434 }
435
436 {
437 exprt::operandst &operands=call_main.arguments();
438
439 if(parameters.size()==3)
440 operands.resize(3);
441 else
442 operands.resize(2);
443
444 exprt &op0=operands[0];
445 exprt &op1=operands[1];
446
448 argc_symbol.symbol_expr(), parameters[0].type());
449
450 {
452 argv_symbol.symbol_expr(),
453 from_integer(0, argv_array_type.index_type()));
454 // disable bounds check on that one
455 index_expr.add_source_location().add_pragma("disable:bounds-check");
456
458 to_pointer_type(parameters[1].type());
459
462 }
463
464 // do we need envp?
465 if(parameters.size()==3)
466 {
467 const symbolt &envp_symbol=ns.lookup("envp'");
469
471 envp_symbol.symbol_expr(),
472 from_integer(0, envp_array_type.index_type()));
473
475 to_pointer_type(parameters[2].type());
476
479 }
480 }
481 }
482 else
483 {
484 const namespacet ns{symbol_table};
485 const std::string main_signature = type2c(symbol.type, ns);
486 messaget message(message_handler);
487 message.error().source_location = symbol.location;
488 message.error() << "'main' with signature '" << main_signature
489 << "' found,"
490 << " but expecting one of:\n"
491 << " int main(void)\n"
492 << " int main(int argc, char *argv[])\n"
493 << " int main(int argc, char *argv[], char *envp[])\n"
494 << "If this is a non-standard main entry point please "
495 "provide a custom\n"
496 << "entry function and use --function instead"
497 << messaget::eom;
498 return true;
499 }
500 }
501 else
502 {
503 // produce nondet arguments
505 parameters, init_code, symbol_table, object_factory_parameters);
506 }
507
508 init_code.add(std::move(call_main));
509
510 // TODO: add read/modified (recursively in call graph) globals as INPUT/OUTPUT
511
512 record_function_outputs(symbol, init_code, symbol_table);
513
514 // now call destructor functions (a GCC extension)
515
516 for(const auto &symbol_table_entry : symbol_table.symbols)
517 {
518 const symbolt &symbol = symbol_table_entry.second;
519
520 if(symbol.type.id() != ID_code)
521 continue;
522
523 const code_typet &code_type = to_code_type(symbol.type);
524 if(
525 code_type.return_type().id() == ID_destructor &&
526 code_type.parameters().empty())
527 {
529 destructor_call.add_source_location() = symbol.location;
530 init_code.add(std::move(destructor_call));
531 }
532 }
533
534 // add the entry point symbol
535 symbolt new_symbol{
537 new_symbol.base_name = goto_functionst::entry_point();
538 new_symbol.value.swap(init_code);
539
540 if(!symbol_table.insert(std::move(new_symbol)).second)
541 {
542 messaget message(message_handler);
543 message.error() << "failed to insert main symbol" << messaget::eom;
544 return true;
545 }
546
547 return false;
548}
@ AUTOMATIC_LOCAL
Allocate local objects with automatic lifetime.
bool generate_ansi_c_start_function(const symbolt &symbol, symbol_table_baset &symbol_table, message_handlert &message_handler, const c_object_factory_parameterst &object_factory_parameters)
Generate a _start function for a specific function.
exprt::operandst build_function_environment(const code_typet::parameterst &parameters, code_blockt &init_code, symbol_table_baset &symbol_table, const c_object_factory_parameterst &object_factory_parameters)
void record_function_outputs(const symbolt &function, code_blockt &init_code, symbol_table_baset &symbol_table)
bool ansi_c_entry_point(symbol_table_baset &symbol_table, message_handlert &message_handler, const c_object_factory_parameterst &object_factory_parameters)
configt config
Definition config.cpp:25
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
const integer_bitvector_typet & to_integer_bitvector_type(const typet &type)
Cast a typet to an integer_bitvector_typet.
symbol_exprt c_nondet_symbol_factory(code_blockt &init_code, symbol_table_baset &symbol_table, const irep_idt base_name, const typet &type, const source_locationt &loc, const c_object_factory_parameterst &object_factory_parameters, const lifetimet lifetime)
Creates a symbol and generates code so that it can vary over all possible values for its type.
C Nondet Symbol Factory.
empty_typet void_type()
Definition c_types.cpp:245
signedbv_typet signed_int_type()
Definition c_types.cpp:22
pointer_typet pointer_type(const typet &subtype)
Definition c_types.cpp:235
bitvector_typet char_type()
Definition c_types.cpp:106
Operator to return the address of an object.
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Arrays with given size.
Definition std_types.h:807
Internally generated symbol table entryThis is a symbol generated as part of translation to or modifi...
Definition symbol.h:153
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition std_expr.h:762
An assumption, which must hold in subsequent code.
Definition std_code.h:217
A codet representing sequential composition of program statements.
Definition std_code.h:130
A codet representing an assignment in the program.
Definition std_code.h:24
goto_instruction_codet representation of a function call statement.
A goto_instruction_codet representing the declaration that an input of a particular description has a...
codet representation of a label for branch targets.
Definition std_code.h:959
A goto_instruction_codet representing the declaration that an output of a particular description has ...
A codet representing a skip statement.
Definition std_code.h:322
const irep_idt & get_base_name() const
Definition std_types.h:639
Base type of functions.
Definition std_types.h:583
std::vector< parametert > parameterst
Definition std_types.h:586
Data structure for representing an arbitrary statement in a program.
exprt & op0()
Definition expr.h:133
exprt & op1()
Definition expr.h:136
std::optional< std::string > main
Definition config.h:372
struct configt::ansi_ct ansi_c
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
Equality.
Definition std_expr.h:1366
Base class for all expressions.
Definition expr.h:56
std::vector< exprt > operandst
Definition expr.h:58
typet & type()
Return the type of the expression.
Definition expr.h:84
operandst & operands()
Definition expr.h:94
const source_locationt & source_location() const
Definition expr.h:231
source_locationt & add_source_location()
Definition expr.h:236
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
Array index operator.
Definition std_expr.h:1470
const irep_idt & id() const
Definition irep.h:388
bool is_nil() const
Definition irep.h:368
source_locationt source_location
Definition message.h:239
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:154
mstreamt & error() const
Definition message.h:391
static eomt eom
Definition message.h:289
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
The null pointer constant.
The plus expression Associativity is not specified.
Definition std_expr.h:1002
The pointer type These are both 'bitvector_typet' (they have a width) and 'type_with_subtypet' (they ...
The symbol table base class interface.
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
virtual std::pair< symbolt &, bool > insert(symbolt symbol)=0
Move or copy a new symbol to the symbol table.
std::list< symbolst::const_iterator > match_name_or_base_name(const irep_idt &id) const
Collect all symbols the name of which matches id or the base name of which matches id.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
Symbol table entry.
Definition symbol.h:28
source_locationt location
Source code location of definition of symbol.
Definition symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition symbol.cpp:121
typet type
Type of symbol.
Definition symbol.h:31
irep_idt name
The unique identifier.
Definition symbol.h:40
irep_idt irep_idt base_name
Name of module the symbol belongs to.
Definition symbol.h:46
exprt value
Initial value of symbol.
Definition symbol.h:34
irep_idt mode
Language mode.
Definition symbol.h:49
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition std_expr.h:2081
#define CPROVER_PREFIX
std::string type2c(const typet &type, const namespacet &ns, const expr2c_configurationt &configuration)
Definition expr2c.cpp:4171
Goto Programs with Functions.
const std::string & id2string(const irep_idt &d)
Definition irep.h:44
static int8_t r
Definition irep_hash.h:60
API to expression classes for Pointers.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
BigInt mp_integer
Definition smt_terms.h:17
#define PRECONDITION(CONDITION)
Definition invariant.h:463
static std::optional< codet > static_lifetime_init(const irep_idt &identifier, symbol_table_baset &symbol_table)
#define INITIALIZE_FUNCTION
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition std_types.h:788
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition std_types.h:888
static exprt is_null(const array_string_exprt &string, array_poolt &array_pool)
Expression which is true when the string is equal to the literal "null".
Author: Diffblue Ltd.
#define size_type
Definition unistd.c:186