CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
c_preprocess.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
9#include "c_preprocess.h"
10
11#include <util/c_types.h>
12#include <util/config.h>
13#include <util/message.h>
14#include <util/prefix.h>
15#include <util/run.h>
16#include <util/suffix.h>
17#include <util/tempfile.h>
18#include <util/unicode.h>
19
20#include <fstream>
21
22static void error_parse_line(
23 const std::string &line,
24 bool warning_only,
25 messaget &message)
26{
27 std::string error_msg=line;
29
30 if(has_prefix(line, "file "))
31 {
32 const char *tptr=line.c_str();
33 int state=0;
34 std::string file, line_no, column, _error_msg, function;
35
36 tptr+=5;
37
38 char previous=0;
39
40 while(*tptr!=0)
41 {
42 if(has_prefix(tptr, " line ") && state != 4)
43 {
44 state=1;
45 tptr+=6;
46 continue;
47 }
48 else if(has_prefix(tptr, " column ") && state != 4)
49 {
50 state=2;
51 tptr+=8;
52 continue;
53 }
54 else if(has_prefix(tptr, " function ") && state != 4)
55 {
56 state=3;
57 tptr+=10;
58 continue;
59 }
60 else if(*tptr==':' && state!=4)
61 {
62 if(tptr[1]==' ' && previous!=':')
63 {
64 state=4;
65 tptr++;
66 while(*tptr==' ') tptr++;
67 continue;
68 }
69 }
70
71 if(state==0) // file
72 file+=*tptr;
73 else if(state==1) // line number
74 line_no+=*tptr;
75 else if(state==2) // column
76 column+=*tptr;
77 else if(state==3) // function
78 function+=*tptr;
79 else if(state==4) // error message
81
82 previous=*tptr;
83
84 tptr++;
85 }
86
87 if(state==4)
88 {
89 saved_error_location.set_file(file);
90 saved_error_location.set_function(function);
91 saved_error_location.set_line(line_no);
92 saved_error_location.set_column(column);
94 }
95 }
96 else if(has_prefix(line, "In file included from "))
97 {
98 }
99 else
100 {
101 const char *tptr=line.c_str();
102 int state=0;
103 std::string file, line_no;
104
105 while(*tptr!=0)
106 {
107 if(state==0)
108 {
109 if(*tptr==':')
110 state++;
111 else
112 file+=*tptr;
113 }
114 else if(state==1)
115 {
116 if(*tptr==':')
117 state++;
118 else if(isdigit(*tptr))
119 line_no+=*tptr;
120 else
121 state=3;
122 }
123
124 tptr++;
125 }
126
127 if(state==2)
128 {
129 saved_error_location.set_file(file);
130 saved_error_location.set_function(irep_idt());
131 saved_error_location.set_line(line_no);
132 saved_error_location.set_column(irep_idt());
133 }
134 }
135
137 warning_only ? message.warning() : message.error();
139 m << error_msg << messaget::eom;
140}
141
142static void error_parse(
143 std::istream &errors,
144 bool warning_only,
145 messaget &message)
146{
147 std::string line;
148
149 while(std::getline(errors, line))
150 error_parse_line(line, warning_only, message);
151}
152
155 std::istream &instream,
156 std::ostream &outstream,
157 message_handlert &message_handler)
158{
159 temporary_filet tmp_file("tmp.stdin", ".c");
160
161 std::ofstream tmp(tmp_file());
162
163 if(!tmp)
164 {
165 messaget message(message_handler);
166 message.error() << "failed to open temporary file" << messaget::eom;
167 return true; // error
168 }
169
170 tmp << instream.rdbuf(); // copy
171
172 tmp.close(); // flush
173
174 bool result=c_preprocess(tmp_file(), outstream, message_handler);
175
176 return result;
177}
178
180static bool is_dot_i_file(const std::string &path)
181{
182 return has_suffix(path, ".i") || has_suffix(path, ".ii");
183}
184
187 const std::string &, std::ostream &, message_handlert &);
189 const std::string &, std::ostream &, message_handlert &);
191 const std::string &,
192 std::ostream &,
196 const std::string &, std::ostream &, message_handlert &);
198 const std::string &, std::ostream &, message_handlert &);
199
201 const std::string &path,
202 std::ostream &outstream,
203 message_handlert &message_handler)
204{
205 switch(config.ansi_c.preprocessor)
206 {
208 return c_preprocess_codewarrior(path, outstream, message_handler);
209
211 return
213 path, outstream, message_handler, config.ansi_c.preprocessor);
214
216 return
218 path, outstream, message_handler, config.ansi_c.preprocessor);
219
221 return c_preprocess_visual_studio(path, outstream, message_handler);
222
224 return c_preprocess_arm(path, outstream, message_handler);
225
227 return c_preprocess_none(path, outstream, message_handler);
228 }
229
230 // not reached
231 return true;
232}
233
236 const std::string &file,
237 std::ostream &outstream,
238 message_handlert &message_handler)
239{
240 // check extension
241 if(is_dot_i_file(file))
242 return c_preprocess_none(file, outstream, message_handler);
243
244 messaget message(message_handler);
245
246 // use Visual Studio's CL
247
248 temporary_filet stderr_file("tmp.stderr", "");
249 temporary_filet command_file_name("tmp.cl-cmd", "");
250
251 {
252 std::ofstream command_file(command_file_name());
253
254 // This marks the command file as UTF-8, which Visual Studio
255 // understands.
256 command_file << char(0xef) << char(0xbb) << char(0xbf);
257
258 command_file << "/nologo" << '\n';
259 command_file << "/E" << '\n';
260
261 // This option will make CL produce utf-8 output, as
262 // opposed to 8-bit with some code page.
263 // It only works on Visual Studio 2015 or newer.
264 command_file << "/source-charset:utf-8" << '\n';
265
266 command_file << "/D__CPROVER__" << "\n";
267 command_file << "/D__WORDSIZE=" << config.ansi_c.pointer_width << "\n";
268
270 {
271 command_file << "\"/D__PTRDIFF_TYPE__=long long int\"" << "\n";
272 // yes, both _WIN32 and _WIN64 get defined
273 command_file << "/D_WIN64" << "\n";
274 }
275 else if(config.ansi_c.int_width == 16 && config.ansi_c.pointer_width == 32)
276 {
277 // 16-bit LP32 is an artificial architecture we simulate when using --16
280 "Pointer difference expected to be long int typed");
281 command_file << "/D__PTRDIFF_TYPE__=long" << '\n';
282 }
283 else
284 {
287 "Pointer difference expected to be int typed");
288 command_file << "/D__PTRDIFF_TYPE__=int" << "\n";
289 }
290
291 if(config.ansi_c.char_is_unsigned)
292 command_file << "/J" << "\n"; // This causes _CHAR_UNSIGNED to be defined
293
294 for(const auto &define : config.ansi_c.defines)
295 command_file << "/D" << shell_quote(define) << "\n";
296
297 for(const auto &include_path : config.ansi_c.include_paths)
298 command_file << "/I" << shell_quote(include_path) << "\n";
299
300 for(const auto &include_file : config.ansi_c.include_files)
301 command_file << "/FI" << shell_quote(include_file) << "\n";
302
303 // Finally, the file to be preprocessed
304 // (this is already in UTF-8).
305 command_file << shell_quote(file) << "\n";
306 }
307
308 // _popen isn't very reliable on WIN32
309 // that's why we use run()
310 int result =
311 run("cl", {"cl", "@" + command_file_name()}, "", outstream, stderr_file());
312
313 // errors/warnings
314 std::ifstream stderr_stream(stderr_file());
315 error_parse(stderr_stream, result==0, message);
316
317 if(result!=0)
318 {
319 message.error() << "CL Preprocessing failed" << messaget::eom;
320 return true;
321 }
322
323 return false;
324}
325
328 std::istream &instream,
329 std::ostream &outstream)
330{
331 // CodeWarrior prepends some header to the file,
332 // marked with '#' signs.
333 // We skip over it.
334 //
335 // CodeWarrior has an ugly way of marking lines, e.g.:
336 //
337 // /* #line 1 "__ppc_eabi_init.cpp" /* stack depth 0 */
338 //
339 // We remove the initial '/* ' prefix
340
341 std::string line;
342
343 while(instream)
344 {
345 std::getline(instream, line);
346
347 if(line.size()>=2 &&
348 line[0]=='#' && (line[1]=='#' || line[1]==' ' || line[1]=='\t'))
349 {
350 // skip the line!
351 }
352 else if(line.size()>=3 &&
353 line[0]=='/' && line[1]=='*' && line[2]==' ')
354 {
355 outstream << line.c_str()+3 << "\n"; // strip the '/* '
356 }
357 else
358 outstream << line << "\n";
359 }
360}
361
364 const std::string &file,
365 std::ostream &outstream,
366 message_handlert &message_handler)
367{
368 // check extension
369 if(is_dot_i_file(file))
370 return c_preprocess_none(file, outstream, message_handler);
371
372 // preprocessing
373 messaget message(message_handler);
374
375 temporary_filet stderr_file("tmp.stderr", "");
376
377 std::vector<std::string> command = {
378 "mwcceppc", "-E", "-P", "-D__CPROVER__", "-ppopt", "line", "-ppopt full"};
379
380 for(const auto &define : config.ansi_c.defines)
381 command.push_back(" -D" + define);
382
383 for(const auto &include_path : config.ansi_c.include_paths)
384 command.push_back(" -I" + include_path);
385
386 for(const auto &include_file : config.ansi_c.include_files)
387 {
388 command.push_back(" -include");
389 command.push_back(include_file);
390 }
391
392 for(const auto &opt : config.ansi_c.preprocessor_options)
393 command.push_back(opt);
394
395 temporary_filet tmpi("tmp.cl", "");
396 command.push_back(file);
397 command.push_back("-o");
398 command.push_back(tmpi());
399
400 int result = run(command[0], command, "", "", stderr_file());
401
402 std::ifstream stream_i(tmpi());
403
404 if(stream_i)
405 {
407
408 stream_i.close();
409 }
410 else
411 {
412 message.error() << "Preprocessing failed (fopen failed)"
413 << messaget::eom;
414 return true;
415 }
416
417 // errors/warnings
418 std::ifstream stderr_stream(stderr_file());
419 error_parse(stderr_stream, result==0, message);
420
421 if(result!=0)
422 {
423 message.error() << "Preprocessing failed" << messaget::eom;
424 return true;
425 }
426
427 return false;
428}
429
432 const std::string &file,
433 std::ostream &outstream,
434 message_handlert &message_handler,
436{
437 // check extension
438 if(is_dot_i_file(file))
439 return c_preprocess_none(file, outstream, message_handler);
440
441 // preprocessing
442 messaget message(message_handler);
443
444 temporary_filet stderr_file("tmp.stderr", "");
445
446 std::vector<std::string> argv;
447
449 argv.push_back("clang");
450 else
451 argv.push_back("gcc");
452
453 argv.push_back("-E");
454 argv.push_back("-D__CPROVER__");
455
456 const irep_idt &arch = config.ansi_c.arch;
457
458 if(config.ansi_c.pointer_width == 16)
459 {
460 if(arch == "i386" || arch == "x86_64" || arch == "x32")
461 argv.push_back("-m16");
462 else if(arch.starts_with("mips"))
463 argv.push_back("-mips16");
464 }
465 else if(config.ansi_c.pointer_width == 32)
466 {
467 if(arch == "i386" || arch == "x86_64")
468 argv.push_back("-m32");
469 else if(arch == "x32")
470 argv.push_back("-mx32");
471 else if(arch.starts_with("mips"))
472 argv.push_back("-mabi=32");
473 else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
474 argv.push_back("-m32");
475 else if(arch == "s390" || arch == "s390x")
476 argv.push_back("-m31"); // yes, 31, not 32!
477 else if(arch == "sparc" || arch == "sparc64")
478 argv.push_back("-m32");
479 }
480 else if(config.ansi_c.pointer_width == 64)
481 {
482 if(arch == "i386" || arch == "x86_64" || arch == "x32")
483 argv.push_back("-m64");
484 else if(arch.starts_with("mips"))
485 argv.push_back("-mabi=64");
486 else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
487 argv.push_back("-m64");
488 else if(arch == "s390" || arch == "s390x")
489 argv.push_back("-m64");
490 else if(arch == "sparc" || arch == "sparc64")
491 argv.push_back("-m64");
492 }
493
494 // The width of wchar_t depends on the OS!
495 if(config.ansi_c.wchar_t_width == config.ansi_c.short_int_width)
496 argv.push_back("-fshort-wchar");
497
498 if(config.ansi_c.char_is_unsigned)
499 argv.push_back("-funsigned-char");
500
502 argv.push_back("-nostdinc");
503
504 // Set the standard
505 if(
506 has_suffix(file, ".cpp") || has_suffix(file, ".CPP") ||
508 has_suffix(file, ".C") ||
509#endif
510 has_suffix(file, ".c++") || has_suffix(file, ".C++") ||
511 has_suffix(file, ".cp") || has_suffix(file, ".CP") ||
512 has_suffix(file, ".cc") || has_suffix(file, ".cxx"))
513 {
514 switch(config.cpp.cpp_standard)
515 {
517#if defined(__OpenBSD__)
519 argv.push_back("-std=c++98");
520 else
521#endif
522 argv.push_back("-std=gnu++98");
523 break;
524
526#if defined(__OpenBSD__)
528 argv.push_back("-std=c++03");
529 else
530#endif
531 argv.push_back("-std=gnu++03");
532 break;
533
535#if defined(__OpenBSD__)
537 argv.push_back("-std=c++11");
538 else
539#endif
540 argv.push_back("-std=gnu++11");
541 break;
542
544#if defined(__OpenBSD__)
546 argv.push_back("-std=c++14");
547 else
548#endif
549 argv.push_back("-std=gnu++14");
550 break;
551
553#if defined(__OpenBSD__)
555 argv.push_back("-std=c++17");
556 else
557#endif
558 argv.push_back("-std=gnu++17");
559 break;
560 }
561 }
562 else
563 {
564 switch(config.ansi_c.c_standard)
565 {
567#if defined(__OpenBSD__)
569 argv.push_back("-std=c89");
570 else
571#endif
572 argv.push_back("-std=gnu89");
573 break;
574
576#if defined(__OpenBSD__)
578 argv.push_back("-std=c99");
579 else
580#endif
581 argv.push_back("-std=gnu99");
582 break;
583
585#if defined(__OpenBSD__)
587 argv.push_back("-std=c11");
588 else
589#endif
590 argv.push_back("-std=gnu11");
591 break;
592 }
593 }
594
595 for(const auto &define : config.ansi_c.defines)
596 argv.push_back("-D" + define);
597
598 for(const auto &include_path : config.ansi_c.include_paths)
599 argv.push_back("-I" + include_path);
600
601 for(const auto &include_file : config.ansi_c.include_files)
602 {
603 argv.push_back("-include");
604 argv.push_back(include_file);
605 }
606
607 for(const auto &opt : config.ansi_c.preprocessor_options)
608 argv.push_back(opt);
609
610 int result;
611
612 #if 0
613 // the following forces the mode
614 switch(config.ansi_c.mode)
615 {
616 case configt::ansi_ct::flavourt::GCC_C: command+=" -x c"; break;
617 case configt::ansi_ct::flavourt::GCC_CPP: command+=" -x c++"; break;
618 default:
619 {
620 }
621 }
622 #endif
623
624 // the file that is to be preprocessed
625 argv.push_back(file);
626
627 // execute clang or gcc
628 result = run(argv[0], argv, "", outstream, stderr_file());
629
630 // errors/warnings
631 std::ifstream stderr_stream(stderr_file());
632 error_parse(stderr_stream, result==0, message);
633
634 if(result!=0)
635 {
636 message.error() << "GCC preprocessing failed" << messaget::eom;
637 return true;
638 }
639
640 return false;
641}
642
645 const std::string &file,
646 std::ostream &outstream,
647 message_handlert &message_handler)
648{
649 // check extension
650 if(is_dot_i_file(file))
651 return c_preprocess_none(file, outstream, message_handler);
652
653 // preprocessing using armcc
654 messaget message(message_handler);
655
656 temporary_filet stderr_file("tmp.stderr", "");
657
658 std::vector<std::string> argv;
659
660 argv.push_back("armcc");
661 argv.push_back("-E");
662 argv.push_back("-D__CPROVER__");
663
665 argv.push_back("--bigend");
666 else
667 argv.push_back("--littleend");
668
669 if(config.ansi_c.char_is_unsigned)
670 argv.push_back("--unsigned_chars");
671 else
672 argv.push_back("--signed_chars");
673
674 // Set the standard
675 switch(config.ansi_c.c_standard)
676 {
678 argv.push_back("--c90");
679 break;
680
683 argv.push_back("--c99");
684 break;
685 }
686
687 for(const auto &define : config.ansi_c.defines)
688 argv.push_back("-D" + define);
689
690 for(const auto &include_path : config.ansi_c.include_paths)
691 argv.push_back("-I" + include_path);
692
693 // the file that is to be preprocessed
694 argv.push_back(file);
695
696 int result;
697
698 // execute armcc
699 result = run(argv[0], argv, "", outstream, stderr_file());
700
701 // errors/warnings
702 std::ifstream stderr_stream(stderr_file());
703 error_parse(stderr_stream, result==0, message);
704
705 if(result!=0)
706 {
707 message.error() << "ARMCC preprocessing failed" << messaget::eom;
708 return true;
709 }
710
711 return false;
712}
713
716 const std::string &file,
717 std::ostream &outstream,
718 message_handlert &message_handler)
719{
720 std::ifstream infile(widen_if_needed(file));
721
722 if(!infile)
723 {
724 messaget message(message_handler);
725 message.error() << "failed to open '" << file << "'" << messaget::eom;
726 return true;
727 }
728
730 {
731 // special treatment for "/* #line"
733 }
734 else
735 {
736 char ch;
737
738 while(infile.read(&ch, 1))
739 outstream << ch;
740 }
741
742 return false;
743}
744
746const char c_test_program[]=
747 "#include <stdlib.h>\n"
748 "\n"
749 "int main() { }\n";
750
752{
753 std::ostringstream out;
754 std::istringstream in(c_test_program);
755
756 return c_preprocess(in, out, message_handler);
757}
configt config
Definition config.cpp:25
bool c_preprocess_visual_studio(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
bool c_preprocess_arm(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
bool c_preprocess_none(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
static void error_parse(std::istream &errors, bool warning_only, messaget &message)
const char c_test_program[]
tests ANSI-C preprocessing
bool c_preprocess_codewarrior(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
static void error_parse_line(const std::string &line, bool warning_only, messaget &message)
void postprocess_codewarrior(std::istream &instream, std::ostream &outstream)
post-processing specifically for CodeWarrior
bool c_preprocess(std::istream &instream, std::ostream &outstream, message_handlert &message_handler)
ANSI-C preprocessing.
bool test_c_preprocessor(message_handlert &message_handler)
bool c_preprocess_gcc_clang(const std::string &, std::ostream &, message_handlert &, configt::ansi_ct::preprocessort)
ANSI-C preprocessing.
static bool is_dot_i_file(const std::string &path)
ANSI-C preprocessing.
signedbv_typet signed_long_int_type()
Definition c_types.cpp:72
signedbv_typet signed_int_type()
Definition c_types.cpp:22
signedbv_typet pointer_diff_type()
Definition c_types.cpp:220
signedbv_typet signed_long_long_int_type()
Definition c_types.cpp:79
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
struct configt::cppt cpp
struct configt::ansi_ct ansi_c
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
bool starts_with(const char *s) const
equivalent of as_string().starts_with(s)
Definition dstring.h:95
source_locationt source_location
Definition message.h:239
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:154
mstreamt & error() const
Definition message.h:391
mstreamt & warning() const
Definition message.h:396
static eomt eom
Definition message.h:289
bool has_prefix(const std::string &s, const std::string &prefix)
Definition converter.cpp:13
int isdigit(int c)
Definition ctype.c:24
int run(const std::string &what, const std::vector< std::string > &argv)
Definition run.cpp:48
std::string shell_quote(const std::string &src)
quote a string for bash and CMD
Definition run.cpp:451
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:534
bool has_suffix(const std::string &s, const std::string &suffix)
Definition suffix.h:17
#define widen_if_needed(s)
Definition unicode.h:28
dstringt irep_idt