CBMC
c_preprocess.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "c_preprocess.h"
10 
11 #include <util/c_types.h>
12 #include <util/config.h>
13 #include <util/message.h>
14 #include <util/prefix.h>
15 #include <util/run.h>
16 #include <util/suffix.h>
17 #include <util/tempfile.h>
18 #include <util/unicode.h>
19 
20 #include <fstream>
21 
22 static void error_parse_line(
23  const std::string &line,
24  bool warning_only,
25  messaget &message)
26 {
27  std::string error_msg=line;
28  source_locationt saved_error_location;
29 
30  if(has_prefix(line, "file "))
31  {
32  const char *tptr=line.c_str();
33  int state=0;
34  std::string file, line_no, column, _error_msg, function;
35 
36  tptr+=5;
37 
38  char previous=0;
39 
40  while(*tptr!=0)
41  {
42  if(has_prefix(tptr, " line ") && state != 4)
43  {
44  state=1;
45  tptr+=6;
46  continue;
47  }
48  else if(has_prefix(tptr, " column ") && state != 4)
49  {
50  state=2;
51  tptr+=8;
52  continue;
53  }
54  else if(has_prefix(tptr, " function ") && state != 4)
55  {
56  state=3;
57  tptr+=10;
58  continue;
59  }
60  else if(*tptr==':' && state!=4)
61  {
62  if(tptr[1]==' ' && previous!=':')
63  {
64  state=4;
65  tptr++;
66  while(*tptr==' ') tptr++;
67  continue;
68  }
69  }
70 
71  if(state==0) // file
72  file+=*tptr;
73  else if(state==1) // line number
74  line_no+=*tptr;
75  else if(state==2) // column
76  column+=*tptr;
77  else if(state==3) // function
78  function+=*tptr;
79  else if(state==4) // error message
80  _error_msg+=*tptr;
81 
82  previous=*tptr;
83 
84  tptr++;
85  }
86 
87  if(state==4)
88  {
89  saved_error_location.set_file(file);
90  saved_error_location.set_function(function);
91  saved_error_location.set_line(line_no);
92  saved_error_location.set_column(column);
93  error_msg=_error_msg;
94  }
95  }
96  else if(has_prefix(line, "In file included from "))
97  {
98  }
99  else
100  {
101  const char *tptr=line.c_str();
102  int state=0;
103  std::string file, line_no;
104 
105  while(*tptr!=0)
106  {
107  if(state==0)
108  {
109  if(*tptr==':')
110  state++;
111  else
112  file+=*tptr;
113  }
114  else if(state==1)
115  {
116  if(*tptr==':')
117  state++;
118  else if(isdigit(*tptr))
119  line_no+=*tptr;
120  else
121  state=3;
122  }
123 
124  tptr++;
125  }
126 
127  if(state==2)
128  {
129  saved_error_location.set_file(file);
130  saved_error_location.set_function(irep_idt());
131  saved_error_location.set_line(line_no);
132  saved_error_location.set_column(irep_idt());
133  }
134  }
135 
137  warning_only ? message.warning() : message.error();
138  m.source_location=saved_error_location;
139  m << error_msg << messaget::eom;
140 }
141 
142 static void error_parse(
143  std::istream &errors,
144  bool warning_only,
145  messaget &message)
146 {
147  std::string line;
148 
149  while(std::getline(errors, line))
150  error_parse_line(line, warning_only, message);
151 }
152 
155  std::istream &instream,
156  std::ostream &outstream,
157  message_handlert &message_handler)
158 {
159  temporary_filet tmp_file("tmp.stdin", ".c");
160 
161  std::ofstream tmp(tmp_file());
162 
163  if(!tmp)
164  {
165  messaget message(message_handler);
166  message.error() << "failed to open temporary file" << messaget::eom;
167  return true; // error
168  }
169 
170  tmp << instream.rdbuf(); // copy
171 
172  tmp.close(); // flush
173 
174  bool result=c_preprocess(tmp_file(), outstream, message_handler);
175 
176  return result;
177 }
178 
180 static bool is_dot_i_file(const std::string &path)
181 {
182  return has_suffix(path, ".i") || has_suffix(path, ".ii");
183 }
184 
187  const std::string &, std::ostream &, message_handlert &);
188 bool c_preprocess_arm(
189  const std::string &, std::ostream &, message_handlert &);
191  const std::string &,
192  std::ostream &,
195 bool c_preprocess_none(
196  const std::string &, std::ostream &, message_handlert &);
198  const std::string &, std::ostream &, message_handlert &);
199 
201  const std::string &path,
202  std::ostream &outstream,
203  message_handlert &message_handler)
204 {
205  switch(config.ansi_c.preprocessor)
206  {
208  return c_preprocess_codewarrior(path, outstream, message_handler);
209 
211  return
213  path, outstream, message_handler, config.ansi_c.preprocessor);
214 
216  return
218  path, outstream, message_handler, config.ansi_c.preprocessor);
219 
221  return c_preprocess_visual_studio(path, outstream, message_handler);
222 
224  return c_preprocess_arm(path, outstream, message_handler);
225 
227  return c_preprocess_none(path, outstream, message_handler);
228  }
229 
230  // not reached
231  return true;
232 }
233 
236  const std::string &file,
237  std::ostream &outstream,
238  message_handlert &message_handler)
239 {
240  // check extension
241  if(is_dot_i_file(file))
242  return c_preprocess_none(file, outstream, message_handler);
243 
244  messaget message(message_handler);
245 
246  // use Visual Studio's CL
247 
248  temporary_filet stderr_file("tmp.stderr", "");
249  temporary_filet command_file_name("tmp.cl-cmd", "");
250 
251  {
252  std::ofstream command_file(command_file_name());
253 
254  // This marks the command file as UTF-8, which Visual Studio
255  // understands.
256  command_file << char(0xef) << char(0xbb) << char(0xbf);
257 
258  command_file << "/nologo" << '\n';
259  command_file << "/E" << '\n';
260 
261  // This option will make CL produce utf-8 output, as
262  // opposed to 8-bit with some code page.
263  // It only works on Visual Studio 2015 or newer.
264  command_file << "/source-charset:utf-8" << '\n';
265 
266  command_file << "/D__CPROVER__" << "\n";
267  command_file << "/D__WORDSIZE=" << config.ansi_c.pointer_width << "\n";
268 
270  {
271  command_file << "\"/D__PTRDIFF_TYPE__=long long int\"" << "\n";
272  // yes, both _WIN32 and _WIN64 get defined
273  command_file << "/D_WIN64" << "\n";
274  }
275  else if(config.ansi_c.int_width == 16 && config.ansi_c.pointer_width == 32)
276  {
277  // 16-bit LP32 is an artificial architecture we simulate when using --16
280  "Pointer difference expected to be long int typed");
281  command_file << "/D__PTRDIFF_TYPE__=long" << '\n';
282  }
283  else
284  {
287  "Pointer difference expected to be int typed");
288  command_file << "/D__PTRDIFF_TYPE__=int" << "\n";
289  }
290 
292  command_file << "/J" << "\n"; // This causes _CHAR_UNSIGNED to be defined
293 
294  for(const auto &define : config.ansi_c.defines)
295  command_file << "/D" << shell_quote(define) << "\n";
296 
297  for(const auto &include_path : config.ansi_c.include_paths)
298  command_file << "/I" << shell_quote(include_path) << "\n";
299 
300  for(const auto &include_file : config.ansi_c.include_files)
301  command_file << "/FI" << shell_quote(include_file) << "\n";
302 
303  // Finally, the file to be preprocessed
304  // (this is already in UTF-8).
305  command_file << shell_quote(file) << "\n";
306  }
307 
308  // _popen isn't very reliable on WIN32
309  // that's why we use run()
310  int result =
311  run("cl", {"cl", "@" + command_file_name()}, "", outstream, stderr_file());
312 
313  // errors/warnings
314  std::ifstream stderr_stream(stderr_file());
315  error_parse(stderr_stream, result==0, message);
316 
317  if(result!=0)
318  {
319  message.error() << "CL Preprocessing failed" << messaget::eom;
320  return true;
321  }
322 
323  return false;
324 }
325 
328  std::istream &instream,
329  std::ostream &outstream)
330 {
331  // CodeWarrior prepends some header to the file,
332  // marked with '#' signs.
333  // We skip over it.
334  //
335  // CodeWarrior has an ugly way of marking lines, e.g.:
336  //
337  // /* #line 1 "__ppc_eabi_init.cpp" /* stack depth 0 */
338  //
339  // We remove the initial '/* ' prefix
340 
341  std::string line;
342 
343  while(instream)
344  {
345  std::getline(instream, line);
346 
347  if(line.size()>=2 &&
348  line[0]=='#' && (line[1]=='#' || line[1]==' ' || line[1]=='\t'))
349  {
350  // skip the line!
351  }
352  else if(line.size()>=3 &&
353  line[0]=='/' && line[1]=='*' && line[2]==' ')
354  {
355  outstream << line.c_str()+3 << "\n"; // strip the '/* '
356  }
357  else
358  outstream << line << "\n";
359  }
360 }
361 
364  const std::string &file,
365  std::ostream &outstream,
366  message_handlert &message_handler)
367 {
368  // check extension
369  if(is_dot_i_file(file))
370  return c_preprocess_none(file, outstream, message_handler);
371 
372  // preprocessing
373  messaget message(message_handler);
374 
375  temporary_filet stderr_file("tmp.stderr", "");
376 
377  std::vector<std::string> command = {
378  "mwcceppc", "-E", "-P", "-D__CPROVER__", "-ppopt", "line", "-ppopt full"};
379 
380  for(const auto &define : config.ansi_c.defines)
381  command.push_back(" -D" + define);
382 
383  for(const auto &include_path : config.ansi_c.include_paths)
384  command.push_back(" -I" + include_path);
385 
386  for(const auto &include_file : config.ansi_c.include_files)
387  {
388  command.push_back(" -include");
389  command.push_back(include_file);
390  }
391 
392  for(const auto &opt : config.ansi_c.preprocessor_options)
393  command.push_back(opt);
394 
395  temporary_filet tmpi("tmp.cl", "");
396  command.push_back(file);
397  command.push_back("-o");
398  command.push_back(tmpi());
399 
400  int result = run(command[0], command, "", "", stderr_file());
401 
402  std::ifstream stream_i(tmpi());
403 
404  if(stream_i)
405  {
406  postprocess_codewarrior(stream_i, outstream);
407 
408  stream_i.close();
409  }
410  else
411  {
412  message.error() << "Preprocessing failed (fopen failed)"
413  << messaget::eom;
414  return true;
415  }
416 
417  // errors/warnings
418  std::ifstream stderr_stream(stderr_file());
419  error_parse(stderr_stream, result==0, message);
420 
421  if(result!=0)
422  {
423  message.error() << "Preprocessing failed" << messaget::eom;
424  return true;
425  }
426 
427  return false;
428 }
429 
432  const std::string &file,
433  std::ostream &outstream,
434  message_handlert &message_handler,
435  configt::ansi_ct::preprocessort preprocessor)
436 {
437  // check extension
438  if(is_dot_i_file(file))
439  return c_preprocess_none(file, outstream, message_handler);
440 
441  // preprocessing
442  messaget message(message_handler);
443 
444  temporary_filet stderr_file("tmp.stderr", "");
445 
446  std::vector<std::string> argv;
447 
449  argv.push_back("clang");
450  else
451  argv.push_back("gcc");
452 
453  argv.push_back("-E");
454  argv.push_back("-D__CPROVER__");
455 
456  const irep_idt &arch = config.ansi_c.arch;
457 
458  if(config.ansi_c.pointer_width == 16)
459  {
460  if(arch == "i386" || arch == "x86_64" || arch == "x32")
461  argv.push_back("-m16");
462  else if(arch.starts_with("mips"))
463  argv.push_back("-mips16");
464  }
465  else if(config.ansi_c.pointer_width == 32)
466  {
467  if(arch == "i386" || arch == "x86_64")
468  argv.push_back("-m32");
469  else if(arch == "x32")
470  argv.push_back("-mx32");
471  else if(arch.starts_with("mips"))
472  argv.push_back("-mabi=32");
473  else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
474  argv.push_back("-m32");
475  else if(arch == "s390" || arch == "s390x")
476  argv.push_back("-m31"); // yes, 31, not 32!
477  else if(arch == "sparc" || arch == "sparc64")
478  argv.push_back("-m32");
479  }
480  else if(config.ansi_c.pointer_width == 64)
481  {
482  if(arch == "i386" || arch == "x86_64" || arch == "x32")
483  argv.push_back("-m64");
484  else if(arch.starts_with("mips"))
485  argv.push_back("-mabi=64");
486  else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
487  argv.push_back("-m64");
488  else if(arch == "s390" || arch == "s390x")
489  argv.push_back("-m64");
490  else if(arch == "sparc" || arch == "sparc64")
491  argv.push_back("-m64");
492  }
493 
494  // The width of wchar_t depends on the OS!
496  argv.push_back("-fshort-wchar");
497 
499  argv.push_back("-funsigned-char");
500 
502  argv.push_back("-nostdinc");
503 
504  // Set the standard
505  if(
506  has_suffix(file, ".cpp") || has_suffix(file, ".CPP") ||
507 #ifndef _WIN32
508  has_suffix(file, ".C") ||
509 #endif
510  has_suffix(file, ".c++") || has_suffix(file, ".C++") ||
511  has_suffix(file, ".cp") || has_suffix(file, ".CP") ||
512  has_suffix(file, ".cc") || has_suffix(file, ".cxx"))
513  {
514  switch(config.cpp.cpp_standard)
515  {
517 #if defined(__OpenBSD__)
518  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
519  argv.push_back("-std=c++98");
520  else
521 #endif
522  argv.push_back("-std=gnu++98");
523  break;
524 
526 #if defined(__OpenBSD__)
527  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
528  argv.push_back("-std=c++03");
529  else
530 #endif
531  argv.push_back("-std=gnu++03");
532  break;
533 
535 #if defined(__OpenBSD__)
536  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
537  argv.push_back("-std=c++11");
538  else
539 #endif
540  argv.push_back("-std=gnu++11");
541  break;
542 
544 #if defined(__OpenBSD__)
545  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
546  argv.push_back("-std=c++14");
547  else
548 #endif
549  argv.push_back("-std=gnu++14");
550  break;
551 
553 #if defined(__OpenBSD__)
554  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
555  argv.push_back("-std=c++17");
556  else
557 #endif
558  argv.push_back("-std=gnu++17");
559  break;
560  }
561  }
562  else
563  {
564  switch(config.ansi_c.c_standard)
565  {
567 #if defined(__OpenBSD__)
568  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
569  argv.push_back("-std=c89");
570  else
571 #endif
572  argv.push_back("-std=gnu89");
573  break;
574 
576 #if defined(__OpenBSD__)
577  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
578  argv.push_back("-std=c99");
579  else
580 #endif
581  argv.push_back("-std=gnu99");
582  break;
583 
585 #if defined(__OpenBSD__)
586  if(preprocessor == configt::ansi_ct::preprocessort::CLANG)
587  argv.push_back("-std=c11");
588  else
589 #endif
590  argv.push_back("-std=gnu11");
591  break;
592  }
593  }
594 
595  for(const auto &define : config.ansi_c.defines)
596  argv.push_back("-D" + define);
597 
598  for(const auto &include_path : config.ansi_c.include_paths)
599  argv.push_back("-I" + include_path);
600 
601  for(const auto &include_file : config.ansi_c.include_files)
602  {
603  argv.push_back("-include");
604  argv.push_back(include_file);
605  }
606 
607  for(const auto &opt : config.ansi_c.preprocessor_options)
608  argv.push_back(opt);
609 
610  int result;
611 
612  #if 0
613  // the following forces the mode
614  switch(config.ansi_c.mode)
615  {
616  case configt::ansi_ct::flavourt::GCC_C: command+=" -x c"; break;
617  case configt::ansi_ct::flavourt::GCC_CPP: command+=" -x c++"; break;
618  default:
619  {
620  }
621  }
622  #endif
623 
624  // the file that is to be preprocessed
625  argv.push_back(file);
626 
627  // execute clang or gcc
628  result = run(argv[0], argv, "", outstream, stderr_file());
629 
630  // errors/warnings
631  std::ifstream stderr_stream(stderr_file());
632  error_parse(stderr_stream, result==0, message);
633 
634  if(result!=0)
635  {
636  message.error() << "GCC preprocessing failed" << messaget::eom;
637  return true;
638  }
639 
640  return false;
641 }
642 
645  const std::string &file,
646  std::ostream &outstream,
647  message_handlert &message_handler)
648 {
649  // check extension
650  if(is_dot_i_file(file))
651  return c_preprocess_none(file, outstream, message_handler);
652 
653  // preprocessing using armcc
654  messaget message(message_handler);
655 
656  temporary_filet stderr_file("tmp.stderr", "");
657 
658  std::vector<std::string> argv;
659 
660  argv.push_back("armcc");
661  argv.push_back("-E");
662  argv.push_back("-D__CPROVER__");
663 
665  argv.push_back("--bigend");
666  else
667  argv.push_back("--littleend");
668 
670  argv.push_back("--unsigned_chars");
671  else
672  argv.push_back("--signed_chars");
673 
674  // Set the standard
675  switch(config.ansi_c.c_standard)
676  {
678  argv.push_back("--c90");
679  break;
680 
683  argv.push_back("--c99");
684  break;
685  }
686 
687  for(const auto &define : config.ansi_c.defines)
688  argv.push_back("-D" + define);
689 
690  for(const auto &include_path : config.ansi_c.include_paths)
691  argv.push_back("-I" + include_path);
692 
693  // the file that is to be preprocessed
694  argv.push_back(file);
695 
696  int result;
697 
698  // execute armcc
699  result = run(argv[0], argv, "", outstream, stderr_file());
700 
701  // errors/warnings
702  std::ifstream stderr_stream(stderr_file());
703  error_parse(stderr_stream, result==0, message);
704 
705  if(result!=0)
706  {
707  message.error() << "ARMCC preprocessing failed" << messaget::eom;
708  return true;
709  }
710 
711  return false;
712 }
713 
716  const std::string &file,
717  std::ostream &outstream,
718  message_handlert &message_handler)
719 {
720  std::ifstream infile(widen_if_needed(file));
721 
722  if(!infile)
723  {
724  messaget message(message_handler);
725  message.error() << "failed to open '" << file << "'" << messaget::eom;
726  return true;
727  }
728 
730  {
731  // special treatment for "/* #line"
732  postprocess_codewarrior(infile, outstream);
733  }
734  else
735  {
736  char ch;
737 
738  while(infile.read(&ch, 1))
739  outstream << ch;
740  }
741 
742  return false;
743 }
744 
746 const char c_test_program[]=
747  "#include <stdlib.h>\n"
748  "\n"
749  "int main() { }\n";
750 
751 bool test_c_preprocessor(message_handlert &message_handler)
752 {
753  std::ostringstream out;
754  std::istringstream in(c_test_program);
755 
756  return c_preprocess(in, out, message_handler);
757 }
configt config
Definition: config.cpp:25
bool c_preprocess_visual_studio(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
bool c_preprocess_arm(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
bool c_preprocess_none(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
static void error_parse(std::istream &errors, bool warning_only, messaget &message)
const char c_test_program[]
tests ANSI-C preprocessing
bool c_preprocess_codewarrior(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
static void error_parse_line(const std::string &line, bool warning_only, messaget &message)
void postprocess_codewarrior(std::istream &instream, std::ostream &outstream)
post-processing specifically for CodeWarrior
bool c_preprocess(std::istream &instream, std::ostream &outstream, message_handlert &message_handler)
ANSI-C preprocessing.
bool test_c_preprocessor(message_handlert &message_handler)
bool c_preprocess_gcc_clang(const std::string &, std::ostream &, message_handlert &, configt::ansi_ct::preprocessort)
ANSI-C preprocessing.
static bool is_dot_i_file(const std::string &path)
ANSI-C preprocessing.
signedbv_typet signed_long_int_type()
Definition: c_types.cpp:72
signedbv_typet signed_int_type()
Definition: c_types.cpp:22
signedbv_typet pointer_diff_type()
Definition: c_types.cpp:220
signedbv_typet signed_long_long_int_type()
Definition: c_types.cpp:79
unsigned char opt
Definition: cegis.c:20
struct configt::cppt cpp
struct configt::ansi_ct ansi_c
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
bool starts_with(const char *s) const
equivalent of as_string().starts_with(s)
Definition: dstring.h:95
source_locationt source_location
Definition: message.h:247
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
mstreamt & error() const
Definition: message.h:399
mstreamt & warning() const
Definition: message.h:404
static eomt eom
Definition: message.h:297
void set_column(const irep_idt &column)
void set_file(const irep_idt &file)
void set_line(const irep_idt &line)
void set_function(const irep_idt &function)
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
int isdigit(int c)
Definition: ctype.c:24
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
std::string shell_quote(const std::string &src)
quote a string for bash and CMD
Definition: run.cpp:451
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
std::list< std::string > include_paths
Definition: config.h:269
enum configt::ansi_ct::c_standardt c_standard
endiannesst endianness
Definition: config.h:207
std::size_t pointer_width
Definition: config.h:143
std::list< std::string > include_files
Definition: config.h:270
irep_idt arch
Definition: config.h:221
std::list< std::string > preprocessor_options
Definition: config.h:268
std::list< std::string > defines
Definition: config.h:266
std::size_t wchar_t_width
Definition: config.h:147
preprocessort preprocessor
Definition: config.h:264
bool char_is_unsigned
Definition: config.h:150
std::size_t short_int_width
Definition: config.h:141
flavourt mode
Definition: config.h:253
std::size_t int_width
Definition: config.h:137
enum configt::cppt::cpp_standardt cpp_standard
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:17
#define widen_if_needed(s)
Definition: unicode.h:28
dstringt irep_idt