CBMC
c_wrangler.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: C Wrangler
4 
5 Author: Daniel Kroening, dkr@amazon.com
6 
7 \*******************************************************************/
8 
11 
12 #include "c_wrangler.h"
13 
14 #include <util/cprover_prefix.h>
15 #include <util/exception_utils.h>
16 #include <util/json.h>
17 #include <util/run.h>
18 #include <util/string_utils.h>
19 
20 #include "c_defines.h"
21 #include "ctokenit.h"
22 #include "mini_c_parser.h"
23 
24 #include <fstream> // IWYU pragma: keep
25 #include <iostream>
26 #include <list>
27 #include <map>
28 #include <regex>
29 #include <sstream>
30 
32 {
33  // sources and preprocessing
34  std::vector<std::string> source_files;
35  std::vector<std::string> includes;
36  std::vector<std::string> defines;
37 
38  // transformations
40  {
41  std::string clause;
42  std::string content;
43  function_contract_clauset(std::string _clause, std::string _content)
44  : clause(std::move(_clause)), content(std::move(_content))
45  {
46  }
47  };
48 
50  {
51  std::string loop_type;
52  std::string identifier;
53  std::string clause;
54  std::string content;
56  std::string _loop_type,
57  std::string _identifier,
58  std::string _clause,
59  std::string _content)
60  : loop_type(std::move(_loop_type)),
61  identifier(std::move(_identifier)),
62  clause(std::move(_clause)),
63  content(std::move(_content))
64  {
65  }
66  };
67 
68  struct assertiont
69  {
70  std::string identifier;
71  std::string content;
72  assertiont(std::string _identifier, std::string _content)
73  : identifier(std::move(_identifier)), content(std::move(_content))
74  {
75  }
76  };
77 
78  struct functiont
79  {
80  // should be variant to preserve ordering
81  std::vector<function_contract_clauset> function_contract;
82  std::vector<loop_contract_clauset> loop_contract;
83  std::vector<assertiont> assertions;
84  std::optional<std::string> stub;
85  bool remove_static = false;
86  };
87 
88  using functionst = std::list<std::pair<std::regex, functiont>>;
90 
91  struct objectt
92  {
93  bool remove_static = false;
94  };
95 
96  using objectst = std::list<std::pair<std::regex, objectt>>;
98 
99  // output
100  std::string output;
101 
102  void configure_sources(const jsont &);
103  void configure_functions(const jsont &);
104  void configure_objects(const jsont &);
105  void configure_output(const jsont &);
106 };
107 
109 {
110  auto sources = config["sources"];
111 
112  if(!sources.is_null())
113  {
114  if(!sources.is_array())
115  throw deserialization_exceptiont("sources entry must be sequence");
116 
117  for(const auto &source : to_json_array(sources))
118  {
119  if(!source.is_string())
120  throw deserialization_exceptiont("source must be string");
121 
122  this->source_files.push_back(source.value);
123  }
124  }
125 
126  auto includes = config["includes"];
127 
128  if(!includes.is_null())
129  {
130  if(!includes.is_array())
131  throw deserialization_exceptiont("includes entry must be sequence");
132 
133  for(const auto &include : to_json_array(includes))
134  {
135  if(!include.is_string())
136  throw deserialization_exceptiont("include must be string");
137 
138  this->includes.push_back(include.value);
139  }
140  }
141 
142  auto defines = config["defines"];
143 
144  if(!defines.is_null())
145  {
146  if(!defines.is_array())
147  throw deserialization_exceptiont("defines entry must be sequence");
148 
149  for(const auto &define : to_json_array(defines))
150  {
151  if(!define.is_string())
152  throw deserialization_exceptiont("define must be string");
153 
154  this->defines.push_back(define.value);
155  }
156  }
157 }
158 
160 {
161  auto functions = config["functions"];
162 
163  if(functions.is_null())
164  return;
165 
166  if(!functions.is_array())
167  throw deserialization_exceptiont("functions entry must be sequence");
168 
169  for(const auto &function : to_json_array(functions))
170  {
171  if(!function.is_object())
172  throw deserialization_exceptiont("function entry must be object");
173 
174  for(const auto &function_entry : to_json_object(function))
175  {
176  const auto function_name = function_entry.first;
177  const auto &items = function_entry.second;
178 
179  if(!items.is_array())
180  throw deserialization_exceptiont("function entry must be sequence");
181 
182  this->functions.emplace_back(function_name, functiont{});
183  functiont &function_config = this->functions.back().second;
184 
185  for(const auto &function_item : to_json_array(items))
186  {
187  // These need to start with "ensures", "requires", "assigns",
188  // "invariant", "assert", "stub", "remove"
189  if(!function_item.is_string())
190  throw deserialization_exceptiont("function entry must be string");
191 
192  auto item_string = function_item.value;
193  auto split = split_string(item_string, ' ');
194  if(split.empty())
195  continue;
196 
197  if(
198  split[0] == "ensures" || split[0] == "requires" ||
199  split[0] == "assigns")
200  {
201  std::ostringstream rest;
202  join_strings(rest, split.begin() + 1, split.end(), ' ');
203 
204  function_config.function_contract.emplace_back(split[0], rest.str());
205  }
206  else if(split[0] == "assert" && split.size() >= 3)
207  {
208  std::ostringstream rest;
209  join_strings(rest, split.begin() + 2, split.end(), ' ');
210 
211  function_config.assertions.emplace_back(split[1], rest.str());
212  }
213  else if(
214  (split[0] == "for" || split[0] == "while" || split[0] == "loop") &&
215  split.size() >= 3 &&
216  (split[2] == "invariant" || split[2] == "assigns" ||
217  split[2] == "decreases"))
218  {
219  std::ostringstream rest;
220  join_strings(rest, split.begin() + 3, split.end(), ' ');
221 
222  function_config.loop_contract.emplace_back(
223  split[0], split[1], split[2], rest.str());
224  }
225  else if(split[0] == "stub")
226  {
227  std::ostringstream rest;
228  join_strings(rest, split.begin() + 1, split.end(), ' ');
229 
230  function_config.stub = rest.str();
231  }
232  else if(split[0] == "remove")
233  {
234  if(split.size() == 1)
235  throw deserialization_exceptiont("unexpected remove entry");
236 
237  if(split[1] == "static")
238  function_config.remove_static = true;
239  else
241  "unexpected remove entry " + split[1]);
242  }
243  else
245  "unexpected function entry " + split[0]);
246  }
247  }
248  }
249 }
250 
252 {
253  auto objects = config["objects"];
254 
255  if(objects.is_null())
256  return;
257 
258  if(!objects.is_array())
259  throw deserialization_exceptiont("objects entry must be sequence");
260 
261  for(const auto &object : to_json_array(objects))
262  {
263  if(!object.is_object())
264  throw deserialization_exceptiont("object entry must be object");
265 
266  for(const auto &object_entry : to_json_object(object))
267  {
268  const auto &object_name = object_entry.first;
269  const auto &items = object_entry.second;
270 
271  if(!items.is_array())
272  throw deserialization_exceptiont("object entry must be sequence");
273 
274  this->objects.emplace_back(object_name, objectt{});
275  objectt &object_config = this->objects.back().second;
276 
277  for(const auto &object_item : to_json_array(items))
278  {
279  // Needs to start with "remove"
280  if(!object_item.is_string())
281  throw deserialization_exceptiont("object entry must be string");
282 
283  auto item_string = object_item.value;
284  auto split = split_string(item_string, ' ');
285  if(split.empty())
286  continue;
287 
288  if(split[0] == "remove")
289  {
290  if(split.size() == 1)
291  throw deserialization_exceptiont("unexpected remove entry");
292 
293  if(split[1] == "static")
294  object_config.remove_static = true;
295  else
297  "unexpected remove entry " + split[1]);
298  }
299  else
301  "unexpected object entry " + split[0]);
302  }
303  }
304  }
305 }
306 
308 {
309  auto output = config["output"];
310 
311  if(output.is_null())
312  return;
313 
314  if(!output.is_string())
315  throw deserialization_exceptiont("output entry must be string");
316 
317  this->output = output.value;
318 }
319 
320 static std::string
321 preprocess(const std::string &source_file, const c_wranglert &c_wrangler)
322 {
323  std::vector<std::string> argv = {"cc", "-E", source_file};
324 
325  for(const auto &include : c_wrangler.includes)
326  {
327  argv.push_back("-I");
328  argv.push_back(include);
329  }
330 
331  for(const auto &define : c_wrangler.defines)
332  argv.push_back(std::string("-D") + define);
333 
334  std::ostringstream result;
335 
336  auto run_result = run("cc", argv, "", result, "");
337  if(run_result != 0)
338  throw system_exceptiont("preprocessing " + source_file + " has failed");
339 
340  return result.str();
341 }
342 
343 static c_definest
344 get_defines(const std::string &source_file, const c_wranglert &config)
345 {
346  std::vector<std::string> argv = {"cc", "-E", "-dM", source_file};
347 
348  for(const auto &include : config.includes)
349  {
350  argv.push_back("-I");
351  argv.push_back(include);
352  }
353 
354  std::ostringstream result;
355 
356  auto run_result = run("cc", argv, "", result, "");
357  if(run_result != 0)
358  throw system_exceptiont("preprocessing " + source_file + " has failed");
359 
360  c_definest defines;
361  defines.parse(result.str());
362  return defines;
363 }
364 
365 static void mangle_function(
366  const c_declarationt &declaration,
367  const c_definest &defines,
368  const c_wranglert::functiont &function_config,
369  std::ostream &out)
370 {
371  if(function_config.stub.has_value() && declaration.has_body())
372  {
373  // replace by stub
374  out << function_config.stub.value();
375  }
376  else
377  {
378  if(function_config.remove_static)
379  {
380  for(auto &t : declaration.pre_declarator)
381  {
382  if(t.text == "static")
383  {
384  // we replace by white space
385  out << std::string(6, ' ');
386  }
387  else
388  out << t.text;
389  }
390  }
391  else
392  {
393  for(auto &t : declaration.pre_declarator)
394  out << t.text;
395  }
396 
397  for(auto &t : declaration.declarator)
398  out << t.text;
399  for(auto &t : declaration.post_declarator)
400  out << t.text;
401 
402  if(!declaration.has_body())
403  {
404  for(auto &t : declaration.initializer)
405  out << t.text;
406  return;
407  }
408 
409  for(const auto &entry : function_config.function_contract)
410  out << ' ' << CPROVER_PREFIX << entry.clause << '('
411  << defines(entry.content) << ')';
412 
413  std::map<std::string, std::string> loop_invariants;
414  std::map<std::string, std::string> loop_assigns;
415  std::map<std::string, std::string> loop_decreases;
416 
417  for(const auto &entry : function_config.loop_contract)
418  {
419  if(entry.clause == "invariant")
420  loop_invariants[entry.loop_type + entry.identifier] = entry.content;
421  if(entry.clause == "assigns")
422  loop_assigns[entry.loop_type + entry.identifier] = entry.content;
423  if(entry.clause == "decreases")
424  loop_decreases[entry.loop_type + entry.identifier] = entry.content;
425  }
426 
427  if(
428  loop_invariants.empty() && loop_assigns.empty() && loop_decreases.empty())
429  {
430  for(auto &t : declaration.initializer)
431  out << t.text;
432  }
433  else
434  {
435  std::size_t for_count = 0, while_count = 0;
436  ctokenitt t(declaration.initializer);
437 
438  while(t)
439  {
440  const auto &token = *(t++);
441  out << token.text;
442  std::string invariant;
443  std::string assigns;
444  std::string decreases;
445 
446  if(token == "while")
447  {
448  while_count++;
449  invariant =
450  loop_invariants.count("while" + std::to_string(while_count))
451  ? loop_invariants["while" + std::to_string(while_count)]
452  : loop_invariants
453  ["loop" + std::to_string(while_count + for_count)];
454 
455  assigns =
456  loop_assigns.count("while" + std::to_string(while_count))
457  ? loop_assigns["while" + std::to_string(while_count)]
458  : loop_assigns["loop" + std::to_string(while_count + for_count)];
459 
460  decreases =
461  loop_decreases.count("while" + std::to_string(while_count))
462  ? loop_decreases["while" + std::to_string(while_count)]
463  : loop_decreases
464  ["loop" + std::to_string(while_count + for_count)];
465  }
466  else if(token == "for")
467  {
468  for_count++;
469  invariant = loop_invariants.count("for" + std::to_string(for_count))
470  ? loop_invariants["for" + std::to_string(for_count)]
471  : loop_invariants
472  ["loop" + std::to_string(while_count + for_count)];
473  assigns =
474  loop_assigns.count("for" + std::to_string(for_count))
475  ? loop_assigns["for" + std::to_string(for_count)]
476  : loop_assigns["loop" + std::to_string(while_count + for_count)];
477 
478  decreases = loop_decreases.count("for" + std::to_string(for_count))
479  ? loop_decreases["for" + std::to_string(for_count)]
480  : loop_decreases
481  ["loop" + std::to_string(while_count + for_count)];
482  }
483 
484  auto t_end = match_bracket(t, '(', ')');
485  for(; t != t_end; t++)
486  out << t->text;
487 
488  if(!assigns.empty())
489  {
490  out << ' ' << CPROVER_PREFIX << "assigns(" << defines(assigns) << ')';
491  }
492 
493  if(!invariant.empty())
494  {
495  out << ' ' << CPROVER_PREFIX << "loop_invariant("
496  << defines(invariant) << ')';
497  }
498 
499  if(!decreases.empty())
500  {
501  out << ' ' << CPROVER_PREFIX << "decreases(" << defines(decreases)
502  << ')';
503  }
504  }
505  }
506  }
507 }
508 
509 static void mangle_object(
510  const c_declarationt &declaration,
511  const c_definest &defines,
512  const c_wranglert::objectt &object_config,
513  std::ostream &out)
514 {
515  if(object_config.remove_static)
516  {
517  for(auto &t : declaration.pre_declarator)
518  {
519  if(t.text == "static")
520  {
521  // we replace by white space
522  out << std::string(6, ' ');
523  }
524  else
525  out << t.text;
526  }
527  }
528  else
529  {
530  for(auto &t : declaration.pre_declarator)
531  out << t.text;
532  }
533 
534  for(auto &t : declaration.declarator)
535  out << t.text;
536  for(auto &t : declaration.post_declarator)
537  out << t.text;
538  for(auto &t : declaration.initializer)
539  out << t.text;
540 }
541 
542 static void mangle(
543  const c_declarationt &declaration,
544  const c_definest &defines,
545  const c_wranglert &config,
546  std::ostream &out)
547 {
548  auto name_opt = declaration.declared_identifier();
549  if(declaration.is_function() && name_opt.has_value())
550  {
551  for(const auto &entry : config.functions)
552  {
553  if(std::regex_match(name_opt->text, entry.first))
554  {
555  // we are to modify this function
556  mangle_function(declaration, defines, entry.second, out);
557 
558  return;
559  }
560  }
561  }
562  else if(!declaration.is_function() && name_opt.has_value())
563  {
564  for(const auto &entry : config.objects)
565  {
566  if(std::regex_match(name_opt->text, entry.first))
567  {
568  // we are to modify this function
569  mangle_object(declaration, defines, entry.second, out);
570 
571  return;
572  }
573  }
574  }
575 
576  // output
577  out << declaration;
578 }
579 
580 static std::string mangle(
581  const std::string &in,
582  const c_definest &defines,
583  const c_wranglert &config)
584 {
585  std::ostringstream out;
586  std::istringstream in_str(in);
587 
588  auto parsed = parse_c(in_str);
589 
590  for(const auto &declaration : parsed)
591  mangle(declaration, defines, config, out);
592 
593  return out.str();
594 }
595 
596 void c_wrangler(const jsont &config)
597 {
599 
600  c_wrangler.configure_sources(config);
601  c_wrangler.configure_functions(config);
602  c_wrangler.configure_objects(config);
603  c_wrangler.configure_output(config);
604 
605  for(auto &source_file : c_wrangler.source_files)
606  {
607  // first preprocess
608  auto preprocessed = preprocess(source_file, c_wrangler);
609 
610  // get the defines
611  auto defines = get_defines(source_file, c_wrangler);
612 
613  // now mangle
614  auto mangled = mangle(preprocessed, defines, c_wrangler);
615 
616  // now output
617  if(c_wrangler.output == "stdout" || c_wrangler.output.empty())
618  {
619  std::cout << mangled;
620  }
621  else
622  {
623  std::ofstream out(c_wrangler.output);
624  out << mangled;
625  }
626  }
627 }
configt config
Definition: config.cpp:25
c_defines
static void mangle_function(const c_declarationt &declaration, const c_definest &defines, const c_wranglert::functiont &function_config, std::ostream &out)
Definition: c_wrangler.cpp:365
static c_definest get_defines(const std::string &source_file, const c_wranglert &config)
Definition: c_wrangler.cpp:344
static void mangle_object(const c_declarationt &declaration, const c_definest &defines, const c_wranglert::objectt &object_config, std::ostream &out)
Definition: c_wrangler.cpp:509
void c_wrangler(const jsont &config)
Definition: c_wrangler.cpp:596
static std::string preprocess(const std::string &source_file, const c_wranglert &c_wrangler)
Definition: c_wrangler.cpp:321
static void mangle(const c_declarationt &declaration, const c_definest &defines, const c_wranglert &config, std::ostream &out)
Definition: c_wrangler.cpp:542
C Wrangler.
This class maintains a representation of one assignment to the preprocessor macros in a C program.
Definition: c_defines.h:23
void parse(const std::string &)
Definition: c_defines.cpp:21
std::string text
Definition: ctoken.h:40
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
Definition: json.h:27
Thrown when some external system fails unexpectedly.
#define CPROVER_PREFIX
ctokenitt match_bracket(ctokenitt t, char open, char close)
Definition: ctokenit.cpp:33
ctokenit
json_arrayt & to_json_array(jsont &json)
Definition: json.h:424
json_objectt & to_json_object(jsont &json)
Definition: json.h:442
c_translation_unitt parse_c(std::istream &in)
Mini C Parser.
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
void split_string(const std::string &s, char delim, std::vector< std::string > &result, bool strip, bool remove_empty)
Stream & join_strings(Stream &&os, const It b, const It e, const Delimiter &delimiter, TransformFunc &&transform_func)
Prints items to an stream, separated by a constant delimiter.
Definition: string_utils.h:61
bool has_body() const
std::optional< ctokent > declared_identifier() const
bool is_function() const
tokenst post_declarator
Definition: mini_c_parser.h:28
tokenst initializer
Definition: mini_c_parser.h:29
tokenst declarator
Definition: mini_c_parser.h:27
tokenst pre_declarator
Definition: mini_c_parser.h:26
assertiont(std::string _identifier, std::string _content)
Definition: c_wrangler.cpp:72
function_contract_clauset(std::string _clause, std::string _content)
Definition: c_wrangler.cpp:43
std::vector< loop_contract_clauset > loop_contract
Definition: c_wrangler.cpp:82
std::vector< function_contract_clauset > function_contract
Definition: c_wrangler.cpp:81
std::vector< assertiont > assertions
Definition: c_wrangler.cpp:83
std::optional< std::string > stub
Definition: c_wrangler.cpp:84
loop_contract_clauset(std::string _loop_type, std::string _identifier, std::string _clause, std::string _content)
Definition: c_wrangler.cpp:55
void configure_output(const jsont &)
Definition: c_wrangler.cpp:307
void configure_objects(const jsont &)
Definition: c_wrangler.cpp:251
objectst objects
Definition: c_wrangler.cpp:97
std::vector< std::string > source_files
Definition: c_wrangler.cpp:34
std::string output
Definition: c_wrangler.cpp:100
std::list< std::pair< std::regex, functiont > > functionst
Definition: c_wrangler.cpp:88
std::list< std::pair< std::regex, objectt > > objectst
Definition: c_wrangler.cpp:96
functionst functions
Definition: c_wrangler.cpp:89
void configure_sources(const jsont &)
Definition: c_wrangler.cpp:108
std::vector< std::string > includes
Definition: c_wrangler.cpp:35
std::vector< std::string > defines
Definition: c_wrangler.cpp:36
void configure_functions(const jsont &)
Definition: c_wrangler.cpp:159