CBMC
mini_c_parser.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Mini C Parser
4 
5 Author: Daniel Kroening, dkr@amazon.com
6 
7 \*******************************************************************/
8 
11 
12 #include "mini_c_parser.h"
13 
14 #include <util/exception_utils.h>
15 #include <util/invariant.h>
16 
17 #include "cscanner.h"
18 
20 {
21 public:
23  {
24  }
25 
// Entry point of the parser: takes the input stream to be parsed and
// returns the resulting translation unit.
26  c_translation_unitt parse(std::istream &);
27 
28 protected:
// Index into `tokens` of the token the parser is currently looking at;
// peek() reads at this index and consuming a token increments it.
29  std::size_t token_index;
// Container type used for every token sequence handled by this parser.
30  using tokenst = std::vector<ctokent>;
32 
33  bool eof() const
34  {
35  return is_eof(peek());
36  }
37 
43 
// Returns a reference to the token at the cursor (tokens[token_index])
// without moving the cursor.
// NOTE(review): listing line 46 is not visible here; it may hold a bounds
// check like the one in peek(std::size_t) -- confirm against the real file.
44  const ctokent &peek() const
45  {
47  return tokens[token_index];
48  }
49 
50  const ctokent &peek(std::size_t how_many) const
51  {
52  PRECONDITION(token_index + how_many < tokens.size());
53  return tokens[token_index + how_many];
54  }
55 
// NOTE(review): the signature line and listing lines 58-59 are missing from
// this listing; this is presumably the body of consume_token() -- confirm.
57  {
// returns the token at the cursor, then moves the cursor one token forward
60  return tokens[token_index++];
61  }
62 
63  static bool is_storage_class(const ctokent &token)
64  {
65  return token == "auto" || token == "extern" || token == "static" ||
66  token == "register" || token == "_Thread_local";
67  }
68 
69  static bool is_type_qualifier(const ctokent &token)
70  {
71  return token == "const" || token == "volatile" || token == "restrict" ||
72  token == "_Atomic";
73  }
74 
// Moves the skippable tokens (whitespace, comments) found at the cursor
// into the given token list.
75  void skip_ws(tokenst &);
// If the cursor is on `open`, appends the tokens up to and including the
// balancing `close` to `dest`.
76  void parse_brackets(char open, char close, tokenst &dest);
77 };
78 
79 std::ostream &operator<<(std::ostream &out, const c_declarationt &declaration)
80 {
81  for(const auto &t : declaration.pre_declarator)
82  out << t.text;
83 
84  for(const auto &t : declaration.declarator)
85  out << t.text;
86 
87  for(const auto &t : declaration.post_declarator)
88  out << t.text;
89 
90  for(const auto &t : declaration.initializer)
91  out << t.text;
92 
93  return out;
94 }
95 
96 void c_declarationt::print(std::ostream &out) const
97 {
98  if(!declarator.empty())
99  {
100  out << "DECLARATOR: ";
101  for(const auto &t : declarator)
102  out << t.text;
103  out << '\n';
104  }
105 }
106 
// NOTE(review): the signature line is not part of this listing; going by the
// member it inspects this is presumably c_declarationt::is_function() --
// confirm.
// True when the post-declarator is non-empty and begins with '('.
108 {
109  return !post_declarator.empty() && post_declarator.front() == '(';
110 }
111 
// NOTE(review): the signature line is not part of this listing; going by the
// member it inspects this is presumably c_declarationt::has_body() --
// confirm.
// True when the initializer is non-empty and begins with '{'.
113 {
114  return !initializer.empty() && initializer.front() == '{';
115 }
116 
117 std::optional<ctokent> c_declarationt::declared_identifier() const
118 {
119  for(auto &t : declarator)
120  if(is_identifier(t))
121  return t;
122  return {};
123 }
124 
// NOTE(review): the signature line is missing from this listing; the use of
// `dest` suggests this is mini_c_parsert::skip_ws(tokenst &dest) -- confirm.
// Moves the run of whitespace and comment tokens at the cursor into `dest`.
126 {
// nothing to skip once the cursor is on the eof token
127  if(eof())
128  return;
129 
// NOTE(review): listing line 131, which carries the last operand of this
// condition and its closing parenthesis, is not visible here.
130  while(is_ws(peek()) || is_comment(peek()) ||
132  {
// skipped tokens are not dropped, they are appended to dest
133  dest.push_back(consume_token());
134  }
135 }
136 
// NOTE(review): the signature line is missing from this listing; the names
// open/close/dest match the declaration
// parse_brackets(char open, char close, tokenst &dest) -- confirm.
// If the cursor is on `open`, appends all tokens up to and including the
// `close` that balances it to `dest`; nested open/close pairs are tracked
// with a counter. If the cursor is at eof or on any other token, nothing is
// consumed. Throws invalid_input_exceptiont when eof is reached before the
// brackets are balanced.
138 {
139  if(eof() || peek() != open)
140  return;
141 
// the first token consumed below is `open` (checked above), so the counter
// is 1 before any decrement can happen
142  std::size_t bracket_count = 0;
143  while(true)
144  {
145  if(eof())
146  throw invalid_input_exceptiont("expected " + std::string(1, close));
147 
148  auto &token = consume_token();
// every token, brackets included, is copied to the output
149  dest.push_back(token);
150  if(token == open)
151  bracket_count++;
152  else if(token == close)
153  {
154  bracket_count--;
155  if(bracket_count == 0)
156  break; // done
157  }
158  }
159 }
160 
// NOTE(review): the line with this function's signature is missing from the
// listing; the body matches the member parse_pre_declarator() (presumably
// returning tokenst) -- confirm against the real file.
// Collects the tokens that precede the declarator: type qualifiers, storage
// classes, basic type keywords, '*', inline, typedef, enum/struct/union
// specifiers with optional tag and body, __attribute__ with its argument
// list, and identifiers judged to be type names. Whitespace and comments met
// on the way are collected too (through skip_ws).
162 {
163  // type qualifier
164  // storage class
165  // type
166  // '*'
167  tokenst result;
168 
169  while(true)
170  {
171  skip_ws(result);
172 
// running out of input ends the pre-declarator without an error
173  if(eof())
174  return result;
175 
176  auto &token = peek();
177 
// single-token specifiers are copied over as they are
178  if(
179  is_type_qualifier(token) || is_storage_class(token) || token == '*' ||
180  token == "int" || token == "signed" || token.text == "unsigned" ||
181  token == "char" || token == "short" || token == "long" ||
182  token == "float" || token == "double" || token == "inline" ||
183  token == "typedef")
184  {
185  result.push_back(consume_token());
186  }
187  else if(token == "enum" || token == "struct" || token == "union")
188  {
189  result.push_back(consume_token());
190 
191  skip_ws(result);
192 
193  // may be followed by a tag
194  if(!eof() && is_identifier(peek()))
195  result.push_back(consume_token());
196 
197  skip_ws(result);
198 
199  // may be followed by a body {...}
// parse_brackets consumes nothing when the cursor is not on '{'
200  parse_brackets('{', '}', result);
201  }
202  else if(token == "__attribute__")
203  {
204  result.push_back(consume_token());
205  skip_ws(result);
206  // followed by (( ... ))
207  parse_brackets('(', ')', result);
208  }
209  else if(is_identifier(token))
210  {
211  // Might be typedef or the declarator.
212  // We look ahead for the next non-WS token to tell the difference.
// pure look-ahead: nothing is consumed in this loop, and peek(index)
// checks its bound with PRECONDITION
213  std::size_t index = 1;
214  while(true)
215  {
216  const auto &next_token = peek(index);
217  if(
218  is_ws(next_token) || is_preprocessor_directive(next_token) ||
219  is_comment(next_token))
220  index++;
221  else
222  break;
223  }
224 
// an identifier followed by another identifier or by '*' is taken to be a
// type name; anything else makes it the declarator
225  auto &next_token = peek(index);
226  if(!is_identifier(next_token) && next_token != '*')
227  {
228  // 'token' is the declarator
229  return result;
230  }
231  else
232  result.push_back(consume_token()); // it's a type
233  }
// ';' and '(' end the pre-declarator; neither token is consumed here
234  else if(token == ';')
235  return result;
236  else if(token == '(') // function type, part of declarator
237  return result;
238  else
239  {
// NOTE(review): listing line 242 is missing. The visible lines build a
// source location from the token's line number and pass it, with the
// message, to a call whose head is not shown -- presumably a throw of
// invalid_source_file_exceptiont; confirm.
240  source_locationt loc;
241  loc.set_line(token.line_number);
243  "expected a declaration but got '" + token.text + "'", loc);
244  }
245  }
246 }
247 
// NOTE(review): the signature line is missing from this listing; this is
// presumably the body of parse_declarator() -- confirm.
// Returns the declarator tokens: an empty list at eof or when the cursor is
// on ';', the whole parenthesized group when the cursor is on '(', or the
// single identifier at the cursor. Throws invalid_source_file_exceptiont,
// carrying the line number of the offending token, for anything else.
249 {
250  // symbol
251  // ((...* symbol ...))
252 
253  if(eof())
254  return {};
255 
// the ';' is left in place, it is not consumed here
256  if(peek() == ';')
257  return {};
258 
259  if(peek() == '(')
260  {
261  tokenst result;
262  parse_brackets('(', ')', result);
263  return result;
264  }
265  else if(is_identifier(peek()))
266  {
267  return {consume_token()};
268  }
269  else
270  {
271  source_locationt loc;
272  loc.set_line(peek().line_number);
273  throw invalid_source_file_exceptiont("expected an identifier", loc);
274  }
275 }
276 
// NOTE(review): the signature line is missing from this listing; this is
// presumably the body of parse_post_declarator() -- confirm.
// Collects the tokens that follow the declarator. Collection stops, without
// consuming the stopping token, at a ';', '{' or '=' that is not enclosed in
// parentheses, or at eof.
278 {
279  // consume everything until we see one of the following:
280  // 1) ';' (end of declaration)
281  // 2) '{' (function body)
282  // 3) '=' (initializer)
283 
284  tokenst result;
// number of '(' seen that have not been closed yet
285  std::size_t open_parentheses = 0;
286 
287  while(true)
288  {
289  if(eof())
290  return result;
291 
292  if(peek() == '(')
293  {
294  ++open_parentheses;
295  result.push_back(consume_token());
296  continue;
297  }
// inside parentheses every token is taken, so ';', '{' and '=' do not
// terminate the scan there
298  else if(open_parentheses > 0)
299  {
300  if(peek() == ')')
301  --open_parentheses;
302  result.push_back(consume_token());
303  continue;
304  }
305 
306  if(peek() == ';' || peek() == '{' || peek() == '=')
307  return result;
308 
309  result.push_back(consume_token());
310  }
311 }
312 
// NOTE(review): the signature line is missing from this listing; this is
// presumably the body of parse_initializer() -- confirm.
// Collects the final part of a declaration, depending on the token at the
// cursor:
//   eof -> empty list
//   '=' -> all tokens up to and including the next ';'
//   ';' -> just that ';'
//   '{' -> all tokens up to and including the '}' that balances it
// Throws invalid_input_exceptiont when eof is hit inside an '=' initializer
// or inside a braced body. Any other starting token trips PRECONDITION.
314 {
315  if(eof())
316  return {};
317  else if(peek() == '=')
318  {
319  tokenst result;
320  while(true)
321  {
322  if(eof())
323  throw invalid_input_exceptiont("expected an initializer");
324  auto &token = consume_token();
325  result.push_back(token);
// the first ';' ends the initializer, with no nesting check
326  if(token == ';')
327  return result;
328  }
329  }
330  else if(peek() == ';')
331  {
332  // done
333  return {consume_token()};
334  }
335  else if(peek() == '{')
336  {
337  // function body
338  tokenst result;
// the first token consumed below is the '{' checked above, so the counter
// is 1 before any decrement can happen
339  std::size_t bracket_count = 0;
340  while(true)
341  {
342  if(eof())
343  throw invalid_input_exceptiont("eof in function body");
344  auto &token = consume_token();
345  result.push_back(token);
346  if(token == '{')
347  bracket_count++;
348  else if(token == '}')
349  {
350  bracket_count--;
351  if(bracket_count == 0)
352  return result;
353  }
354  }
355  }
356  else
357  PRECONDITION(false);
358 }
359 
// NOTE(review): the signature line is missing from this listing; this is
// presumably the body of parse_declaration() -- confirm.
// Parses one declaration and returns it as a c_declarationt.
361 {
362  c_declarationt result;
363 
// NOTE(review): listing lines 364 and 366 are not visible; they presumably
// fill result.pre_declarator and result.post_declarator -- confirm.
365  result.declarator = parse_declarator();
367  result.initializer = parse_initializer();
368 
369  return result;
370 }
371 
// NOTE(review): the signature line is missing from this listing; this is
// presumably the body of mini_c_parsert::parse(std::istream &in) -- confirm.
// Tokenizes the stream `in`, then parses declarations until the eof token is
// reached and returns them as a translation unit.
373 {
374  cscannert cscanner(in);
// presumably makes the scanner emit whitespace and comment tokens too --
// confirm in cscanner.h
375  cscanner.return_WS_and_comments = true;
376  tokens = cscanner.get_tokens();
// start reading at the first token
377  token_index = 0;
378 
// an empty token vector yields an empty translation unit
379  if(tokens.empty())
380  return {};
381 
// eof() and the look-ahead code rely on a trailing eof token
382  DATA_INVARIANT(is_eof(tokens.back()), "token stream must end on eof");
383 
384  c_translation_unitt result;
385 
386  while(!eof())
387  result.push_back(parse_declaration());
388 
389  return result;
390 }
391 
392 c_translation_unitt parse_c(std::istream &in)
393 {
394  return mini_c_parsert().parse(in);
395 }
bool return_WS_and_comments
Definition: cscanner.h:31
std::vector< ctokent > get_tokens()
Definition: cscanner.cpp:41
Definition: ctoken.h:19
Thrown when user-provided input cannot be processed.
Thrown when we can't handle something in an input source file.
tokenst parse_declarator()
void parse_brackets(char open, char close, tokenst &dest)
c_translation_unitt parse(std::istream &)
void skip_ws(tokenst &)
std::size_t token_index
static bool is_storage_class(const ctokent &token)
tokenst parse_post_declarator()
tokenst parse_pre_declarator()
const ctokent & peek(std::size_t how_many) const
std::vector< ctokent > tokenst
bool eof() const
const ctokent & peek() const
c_declarationt parse_declaration()
tokenst parse_initializer()
const ctokent & consume_token()
static bool is_type_qualifier(const ctokent &token)
void set_line(const irep_idt &line)
cscanner
static bool is_comment(const ctokent &t)
Definition: ctoken.h:93
static bool is_preprocessor_directive(const ctokent &t)
Definition: ctoken.h:98
static bool is_ws(const ctokent &t)
Definition: ctoken.h:83
static bool is_eof(const ctokent &t)
Definition: ctoken.h:88
int open(const char *pathname, int flags,...)
Definition: fcntl.c:89
std::ostream & operator<<(std::ostream &out, const c_declarationt &declaration)
c_translation_unitt parse_c(std::istream &in)
Mini C Parser.
std::vector< c_declarationt > c_translation_unitt
Definition: mini_c_parser.h:37
static bool is_identifier(int token)
Definition: parse.cpp:421
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
bool has_body() const
std::optional< ctokent > declared_identifier() const
bool is_function() const
void print(std::ostream &) const
tokenst post_declarator
Definition: mini_c_parser.h:28
tokenst initializer
Definition: mini_c_parser.h:29
tokenst declarator
Definition: mini_c_parser.h:27
tokenst pre_declarator
Definition: mini_c_parser.h:26
int close(int fildes)
Definition: unistd.c:139