CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
mini_c_parser.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Mini C Parser
4
5Author: Daniel Kroening, dkr@amazon.com
6
7\*******************************************************************/
8
11
12#include "mini_c_parser.h"
13
15#include <util/invariant.h>
16
17#include "cscanner.h"
18
20{
21public:
23 {
24 }
25
26 c_translation_unitt parse(std::istream &);
27
28protected:
29 std::size_t token_index;
30 using tokenst = std::vector<ctokent>;
32
33 bool eof() const
34 {
35 return is_eof(peek());
36 }
37
43
44 const ctokent &peek() const
45 {
47 return tokens[token_index];
48 }
49
50 const ctokent &peek(std::size_t how_many) const
51 {
53 return tokens[token_index + how_many];
54 }
55
62
63 static bool is_storage_class(const ctokent &token)
64 {
65 return token == "auto" || token == "extern" || token == "static" ||
66 token == "register" || token == "_Thread_local";
67 }
68
69 static bool is_type_qualifier(const ctokent &token)
70 {
71 return token == "const" || token == "volatile" || token == "restrict" ||
72 token == "_Atomic";
73 }
74
75 void skip_ws(tokenst &);
76 void parse_brackets(char open, char close, tokenst &dest);
77};
78
79std::ostream &operator<<(std::ostream &out, const c_declarationt &declaration)
80{
81 for(const auto &t : declaration.pre_declarator)
82 out << t.text;
83
84 for(const auto &t : declaration.declarator)
85 out << t.text;
86
87 for(const auto &t : declaration.post_declarator)
88 out << t.text;
89
90 for(const auto &t : declaration.initializer)
91 out << t.text;
92
93 return out;
94}
95
96void c_declarationt::print(std::ostream &out) const
97{
98 if(!declarator.empty())
99 {
100 out << "DECLARATOR: ";
101 for(const auto &t : declarator)
102 out << t.text;
103 out << '\n';
104 }
105}
106
108{
109 return !post_declarator.empty() && post_declarator.front() == '(';
110}
111
113{
114 return !initializer.empty() && initializer.front() == '{';
115}
116
117std::optional<ctokent> c_declarationt::declared_identifier() const
118{
119 for(auto &t : declarator)
120 if(is_identifier(t))
121 return t;
122 return {};
123}
124
126{
127 if(eof())
128 return;
129
130 while(is_ws(peek()) || is_comment(peek()) ||
132 {
133 dest.push_back(consume_token());
134 }
135}
136
138{
139 if(eof() || peek() != open)
140 return;
141
142 std::size_t bracket_count = 0;
143 while(true)
144 {
145 if(eof())
146 throw invalid_input_exceptiont("expected " + std::string(1, close));
147
148 auto &token = consume_token();
149 dest.push_back(token);
150 if(token == open)
152 else if(token == close)
153 {
155 if(bracket_count == 0)
156 break; // done
157 }
158 }
159}
160
162{
163 // type qualifier
164 // storage class
165 // type
166 // '*'
167 tokenst result;
168
169 while(true)
170 {
171 skip_ws(result);
172
173 if(eof())
174 return result;
175
176 auto &token = peek();
177
178 if(
179 is_type_qualifier(token) || is_storage_class(token) || token == '*' ||
180 token == "int" || token == "signed" || token.text == "unsigned" ||
181 token == "char" || token == "short" || token == "long" ||
182 token == "float" || token == "double" || token == "inline" ||
183 token == "typedef")
184 {
185 result.push_back(consume_token());
186 }
187 else if(token == "enum" || token == "struct" || token == "union")
188 {
189 result.push_back(consume_token());
190
191 skip_ws(result);
192
193 // may be followed by a tag
194 if(!eof() && is_identifier(peek()))
195 result.push_back(consume_token());
196
197 skip_ws(result);
198
199 // may be followed by a body {...}
200 parse_brackets('{', '}', result);
201 }
202 else if(token == "__attribute__")
203 {
204 result.push_back(consume_token());
205 skip_ws(result);
206 // followed by (( ... ))
207 parse_brackets('(', ')', result);
208 }
209 else if(is_identifier(token))
210 {
211 // Might be typedef or the declarator.
212 // We look ahead for the next non-WS token to tell the difference.
213 std::size_t index = 1;
214 while(true)
215 {
216 const auto &next_token = peek(index);
217 if(
218 is_ws(next_token) || is_preprocessor_directive(next_token) ||
219 is_comment(next_token))
220 index++;
221 else
222 break;
223 }
224
225 auto &next_token = peek(index);
226 if(!is_identifier(next_token) && next_token != '*')
227 {
228 // 'token' is the declarator
229 return result;
230 }
231 else
232 result.push_back(consume_token()); // it's a type
233 }
234 else if(token == ';')
235 return result;
236 else if(token == '(') // function type, part of declarator
237 return result;
238 else
239 {
241 loc.set_line(token.line_number);
243 "expected a declaration but got '" + token.text + "'", loc);
244 }
245 }
246}
247
249{
250 // symbol
251 // ((...* symbol ...))
252
253 if(eof())
254 return {};
255
256 if(peek() == ';')
257 return {};
258
259 if(peek() == '(')
260 {
261 tokenst result;
262 parse_brackets('(', ')', result);
263 return result;
264 }
265 else if(is_identifier(peek()))
266 {
267 return {consume_token()};
268 }
269 else
270 {
272 loc.set_line(peek().line_number);
273 throw invalid_source_file_exceptiont("expected an identifier", loc);
274 }
275}
276
278{
279 // consume everything until we see one of the following:
280 // 1) ';' (end of declaration)
281 // 2) '{' (function body)
282 // 3) '=' (initializer)
283
284 tokenst result;
285 std::size_t open_parentheses = 0;
286
287 while(true)
288 {
289 if(eof())
290 return result;
291
292 if(peek() == '(')
293 {
295 result.push_back(consume_token());
296 continue;
297 }
298 else if(open_parentheses > 0)
299 {
300 if(peek() == ')')
302 result.push_back(consume_token());
303 continue;
304 }
305
306 if(peek() == ';' || peek() == '{' || peek() == '=')
307 return result;
308
309 result.push_back(consume_token());
310 }
311}
312
314{
315 if(eof())
316 return {};
317 else if(peek() == '=')
318 {
319 tokenst result;
320 while(true)
321 {
322 if(eof())
323 throw invalid_input_exceptiont("expected an initializer");
324 auto &token = consume_token();
325 result.push_back(token);
326 if(token == ';')
327 return result;
328 }
329 }
330 else if(peek() == ';')
331 {
332 // done
333 return {consume_token()};
334 }
335 else if(peek() == '{')
336 {
337 // function body
338 tokenst result;
339 std::size_t bracket_count = 0;
340 while(true)
341 {
342 if(eof())
343 throw invalid_input_exceptiont("eof in function body");
344 auto &token = consume_token();
345 result.push_back(token);
346 if(token == '{')
348 else if(token == '}')
349 {
351 if(bracket_count == 0)
352 return result;
353 }
354 }
355 }
356 else
357 PRECONDITION(false);
358}
359
361{
362 c_declarationt result;
363
365 result.declarator = parse_declarator();
368
369 return result;
370}
371
373{
375 cscanner.return_WS_and_comments = true;
376 tokens = cscanner.get_tokens();
377 token_index = 0;
378
379 if(tokens.empty())
380 return {};
381
382 DATA_INVARIANT(is_eof(tokens.back()), "token stream must end on eof");
383
384 c_translation_unitt result;
385
386 while(!eof())
387 result.push_back(parse_declaration());
388
389 return result;
390}
391
393{
394 return mini_c_parsert().parse(in);
395}
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Thrown when user-provided input cannot be processed.
Thrown when we can't handle something in an input source file.
tokenst parse_declarator()
void parse_brackets(char open, char close, tokenst &dest)
const ctokent & peek(std::size_t how_many) const
c_translation_unitt parse(std::istream &)
void skip_ws(tokenst &)
std::size_t token_index
const ctokent & peek() const
static bool is_storage_class(const ctokent &token)
tokenst parse_post_declarator()
tokenst parse_pre_declarator()
std::vector< ctokent > tokenst
bool eof() const
c_declarationt parse_declaration()
tokenst parse_initializer()
const ctokent & consume_token()
static bool is_type_qualifier(const ctokent &token)
void set_line(const irep_idt &line)
cscanner
static bool is_comment(const ctokent &t)
Definition ctoken.h:93
static bool is_preprocessor_directive(const ctokent &t)
Definition ctoken.h:98
static bool is_ws(const ctokent &t)
Definition ctoken.h:83
static bool is_eof(const ctokent &t)
Definition ctoken.h:88
int open(const char *pathname, int flags,...)
Definition fcntl.c:89
std::ostream & operator<<(std::ostream &out, const c_declarationt &declaration)
c_translation_unitt parse_c(std::istream &in)
Mini C Parser.
std::vector< c_declarationt > c_translation_unitt
static bool is_identifier(int token)
Definition parse.cpp:421
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:534
#define PRECONDITION(CONDITION)
Definition invariant.h:463
bool has_body() const
std::optional< ctokent > declared_identifier() const
bool is_function() const
void print(std::ostream &) const
tokenst post_declarator
tokenst initializer
tokenst pre_declarator
int close(int fildes)
Definition unistd.c:139