CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
smt2_tokenizer.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
9#include "smt2_tokenizer.h"
10
12{
13 // any non-empty sequence of letters, digits and the characters
14 // ~ ! @ $ % ^ & * _ - + = < > . ? /
15 // that does not start with a digit and is not a reserved word.
16
17 return isalnum(ch) ||
18 ch=='~' || ch=='!' || ch=='@' || ch=='$' || ch=='%' ||
19 ch=='^' || ch=='&' || ch=='*' || ch=='_' || ch=='-' ||
20 ch=='+' || ch=='=' || ch=='<' || ch=='>' || ch=='.' ||
21 ch=='?' || ch=='/';
22}
23
25{
26 // any non-empty sequence of letters, digits and the characters
27 // ~ ! @ $ % ^ & * _ - + = < > . ? /
28 // that does not start with a digit and is not a reserved word.
29
30 buffer.clear();
31
32 char ch;
33 while(in->get(ch))
34 {
36 {
37 buffer+=ch;
38 }
39 else
40 {
41 in->unget(); // put back
42 quoted_symbol = false;
43 return SYMBOL;
44 }
45 }
46
47 // eof -- this is ok here
48 if(buffer.empty())
49 return END_OF_FILE;
50 else
51 {
52 quoted_symbol = false;
53 return SYMBOL;
54 }
55}
56
58{
59 // we accept any sequence of digits and dots
60
61 buffer.clear();
62
63 char ch;
64 while(in->get(ch))
65 {
66 if(isdigit(ch) || ch=='.')
67 {
68 buffer+=ch;
69 }
70 else
71 {
72 in->unget(); // put back
73 return NUMERAL;
74 }
75 }
76
77 // eof -- this is ok here
78 if(buffer.empty())
79 return END_OF_FILE;
80 else
81 return NUMERAL;
82}
83
85{
86 // we accept any sequence of '0' or '1'
87
88 buffer.clear();
89 buffer+='#';
90 buffer+='b';
91
92 char ch;
93 while(in->get(ch))
94 {
95 if(ch=='0' || ch=='1')
96 {
97 buffer+=ch;
98 }
99 else
100 {
101 in->unget(); // put back
102 return NUMERAL;
103 }
104 }
105
106 // eof -- this is ok here
107 if(buffer.empty())
108 return END_OF_FILE;
109 else
110 return NUMERAL;
111}
112
114{
115 // we accept any sequence of '0'-'9', 'a'-'f', 'A'-'F'
116
117 buffer.clear();
118 buffer+='#';
119 buffer+='x';
120
121 char ch;
122 while(in->get(ch))
123 {
124 if(isxdigit(ch))
125 {
126 buffer+=ch;
127 }
128 else
129 {
130 in->unget(); // put back
131 return NUMERAL;
132 }
133 }
134
135 // eof -- this is ok here
136 if(buffer.empty())
137 return END_OF_FILE;
138 else
139 return NUMERAL;
140}
141
143{
144 // any sequence of printable ASCII characters (including space,
145 // tab, and line-breaking characters) except for the backslash
146 // character \, that starts and ends with | and does not otherwise
147 // contain |
148
149 buffer.clear();
150
151 char ch;
152 while(in->get(ch))
153 {
154 if(ch=='|')
155 {
156 quoted_symbol = true;
157 return SYMBOL; // done
158 }
159
160 buffer+=ch;
161
162 if(ch=='\n')
163 line_no++;
164 }
165
166 // Hmpf. Eof before end of quoted symbol. This is an error.
167 throw error("EOF within quoted symbol");
168}
169
171{
172 buffer.clear();
173
174 char ch;
175 while(in->get(ch))
176 {
177 if(ch=='"')
178 {
179 // quotes may be escaped by repeating
180 if(in->get(ch))
181 {
182 if(ch=='"')
183 {
184 }
185 else
186 {
187 in->unget();
188 return STRING_LITERAL; // done
189 }
190 }
191 else
192 return STRING_LITERAL; // done
193 }
194 buffer+=ch;
195 }
196
197 // Hmpf. Eof before end of string literal. This is an error.
198 throw error("EOF within string literal");
199}
200
202{
203 if(peeked)
204 peeked = false;
205 else
207
208 return token;
209}
210
212{
213 char ch;
214
215 while(in->get(ch))
216 {
217 switch(ch)
218 {
219 case '\n':
220 line_no++;
221 break;
222
223 case ' ':
224 case '\r':
225 case '\t':
226 case static_cast<char>(160): // non-breaking space
227 // skip any whitespace
228 break;
229
230 case ';': // comment
231 // skip until newline
232 while(in->get(ch))
233 {
234 if(ch=='\n')
235 {
236 line_no++;
237 break;
238 }
239 }
240 break;
241
242 case '(':
243 // produce sub-expression
244 token = OPEN;
245 return;
246
247 case ')':
248 // done with sub-expression
249 token = CLOSE;
250 return;
251
252 case '|': // quoted symbol
254 return;
255
256 case '"': // string literal
258 return;
259
260 case ':': // keyword
262 if(token == SYMBOL)
263 {
264 token = KEYWORD;
265 return;
266 }
267 else
268 throw error("expecting symbol after colon");
269
270 case '#':
271 if(in->get(ch))
272 {
273 if(ch=='b')
274 {
276 return;
277 }
278 else if(ch=='x')
279 {
281 return;
282 }
283 else
284 throw error("unknown numeral token");
285 }
286 else
287 throw error("unexpected EOF in numeral token");
288 break;
289
290 default: // likely a simple symbol or a numeral
291 if(isdigit(ch))
292 {
293 in->unget();
295 return;
296 }
298 {
299 in->unget();
301 return;
302 }
303 else
304 {
305 // illegal character, error
306 throw error() << "unexpected character '" << ch << '\'';
307 }
308 }
309 }
310
312}
virtual void clear()
Reset the abstract state.
Definition ai.h:265
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
void get_token_from_stream()
read a token from the input stream and store it in 'token'
tokent get_string_literal()
tokent get_decimal_numeral()
std::istream * in
smt2_errort error() const
generate an error exception
tokent get_simple_symbol()
std::string buffer
int isdigit(int c)
Definition ctype.c:24
int isxdigit(int c)
Definition ctype.c:95
int isalnum(int c)
Definition ctype.c:4
bool is_smt2_simple_symbol_character(char ch)
bool is_smt2_simple_symbol_character(char)