CBMC
Loading...
Searching...
No Matches
smt2_tokenizer.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
12
13#include "smt2_tokenizer.h"
14
16{
17 // any non-empty sequence of letters, digits and the characters
18 // ~ ! @ $ % ^ & * _ - + = < > . ? /
19 // that does not start with a digit and is not a reserved word.
20
21 return isalnum(ch) ||
22 ch=='~' || ch=='!' || ch=='@' || ch=='$' || ch=='%' ||
23 ch=='^' || ch=='&' || ch=='*' || ch=='_' || ch=='-' ||
24 ch=='+' || ch=='=' || ch=='<' || ch=='>' || ch=='.' ||
25 ch=='?' || ch=='/';
26}
27
29{
30 // any non-empty sequence of letters, digits and the characters
31 // ~ ! @ $ % ^ & * _ - + = < > . ? /
32 // that does not start with a digit and is not a reserved word.
33
34 tokent t{SYMBOL};
35
36 char ch;
37 while(in->get(ch))
38 {
40 {
41 t.text += ch;
42 }
43 else
44 {
45 in->unget(); // put back
46 t.line_no = line_no;
47 return t;
48 }
49 }
50
51 // eof -- this is ok here
52 t.line_no = line_no;
53 if(t.text.empty())
54 t.kind = END_OF_FILE;
55 return t;
56}
57
59{
60 // we accept any sequence of digits and dots
61
62 tokent t{NUMERAL};
63
64 char ch;
65 while(in->get(ch))
66 {
67 if(isdigit(ch) || ch=='.')
68 {
69 t.text += ch;
70 }
71 else
72 {
73 in->unget(); // put back
74 t.line_no = line_no;
75 return t;
76 }
77 }
78
79 // eof -- this is ok here
80 t.line_no = line_no;
81 if(t.text.empty())
82 t.kind = END_OF_FILE;
83 return t;
84}
85
87{
88 // we accept any sequence of '0' or '1'
89
90 tokent t{NUMERAL};
91 t.text = "#b";
92
93 char ch;
94 while(in->get(ch))
95 {
96 if(ch=='0' || ch=='1')
97 {
98 t.text += ch;
99 }
100 else
101 {
102 in->unget(); // put back
103 t.line_no = line_no;
104 return t;
105 }
106 }
107
108 // eof -- this is ok here
109 t.line_no = line_no;
110 return t;
111}
112
114{
115 // we accept any sequence of '0'-'9', 'a'-'f', 'A'-'F'
116
117 tokent t{NUMERAL};
118 t.text = "#x";
119
120 char ch;
121 while(in->get(ch))
122 {
123 if(isxdigit(ch))
124 {
125 t.text += ch;
126 }
127 else
128 {
129 in->unget(); // put back
130 t.line_no = line_no;
131 return t;
132 }
133 }
134
135 // eof -- this is ok here
136 t.line_no = line_no;
137 return t;
138}
139
141{
142 // any sequence of printable ASCII characters (including space,
143 // tab, and line-breaking characters) except for the backslash
144 // character \, that starts and ends with | and does not otherwise
145 // contain |
146
147 tokent t{SYMBOL};
148 t.quoted_symbol = true;
149
150 char ch;
151 while(in->get(ch))
152 {
153 if(ch=='|')
154 {
155 t.line_no = line_no;
156 return t;
157 }
158
159 t.text += ch;
160
161 if(ch=='\n')
162 line_no++;
163 }
164
165 // Hmpf. Eof before end of quoted symbol. This is an error.
166 throw error("EOF within quoted symbol");
167}
168
170{
172
173 char ch;
174 while(in->get(ch))
175 {
176 if(ch=='"')
177 {
178 // quotes may be escaped by repeating
179 if(in->get(ch))
180 {
181 if(ch=='"')
182 {
183 }
184 else
185 {
186 in->unget();
187 t.line_no = line_no;
188 return t; // done
189 }
190 }
191 else
192 {
193 t.line_no = line_no;
194 return t; // done
195 }
196 }
197 t.text += ch;
198 }
199
200 // Hmpf. Eof before end of string literal. This is an error.
201 throw error("EOF within string literal");
202}
203
205{
206 if(peeked.has_value())
207 {
208 tokent result = std::move(*peeked);
209 peeked.reset();
210 return result;
211 }
212 return read_token();
213}
214
216{
217 char ch;
218
219 while(in->get(ch))
220 {
221 switch(ch)
222 {
223 case '\n':
224 line_no++;
225 break;
226
227 case ' ':
228 case '\r':
229 case '\t':
230 case static_cast<char>(160): // non-breaking space
231 // skip any whitespace
232 break;
233
234 case ';': // comment
235 // skip until newline
236 while(in->get(ch))
237 {
238 if(ch=='\n')
239 {
240 line_no++;
241 break;
242 }
243 }
244 break;
245
246 case '(':
247 {
248 tokent t{OPEN};
249 t.line_no = line_no;
250 return t;
251 }
252
253 case ')':
254 {
255 tokent t{CLOSE};
256 t.line_no = line_no;
257 return t;
258 }
259
260 case '|': // quoted symbol
261 return get_quoted_symbol();
262
263 case '"': // string literal
264 return get_string_literal();
265
266 case ':': // keyword
267 {
269 if(t.kind != SYMBOL)
270 throw error("expecting symbol after colon");
271 t.kind = KEYWORD;
272 return t;
273 }
274
275 case '#':
276 if(in->get(ch))
277 {
278 if(ch=='b')
279 return get_bin_numeral();
280 else if(ch=='x')
281 return get_hex_numeral();
282 else
283 throw error("unknown numeral token");
284 }
285 else
286 throw error("unexpected EOF in numeral token");
287 break;
288
289 default: // likely a simple symbol or a numeral
290 if(isdigit(ch))
291 {
292 in->unget();
293 return get_decimal_numeral();
294 }
296 {
297 in->unget();
298 return get_simple_symbol();
299 }
300 else
301 {
302 // illegal character, error
303 throw error() << "unexpected character '" << ch << '\'';
304 }
305 }
306 }
307
309 t.line_no = line_no;
310 return t;
311}
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:566
One SMT-LIB v2.6 token.
token_kindt kind
The kind of token; see token_kindt.
bool quoted_symbol
True iff kind == SYMBOL and the symbol was given in |...| quoted form; always false for other token k...
tokent get_string_literal()
Read a STRING_LITERAL of the form "..." from the stream, applying SMT-LIB v2.6 quote-doubling: a "" i...
std::optional< tokent > peeked
Token that has been peeked but not yet consumed.
tokent get_decimal_numeral()
Read a NUMERAL of the form "[0-9.]+" from the stream.
tokent next_token()
Consume and return the next token.
std::istream * in
smt2_errort error() const
generate an error exception
tokent get_bin_numeral()
Read a NUMERAL of the form "#b[01]+" from the stream.
tokent get_simple_symbol()
Read a SYMBOL of the form "[A-Za-z~!@$%^&*_\-+=<>.?/][...]*" from the stream.
tokent get_quoted_symbol()
Read a quoted SYMBOL of the form |...| from the stream.
unsigned line_no
Number of the source line currently being read.
tokent read_token()
Read a single token directly from the input stream.
tokent get_hex_numeral()
Read a NUMERAL of the form "#x[0-9a-fA-F]+" from the stream.
int isdigit(int c)
Definition ctype.c:24
int isxdigit(int c)
Definition ctype.c:95
int isalnum(int c)
Definition ctype.c:4
bool is_smt2_simple_symbol_character(char ch)
Tokenizer for the SMT-LIB v2.6 syntax.
bool is_smt2_simple_symbol_character(char)