CBMC
format_strings.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Format String Parser
4 
5 Author: CM Wintersteiger
6 
7 \*******************************************************************/
8 
11 
12 #include "format_strings.h"
13 
14 #include <util/c_types.h>
15 #include <util/exception_utils.h>
16 #include <util/invariant.h>
17 #include <util/std_expr.h>
18 
19 #include <cctype>
20 
21 void parse_flags(std::string::const_iterator &it, format_tokent &curtok)
22 {
23  while(*it == '#' || *it == '0' || *it == '-' || *it == ' ' || *it == '+')
24  {
25  switch(*it)
26  {
27  case '#':
29  break;
30  case '0':
32  break;
33  case '-':
35  break;
36  case ' ':
38  break;
39  case '+':
40  curtok.flags.push_back(format_tokent::flag_typet::SIGN);
41  break;
42  default:
44  std::string("unsupported format specifier flag: '") + *it + "'");
45  }
46  it++;
47  }
48 }
49 
50 void parse_field_width(std::string::const_iterator &it, format_tokent &curtok)
51 {
52  if(*it == '*')
53  {
55  it++;
56  }
57 
58  std::string tmp;
59  for(; isdigit(*it); it++)
60  tmp += *it;
61  curtok.field_width = string2integer(tmp);
62 }
63 
64 void parse_precision(std::string::const_iterator &it, format_tokent &curtok)
65 {
66  if(*it == '.')
67  {
68  it++;
69 
70  if(*it == '*')
71  {
73  it++;
74  }
75  else
76  {
77  std::string tmp;
78  for(; isdigit(*it); it++)
79  tmp += *it;
80  curtok.precision = string2integer(tmp);
81  }
82  }
83 }
84 
86  std::string::const_iterator &it,
87  format_tokent &curtok)
88 {
89  if(*it == 'h')
90  {
91  it++;
92  if(*it == 'h')
93  it++;
95  }
96  else if(*it == 'l')
97  {
98  it++;
99  if(*it == 'l')
100  it++;
102  }
103  else if(*it == 'L')
104  {
105  it++;
107  }
108  else if(*it == 'j')
109  {
110  it++;
112  }
113  else if(*it == 't')
114  {
115  it++;
117  }
118 }
119 
121  const std::string &arg_string,
122  std::string::const_iterator &it,
123  format_tokent &curtok)
124 {
125  switch(*it)
126  {
127  case 'd':
128  case 'i':
131  break;
132  case 'o':
135  break;
136  case 'u':
139  break;
140  case 'x':
141  case 'X':
144  break;
145  case 'e':
146  case 'E':
148  break;
149  case 'f':
150  case 'F':
152  break;
153  case 'g':
154  case 'G':
156  break;
157  case 'a':
158  case 'A':
160  break;
161  case 'c':
163  break;
164  case 's':
166  break;
167  case 'p':
169  break;
170  case '%':
172  curtok.value = "%";
173  break;
174  case '[': // pattern matching in, e.g., fscanf.
175  {
176  std::string tmp;
177  it++;
178  if(*it == '^') // if it's there, it must be first
179  {
180  tmp += '^';
181  it++;
182  if(*it == ']') // if it's there, it must be here
183  {
184  tmp += ']';
185  it++;
186  }
187  }
188 
189  for(; it != arg_string.end() && *it != ']'; it++)
190  tmp += *it;
191 
192  break;
193  }
194 
195  default:
197  std::string("unsupported format conversion specifier: '") + *it + "'");
198  }
199  it++;
200 }
201 
202 format_token_listt parse_format_string(const std::string &arg_string)
203 {
204  format_token_listt token_list;
205 
206  std::string::const_iterator it = arg_string.begin();
207 
208  while(it != arg_string.end())
209  {
210  if(*it == '%')
211  {
212  token_list.push_back(format_tokent());
213  format_tokent &curtok = token_list.back();
214  it++;
215 
216  parse_flags(it, curtok);
217  parse_field_width(it, curtok);
218  parse_precision(it, curtok);
219  parse_length_modifier(it, curtok);
220  parse_conversion_specifier(arg_string, it, curtok);
221  }
222  else
223  {
224  if(
225  token_list.empty() ||
226  token_list.back().type != format_tokent::token_typet::TEXT)
227  token_list.push_back(format_tokent(format_tokent::token_typet::TEXT));
228 
229  std::string tmp;
230  for(; it != arg_string.end() && *it != '%'; it++)
231  tmp += *it;
232 
233  INVARIANT(
234  !token_list.empty() &&
235  token_list.back().type == format_tokent::token_typet::TEXT,
236  "must already have a TEXT token at the back of the token list");
237 
238  token_list.back().value = tmp;
239  }
240  }
241 
242  return token_list;
243 }
244 
245 std::optional<typet> get_type(const format_tokent &token)
246 {
247  switch(token.type)
248  {
250  switch(token.length_modifier)
251  {
254  return signed_char_type();
255  else
256  return unsigned_char_type();
257 
260  return signed_short_int_type();
261  else
262  return unsigned_short_int_type();
263 
266  return signed_long_int_type();
267  else
268  return unsigned_long_int_type();
269 
272  return signed_long_long_int_type();
273  else
275 
281  return signed_int_type();
282  else
283  return unsigned_int_type();
284  }
285 
287  switch(token.length_modifier)
288  {
290  return double_type();
292  return long_double_type();
299  return float_type();
300  }
301 
303  switch(token.length_modifier)
304  {
306  return wchar_t_type();
314  return char_type();
315  }
316 
318  return pointer_type(void_type());
319 
321  switch(token.length_modifier)
322  {
324  return array_typet(wchar_t_type(), nil_exprt());
332  return array_typet(char_type(), nil_exprt());
333  }
334 
337  return {};
338  }
339 
340  UNREACHABLE;
341 }
floatbv_typet float_type()
Definition: c_types.cpp:177
signedbv_typet signed_long_int_type()
Definition: c_types.cpp:72
signedbv_typet signed_char_type()
Definition: c_types.cpp:134
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:36
unsignedbv_typet unsigned_long_long_int_type()
Definition: c_types.cpp:93
unsignedbv_typet unsigned_long_int_type()
Definition: c_types.cpp:86
empty_typet void_type()
Definition: c_types.cpp:245
signedbv_typet signed_int_type()
Definition: c_types.cpp:22
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
unsignedbv_typet unsigned_char_type()
Definition: c_types.cpp:127
bitvector_typet char_type()
Definition: c_types.cpp:106
signedbv_typet signed_long_long_int_type()
Definition: c_types.cpp:79
bitvector_typet wchar_t_type()
Definition: c_types.cpp:141
floatbv_typet long_double_type()
Definition: c_types.cpp:193
floatbv_typet double_type()
Definition: c_types.cpp:185
signedbv_typet signed_short_int_type()
Definition: c_types.cpp:29
unsignedbv_typet unsigned_short_int_type()
Definition: c_types.cpp:43
Arrays with given size.
Definition: std_types.h:807
mp_integer precision
token_typet type
mp_integer field_width
representationt representation
length_modifierst length_modifier
std::list< flag_typet > flags
irep_idt value
The NIL expression.
Definition: std_expr.h:3086
Thrown when we encounter an instruction, parameters to an instruction etc.
int isdigit(int c)
Definition: ctype.c:24
void parse_field_width(std::string::const_iterator &it, format_tokent &curtok)
std::optional< typet > get_type(const format_tokent &token)
void parse_conversion_specifier(const std::string &arg_string, std::string::const_iterator &it, format_tokent &curtok)
void parse_flags(std::string::const_iterator &it, format_tokent &curtok)
void parse_length_modifier(std::string::const_iterator &it, format_tokent &curtok)
format_token_listt parse_format_string(const std::string &arg_string)
void parse_precision(std::string::const_iterator &it, format_tokent &curtok)
Format String Parser.
std::list< format_tokent > format_token_listt
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:525
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
API to expression classes.