CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
convert_string_literal.cpp
Go to the documentation of this file.
/*******************************************************************\

Module: C/C++ Language Conversion

Author: Daniel Kroening, kroening@kroening.com

\*******************************************************************/
8
11
13
#include <util/arith_tools.h>
#include <util/c_types.h>
#include <util/string_constant.h>
#include <util/unicode.h>

#include "unescape_string.h"
20
21std::basic_string<char32_t> convert_one_string_literal(const std::string &src)
22{
23 PRECONDITION(src.size() >= 2);
24
25 if(src[0]=='u' && src[1]=='8')
26 {
27 PRECONDITION(src[src.size() - 1] == '"');
28 PRECONDITION(src[2] == '"');
29
30 std::basic_string<char32_t> value =
31 unescape_wide_string(std::string(src, 3, src.size() - 4));
32
33 // turn into utf-8
34 const std::string utf8_value = utf32_native_endian_to_utf8(value);
35
36 // pad into wide string
37 value.resize(utf8_value.size());
38 for(std::size_t i=0; i<utf8_value.size(); i++)
39 value[i]=utf8_value[i];
40
41 return value;
42 }
43 else if(src[0]=='L' || src[0]=='u' || src[0]=='U')
44 {
45 PRECONDITION(src[src.size() - 1] == '"');
46 PRECONDITION(src[1] == '"');
47
48 return unescape_wide_string(std::string(src, 2, src.size()-3));
49 }
50 else
51 {
52 PRECONDITION(src[0] == '"');
53 PRECONDITION(src[src.size() - 1] == '"');
54
55 std::string char_value=
56 unescape_string(std::string(src, 1, src.size()-2));
57
58 // pad into wide string
59 std::basic_string<char32_t> value;
60 value.resize(char_value.size());
61 for(std::size_t i=0; i<char_value.size(); i++)
62 value[i]=char_value[i];
63
64 return value;
65 }
66}
67
68exprt convert_string_literal(const std::string &src)
69{
70 // note that 'src' could be a concatenation of string literals,
71 // e.g., something like "asd" "xyz".
72 // GCC allows "asd" L"xyz"!
73
74 std::basic_string<char32_t> value;
75
76 char wide=0;
77
78 for(std::size_t i=0; i<src.size(); i++)
79 {
80 char ch=src[i];
81
82 // skip whitespace/newline
83 if(ch!='L' && ch!='u' && ch!='U' && ch!='"')
84 continue;
85
86 if(ch=='L')
87 wide=ch;
88 if((ch=='u' || ch=='U') && i+1<src.size() && src[i+1]=='"')
89 wide=ch;
90
91 // find start of sequence
92 std::size_t j=src.find('"', i);
93 CHECK_RETURN(j != std::string::npos);
94
95 // find end of sequence, considering escaping
96 for(++j; j<src.size() && src[j]!='"'; ++j)
97 if(src[j]=='\\') // skip next character
98 ++j;
99
100 INVARIANT(j < src.size(), "non-terminated string constant '" + src + "'");
101
102 std::string tmp_src=std::string(src, i, j-i+1);
103 std::basic_string<char32_t> tmp_value = convert_one_string_literal(tmp_src);
104 value.append(tmp_value);
105 i=j;
106 }
107
108 if(wide!=0)
109 {
110 // add implicit trailing zero
111 value.push_back(0);
112
113 // L is wchar_t, u is char16_t, U is char32_t.
114 typet subtype;
115
116 switch(wide)
117 {
118 case 'L': subtype=wchar_t_type(); break;
119 case 'u': subtype=char16_t_type(); break;
120 case 'U': subtype=char32_t_type(); break;
121 default:
123 }
124
125 exprt result=exprt(ID_array);
126 result.set(ID_C_string_constant, true);
127 result.type() =
128 array_typet(subtype, from_integer(value.size(), c_index_type()));
129
130 result.operands().resize(value.size());
131 for(std::size_t i=0; i<value.size(); i++)
132 result.operands()[i]=from_integer(value[i], subtype);
133
134 return result;
135 }
136 else
137 {
138 std::string char_value;
139
140 char_value.resize(value.size());
141
142 for(std::size_t i=0; i<value.size(); i++)
143 {
144 // Loss of data here if value[i]>255.
145 // gcc issues a warning in this case.
146 char_value[i]=value[i];
147 }
148
150 }
151}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
unsignedbv_typet char32_t_type()
Definition c_types.cpp:167
bitvector_typet wchar_t_type()
Definition c_types.cpp:141
bitvector_typet c_index_type()
Definition c_types.cpp:16
unsignedbv_typet char16_t_type()
Definition c_types.cpp:157
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Arrays with given size.
Definition std_types.h:807
Base class for all expressions.
Definition expr.h:56
typet & type()
Return the type of the expression.
Definition expr.h:84
operandst & operands()
Definition expr.h:94
void set(const irep_idt &name, const irep_idt &value)
Definition irep.h:412
The type of an expression, extends irept.
Definition type.h:29
std::basic_string< char32_t > convert_one_string_literal(const std::string &src)
C/C++ Language Conversion.
exprt convert_string_literal(const std::string &src)
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
#define UNREACHABLE
This should be used to mark dead code.
Definition invariant.h:525
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
std::basic_string< char32_t > unescape_wide_string(const std::string &src)
std::string unescape_string(const std::string &src)
ANSI-C Language Conversion.
std::string utf32_native_endian_to_utf8(const std::basic_string< char32_t > &s)
Definition unicode.cpp:136