CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
unescape_string.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: ANSI-C Language Conversion
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
11
12#include "unescape_string.h"
13
14#include <cctype>
15
16#include <util/invariant.h>
17#include <util/unicode.h>
18
20 unsigned int value,
21 std::string &dest)
22{
23 std::basic_string<char32_t> value_str(1, value);
24
25 // turn into utf-8
27
28 dest.append(utf8_value);
29}
30
/// Append a universal character to a char32_t string as one element.
/// \param value: numeric value of the universal character
/// \param dest: string that receives the new element
static void
append_universal_char(unsigned int value, std::basic_string<char32_t> &dest)
{
  // a char32_t string stores the value directly, no re-encoding required
  dest += static_cast<char32_t>(value);
}
36
37template<typename T>
38std::basic_string<T> unescape_string_templ(const std::string &src)
39{
40 std::basic_string<T> dest;
41
42 dest.reserve(src.size()); // about that long, but may be shorter
43
44 for(unsigned i=0; i<src.size(); i++)
45 {
46 T ch=(unsigned char)src[i];
47
48 if(ch=='\\') // escape?
49 {
50 // go to next character
51 i++;
52 INVARIANT(i < src.size(), "backslash can't be last character");
53
54 ch=(unsigned char)src[i];
55 switch(ch)
56 {
57 case '\\': dest.push_back(ch); break;
58 case 'n': dest.push_back('\n'); break; /* NL (0x0a) */
59 case 't': dest.push_back('\t'); break; /* HT (0x09) */
60 case 'v': dest.push_back('\v'); break; /* VT (0x0b) */
61 case 'b': dest.push_back('\b'); break; /* BS (0x08) */
62 case 'r': dest.push_back('\r'); break; /* CR (0x0d) */
63 case 'f': dest.push_back('\f'); break; /* FF (0x0c) */
64 case 'a': dest.push_back('\a'); break; /* BEL (0x07) */
65 case '"': dest.push_back('"'); break;
66 case '\'': dest.push_back('\''); break;
67
68 case 'u': // universal character
69 case 'U': // universal character
70 i++;
71
72 {
73 std::string hex;
74
75 const unsigned digits = (ch == 'u') ? 4u : 8u;
76 hex.reserve(digits);
77
78 for(unsigned count=digits;
79 count!=0 && i<src.size();
80 i++, count--)
81 hex+=src[i];
82
83 // go back
84 i--;
85
86 unsigned int result=hex_to_unsigned(hex.c_str(), hex.size());
87
88 append_universal_char(result, dest);
89 }
90
91 break;
92
93 case 'x': // hex
94 i++;
95
96 {
97 std::string hex;
98
99 while(i<src.size() && isxdigit(src[i]))
100 {
101 hex+=src[i];
102 i++;
103 }
104
105 // go back
106 i--;
107
108 ch=hex_to_unsigned(hex.c_str(), hex.size());
109 }
110
111 // if T isn't sufficiently wide to hold unsigned values
112 // the following might truncate; but then
113 // universal characters in non-wide strings don't
114 // really work; gcc just issues a warning.
115 dest.push_back(ch);
116 break;
117
118 default:
119 if(isdigit(ch)) // octal
120 {
121 std::string octal;
122
123 while(i<src.size() && isdigit(src[i]))
124 {
125 octal+=src[i];
126 i++;
127 }
128
129 // go back
130 i--;
131
132 ch=octal_to_unsigned(octal.c_str(), octal.size());
133 dest.push_back(ch);
134 }
135 else
136 {
137 // Unknown escape sequence.
138 // Both GCC and CL turn \% into %.
139 dest.push_back(ch);
140 }
141 }
142 }
143 else
144 dest.push_back(ch);
145 }
146
147 return dest;
148}
149
150std::string unescape_string(const std::string &src)
151{
152 return unescape_string_templ<char>(src);
153}
154
155std::basic_string<char32_t> unescape_wide_string(const std::string &src)
156{
158}
159
/// Accumulate up to \p digits characters of \p hex as a hexadecimal number.
/// Reading stops early at a NUL character. Every character read shifts the
/// accumulated value left by four bits; characters that are not hexadecimal
/// digits add no bits of their own.
/// \param hex: pointer to the first character to read
/// \param digits: maximum number of characters to read
/// \return the accumulated value
unsigned hex_to_unsigned(const char *hex, std::size_t digits)
{
  unsigned value = 0;

  for(; digits != 0; digits--, hex++)
  {
    // the <cctype> functions are only defined for values representable as
    // unsigned char (or EOF), so do not pass a possibly negative plain char
    const unsigned char ch = static_cast<unsigned char>(*hex);

    if(ch == 0)
      break;

    value <<= 4;

    if(isdigit(ch))
      value |= ch - '0';
    else if(isxdigit(ch))
      value |= 10 + tolower(ch) - 'a';
  }

  return value;
}
181
/// Accumulate up to \p digits characters of \p octal as an octal number.
/// Reading stops early at a NUL character. Every character read shifts the
/// accumulated value left by three bits; characters that are not decimal
/// digits add no bits of their own.
/// \param octal: pointer to the first character to read
/// \param digits: maximum number of characters to read
/// \return the accumulated value
unsigned octal_to_unsigned(const char *octal, std::size_t digits)
{
  unsigned value = 0;

  for(; digits != 0; digits--, octal++)
  {
    // the <cctype> functions are only defined for values representable as
    // unsigned char (or EOF), so do not pass a possibly negative plain char
    const unsigned char ch = static_cast<unsigned char>(*octal);

    if(ch == 0)
      break;

    value <<= 3;

    if(isdigit(ch))
      value |= ch - '0';
  }

  return value;
}
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
int isdigit(int c)
Definition ctype.c:24
int tolower(int c)
Definition ctype.c:109
int isxdigit(int c)
Definition ctype.c:95
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
std::basic_string< T > unescape_string_templ(const std::string &src)
unsigned octal_to_unsigned(const char *octal, std::size_t digits)
std::basic_string< char32_t > unescape_wide_string(const std::string &src)
std::string unescape_string(const std::string &src)
static void append_universal_char(unsigned int value, std::string &dest)
unsigned hex_to_unsigned(const char *hex, std::size_t digits)
ANSI-C Language Conversion.
std::string utf32_native_endian_to_utf8(const std::basic_string< char32_t > &s)
Definition unicode.cpp:136