CBMC
string_utils.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Poetzl
6 
7 \*******************************************************************/
8 
9 #include "string_utils.h"
10 #include "exception_utils.h"
11 #include "invariant.h"
12 
13 #include <algorithm>
14 #include <cctype>
15 #include <iomanip>
16 
/// Remove all whitespace characters from either end of a string.
/// Whitespace is whatever `std::isspace` classifies as such.
/// \param s: the string to strip
/// \return a copy of \p s without leading and trailing whitespace; the empty
///   string if \p s is empty or consists solely of whitespace
std::string strip_string(const std::string &s)
{
  // std::isspace has undefined behaviour for negative arguments (other than
  // EOF), so bytes >= 0x80 -- e.g. from UTF-8 encoded text -- must be
  // converted to unsigned char before they are classified.
  const auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };

  const auto first = std::find_if_not(s.begin(), s.end(), is_space);
  if(first == s.end())
    return "";

  // At least one non-whitespace character exists, so the reverse search stops
  // before rend(); base() then points one past the last such character.
  const auto last = std::find_if_not(s.rbegin(), s.rend(), is_space).base();

  return std::string(first, last);
}
38 
40  const std::string &s,
41  char delim,
42  std::vector<std::string> &result,
43  bool strip,
44  bool remove_empty)
45 {
46  PRECONDITION(result.empty());
47  // delim can't be a space character if using strip
48  PRECONDITION(!std::isspace(delim) || !strip);
49 
50  if(s.empty())
51  {
52  if(!remove_empty)
53  result.push_back("");
54  return;
55  }
56 
57  std::string::size_type n=s.length();
58  INVARIANT(n > 0, "Empty string case should already be handled");
59 
60  std::string::size_type start=0;
62 
63  for(i=0; i<n; i++)
64  {
65  if(s[i]==delim)
66  {
67  std::string new_s=s.substr(start, i-start);
68 
69  if(strip)
70  new_s=strip_string(new_s);
71 
72  if(!remove_empty || !new_s.empty())
73  result.push_back(new_s);
74 
75  start=i+1;
76  }
77  }
78 
79  std::string new_s=s.substr(start, n-start);
80 
81  if(strip)
82  new_s=strip_string(new_s);
83 
84  if(!remove_empty || !new_s.empty())
85  result.push_back(new_s);
86 
87  if(!remove_empty && result.empty())
88  result.push_back("");
89 }
90 
92  const std::string &s,
93  char delim,
94  std::string &left,
95  std::string &right,
96  bool strip)
97 {
98  // delim can't be a space character if using strip
99  PRECONDITION(!std::isspace(delim) || !strip);
100 
101  std::vector<std::string> result = split_string(s, delim, strip);
102 
103  if(result.size() != 2)
104  {
105  throw deserialization_exceptiont{"expected string '" + s +
106  "' to contain two substrings "
107  "delimited by " +
108  delim + " but has " +
109  std::to_string(result.size())};
110  }
111 
112  left=result[0];
113  right=result[1];
114 }
115 
116 std::vector<std::string> split_string(
117  const std::string &s,
118  char delim,
119  bool strip,
120  bool remove_empty)
121 {
122  std::vector<std::string> result;
123  split_string(s, delim, result, strip, remove_empty);
124  return result;
125 }
126 
/// Return the part of \p s that precedes the last occurrence of \p delim.
/// \param s: the string to trim
/// \param delim: the delimiter character to search for
/// \return everything before the last \p delim, or the empty string if
///   \p delim does not occur in \p s
std::string trim_from_last_delimiter(
  const std::string &s,
  const char delim)
{
  const std::string::size_type pos = s.find_last_of(delim);
  if(pos == std::string::npos)
    return std::string();

  return s.substr(0, pos);
}
137 
/// Generic escaping of strings; this is not meant to be a particular
/// programming language.
/// \param s: the string to escape
/// \return a copy of \p s in which every backslash and every double quote is
///   preceded by an additional backslash
std::string escape(const std::string &s)
{
  std::string escaped;

  for(const char c : s)
  {
    switch(c)
    {
    case '\\':
    case '"':
      // these two characters get a backslash in front of them
      escaped.push_back('\\');
      break;
    default:
      break;
    }

    escaped.push_back(c);
  }

  return escaped;
}
152 
/// Replace non-alphanumeric characters with `_xx` escapes, where `xx` are two
/// lower-case hex digits of the character's byte value. An underscore is
/// written as `__`; alphanumeric characters are copied unchanged.
/// \param to_escape: the string to escape
/// \return the escaped string
std::string escape_non_alnum(const std::string &to_escape)
{
  static const char hex_digits[] = "0123456789abcdef";

  std::string escaped;
  for(const char ch : to_escape)
  {
    // `ch` may be negative for bytes of UTF-8 encoded characters above code
    // point 127. Converting to unsigned char maps those to 128-255, which is
    // necessary to avoid undefined behaviour in `isalnum` and gives the value
    // that is printed in hex below.
    const unsigned char byte = static_cast<unsigned char>(ch);

    if(ch == '_')
    {
      escaped += "__";
    }
    else if(isalnum(byte))
    {
      escaped += ch;
    }
    else
    {
      escaped += '_';
      escaped += hex_digits[byte >> 4];
      escaped += hex_digits[byte & 0x0f];
    }
  }
  return escaped;
}
/// Return a copy of \p str whose first character has been converted with
/// `toupper`; all other characters are left as they are.
/// \param str: the string to capitalize
/// \return the capitalized copy; an empty \p str is returned unchanged
std::string capitalize(const std::string &str)
{
  std::string capitalized = str;
  if(!capitalized.empty())
  {
    // toupper has undefined behaviour for negative arguments (other than
    // EOF), so go through unsigned char for bytes >= 0x80.
    const unsigned char first = static_cast<unsigned char>(capitalized.front());
    capitalized.front() = static_cast<char>(std::toupper(first));
  }
  return capitalized;
}
183 
184 std::string wrap_line(
185  const std::string &line,
186  const std::size_t left_margin,
187  const std::size_t width)
188 {
189  return wrap_line(line.cbegin(), line.cend(), left_margin, width);
190 }
191 
192 std::string wrap_line(
193  std::string::const_iterator left,
194  std::string::const_iterator right,
195  const std::size_t left_margin,
196  const std::size_t width)
197 {
198  PRECONDITION(left_margin < width);
199 
200  const std::size_t column_width = width - left_margin;
201  const std::string margin(left_margin, ' ');
202 
203  auto distance = std::distance(left, right);
204  CHECK_RETURN(distance > 0);
205 
206  std::string result;
207 
208  if(static_cast<std::size_t>(distance) <= column_width)
209  {
210  result.append(margin);
211  result.append(left, right);
212 
213  return result;
214  }
215 
216  auto it_line_begin = left;
217 
218  do
219  {
220  // points to the first character past the current column
221  auto it = it_line_begin + column_width;
222 
223  auto rit_r = std::reverse_iterator<decltype(it)>(it) - 1;
224  auto rit_l = rit_r + column_width;
225 
226  auto rit_space = std::find(rit_r, rit_l, ' ');
227 
228  if(rit_space != rit_l)
229  {
230  auto it_space = rit_space.base() - 1;
231  CHECK_RETURN(*it_space == ' ');
232 
233  result.append(margin);
234  result.append(it_line_begin, it_space);
235  result.append("\n");
236 
237  it_line_begin = it_space + 1;
238  }
239  else
240  {
241  // we have not found a space, thus cannot wrap this line
242  result.clear();
243  result.append(left, right);
244 
245  return result;
246  }
247  } while(static_cast<std::size_t>(std::distance(it_line_begin, right)) >
248  column_width);
249 
250  result.append(margin);
251  result.append(it_line_begin, right);
252 
253  return result;
254 }
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
int isspace(int c)
Definition: ctype.c:80
int toupper(int c)
Definition: ctype.c:134
int isalnum(int c)
Definition: ctype.c:4
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
std::string trim_from_last_delimiter(const std::string &s, const char delim)
void split_string(const std::string &s, char delim, std::vector< std::string > &result, bool strip, bool remove_empty)
std::string strip_string(const std::string &s)
Remove all whitespace characters from either end of a string.
std::string escape(const std::string &s)
Generic escaping of strings; this is not meant to be a particular programming language.
std::string escape_non_alnum(const std::string &to_escape)
Replace non-alphanumeric characters with _xx escapes, where xx are hex digits.
std::string wrap_line(const std::string &line, const std::size_t left_margin, const std::size_t width)
Wrap line at spaces to not extend past the right margin, and include given padding with spaces to the left.
std::string capitalize(const std::string &str)
#define size_type
Definition: unistd.c:347