CBMC
string_builtin_function.cpp
Go to the documentation of this file.
1 
5 
6 #include <algorithm>
7 #include <iterator>
8 
10 
13  const exprt &return_code,
14  const std::vector<exprt> &fun_args,
15  array_poolt &array_pool)
16  : string_builtin_functiont(return_code, array_pool)
17 {
18  PRECONDITION(fun_args.size() > 2);
19  const auto arg1 = expr_checked_cast<struct_exprt>(fun_args[2]);
20  input = array_pool.find(arg1.op1(), arg1.op0());
21  result = array_pool.find(fun_args[1], fun_args[0]);
22 }
23 
24 std::optional<std::vector<mp_integer>> eval_string(
25  const array_string_exprt &a,
26  const std::function<exprt(const exprt &)> &get_value)
27 {
28  if(a.id() == ID_if)
29  {
30  const if_exprt &ite = to_if_expr(a);
31  const exprt cond = get_value(ite.cond());
32  if(!cond.is_constant())
33  return {};
34  return cond.is_true()
35  ? eval_string(to_array_string_expr(ite.true_case()), get_value)
36  : eval_string(to_array_string_expr(ite.false_case()), get_value);
37  }
38 
39  const exprt content = get_value(a.content());
40  const auto &array = expr_try_dynamic_cast<array_exprt>(content);
41  if(!array)
42  return {};
43 
44  const auto &ops = array->operands();
45  std::vector<mp_integer> result;
46  const mp_integer unknown('?');
47  const auto &insert = std::back_inserter(result);
49  ops.begin(), ops.end(), insert, [unknown](const exprt &e) { // NOLINT
50  if(const auto i = numeric_cast<mp_integer>(e))
51  return *i;
52  return unknown;
53  });
54  return result;
55 }
56 
57 template <typename Iter>
58 static array_string_exprt
59 make_string(Iter begin, Iter end, const array_typet &array_type)
60 {
61  const typet &char_type = array_type.element_type();
62  array_exprt array_expr({}, array_type);
63  const auto &insert = std::back_inserter(array_expr.operands());
64  std::transform(begin, end, insert, [&](const mp_integer &i) {
65  return from_integer(i, char_type);
66  });
67  return to_array_string_expr(array_expr);
68 }
69 
71 make_string(const std::vector<mp_integer> &array, const array_typet &array_type)
72 {
73  return make_string(array.begin(), array.end(), array_type);
74 }
75 
77  const std::function<exprt(const exprt &)> &get_value) const
78 {
79  auto input_opt = eval_string(input, get_value);
80  if(!input_opt.has_value())
81  return {};
82  const mp_integer char_val = [&] {
83  if(const auto val = numeric_cast<mp_integer>(get_value(character)))
84  return *val;
85  INVARIANT(
86  get_value(character).id() == ID_unknown,
87  "character valuation should only contain constants and unknown");
89  }();
90  input_opt.value().push_back(char_val);
91  const auto length =
92  from_integer(input_opt.value().size(), result.length_type());
93  const array_typet type(to_type_with_subtype(result.type()).subtype(), length);
94  return make_string(input_opt.value(), type);
95 }
96 
104  string_constraint_generatort &generator,
105  message_handlert &message_handler) const
106 {
109  constraints.universal.push_back([&] {
110  const symbol_exprt idx =
111  generator.fresh_symbol("QA_index_concat_char", result.length_type());
112  const exprt upper_bound =
114  return string_constraintt(
115  idx, upper_bound, equal_exprt(input[idx], result[idx]), message_handler);
116  }());
117  constraints.existential.push_back(
119  constraints.existential.push_back(
121  return constraints;
122 }
123 
125 {
127 }
128 
130  const std::function<exprt(const exprt &)> &get_value) const
131 {
132  auto input_opt = eval_string(input, get_value);
133  const auto char_opt = numeric_cast<mp_integer>(get_value(character));
134  const auto position_opt = numeric_cast<mp_integer>(get_value(position));
135  if(!input_opt || !char_opt || !position_opt)
136  return {};
137  if(0 <= *position_opt && *position_opt < input_opt.value().size())
138  input_opt.value()[numeric_cast_v<std::size_t>(*position_opt)] = *char_opt;
139  const auto length =
140  from_integer(input_opt.value().size(), result.length_type());
141  const array_typet type(to_type_with_subtype(result.type()).subtype(), length);
142  return make_string(input_opt.value(), type);
143 }
144 
155  string_constraint_generatort &generator,
156  message_handlert &message_handler) const
157 {
161  and_exprt(
166  constraints.universal.push_back([&] {
167  const symbol_exprt q =
168  generator.fresh_symbol("QA_char_set", position.type());
169  const equal_exprt a3_body(result[q], input[q]);
170  return string_constraintt(
171  q,
174  a3_body,
175  message_handler);
176  }());
177  constraints.universal.push_back([&] {
178  const symbol_exprt q2 =
179  generator.fresh_symbol("QA_char_set2", position.type());
180  const equal_exprt a4_body(result[q2], input[q2]);
181  return string_constraintt(
182  q2,
185  a4_body,
186  message_handler);
187  }());
188  return constraints;
189 }
190 
192 {
193  const exprt out_of_bounds = or_exprt(
197  position, ID_lt, from_integer(0, input.length_type())));
198  const exprt return_value = if_exprt(
199  out_of_bounds,
202  return and_exprt(
203  equal_exprt(
206  equal_exprt(return_code, return_value));
207 }
208 
209 static bool eval_is_upper_case(const mp_integer &c)
210 {
211  // Characters between 'A' and 'Z' are upper-case
212  // Characters between 0xc0 (latin capital A with grave)
213  // and 0xd6 (latin capital O with diaeresis) are upper-case
214  // Characters between 0xd8 (latin capital O with stroke)
215  // and 0xde (latin capital thorn) are upper-case
216  return ('A' <= c && c <= 'Z') || (0xc0 <= c && c <= 0xd6) ||
217  (0xd8 <= c && c <= 0xde);
218 }
219 
221  const std::function<exprt(const exprt &)> &get_value) const
222 {
223  auto input_opt = eval_string(input, get_value);
224  if(!input_opt)
225  return {};
226  for(mp_integer &c : input_opt.value())
227  {
228  if(eval_is_upper_case(c))
229  c += 0x20;
230  }
231  const auto length =
232  from_integer(input_opt.value().size(), result.length_type());
233  const array_typet type(to_type_with_subtype(result.type()).subtype(), length);
234  return make_string(input_opt.value(), type);
235 }
236 
239 static exprt is_upper_case(const exprt &character)
240 {
241  const exprt char_A = from_integer('A', character.type());
242  const exprt char_Z = from_integer('Z', character.type());
243  exprt::operandst upper_case;
244  // Characters between 'A' and 'Z' are upper-case
245  upper_case.push_back(and_exprt(
246  binary_relation_exprt(char_A, ID_le, character),
247  binary_relation_exprt(character, ID_le, char_Z)));
248 
249  // Characters between 0xc0 (latin capital A with grave)
250  // and 0xd6 (latin capital O with diaeresis) are upper-case
251  upper_case.push_back(and_exprt(
253  from_integer(0xc0, character.type()), ID_le, character),
255  character, ID_le, from_integer(0xd6, character.type()))));
256 
257  // Characters between 0xd8 (latin capital O with stroke)
258  // and 0xde (latin capital thorn) are upper-case
259  upper_case.push_back(and_exprt(
261  from_integer(0xd8, character.type()), ID_le, character),
263  character, ID_le, from_integer(0xde, character.type()))));
264  return disjunction(upper_case);
265 }
266 
269 static exprt is_lower_case(const exprt &character)
270 {
271  return is_upper_case(
272  minus_exprt(character, from_integer(0x20, character.type())));
273 }
274 
286  string_constraint_generatort &generator,
287  message_handlert &message_handler) const
288 {
289  // \todo for now, only characters in Basic Latin and Latin-1 supplement
290  // are supported (up to 0x100), we should add others using case mapping
291  // information from the UnicodeData file.
294  constraints.universal.push_back([&] {
295  const symbol_exprt idx =
296  generator.fresh_symbol("QA_lower_case", result.length_type());
297  const exprt conditional_convert = [&] {
298  // The difference between upper-case and lower-case for the basic
299  // latin and latin-1 supplement is 0x20.
300  const typet &char_type = to_type_with_subtype(result.type()).subtype();
301  const exprt diff = from_integer(0x20, char_type);
302  const exprt converted =
303  equal_exprt(result[idx], plus_exprt(input[idx], diff));
304  const exprt non_converted = equal_exprt(result[idx], input[idx]);
305  return if_exprt(is_upper_case(input[idx]), converted, non_converted);
306  }();
307  return string_constraintt(
308  idx,
310  conditional_convert,
311  message_handler);
312  }());
313  return constraints;
314 }
315 
317  const std::function<exprt(const exprt &)> &get_value) const
318 {
319  auto input_opt = eval_string(input, get_value);
320  if(!input_opt)
321  return {};
322  for(mp_integer &c : input_opt.value())
323  {
324  if(eval_is_upper_case(c - 0x20))
325  c -= 0x20;
326  }
327  const auto length =
328  from_integer(input_opt.value().size(), result.length_type());
329  const array_typet type(to_type_with_subtype(result.type()).subtype(), length);
330  return make_string(input_opt.value(), type);
331 }
332 
344  symbol_generatort &fresh_symbol,
345  message_handlert &message_handler) const
346 {
349  constraints.universal.push_back([&] {
350  const symbol_exprt idx =
351  fresh_symbol("QA_upper_case", result.length_type());
352  const typet &char_type =
354  const exprt converted =
355  minus_exprt(input[idx], from_integer(0x20, char_type));
356  return string_constraintt(
357  idx,
359  equal_exprt(
360  result[idx],
361  if_exprt(is_lower_case(input[idx]), converted, input[idx])),
362  message_handler);
363  }());
364  return constraints;
365 }
366 
373  const exprt &return_code,
374  const std::vector<exprt> &fun_args,
375  array_poolt &array_pool)
376  : string_builtin_functiont(return_code, array_pool)
377 {
378  PRECONDITION(fun_args.size() >= 3);
379  result = array_pool.find(fun_args[1], fun_args[0]);
380  arg = fun_args[2];
381 }
382 
384  const std::function<exprt(const exprt &)> &get_value) const
385 {
386  const auto arg_value = numeric_cast<mp_integer>(get_value(arg));
387  if(!arg_value)
388  return {};
389  static std::string digits = "0123456789abcdefghijklmnopqrstuvwxyz";
390  const auto radix_value = numeric_cast<mp_integer>(get_value(radix));
391  if(!radix_value || *radix_value > digits.length())
392  return {};
393 
394  mp_integer current_value = *arg_value;
395  std::vector<mp_integer> right_to_left_characters;
396  if(current_value < 0)
397  current_value = -current_value;
398  if(current_value == 0)
399  right_to_left_characters.emplace_back('0');
400  while(current_value > 0)
401  {
402  const auto digit_value = (current_value % *radix_value).to_ulong();
403  right_to_left_characters.emplace_back(digits.at(digit_value));
404  current_value /= *radix_value;
405  }
406  if(*arg_value < 0)
407  right_to_left_characters.emplace_back('-');
408 
409  const auto length = right_to_left_characters.size();
410  const auto length_expr = from_integer(length, result.length_type());
411  const array_typet type(
412  to_type_with_subtype(result.type()).subtype(), length_expr);
413  return make_string(
414  right_to_left_characters.rbegin(), right_to_left_characters.rend(), type);
415 }
416 
418  string_constraint_generatort &generator,
419  message_handlert &message_handler) const
420 {
421  auto pair =
423  pair.second.existential.push_back(equal_exprt(pair.first, return_code));
424  return pair.second;
425 }
426 
428 {
429  const typet &type = result.length_type();
430  const auto radix_opt = numeric_cast<std::size_t>(radix);
431  const auto radix_value = radix_opt.has_value() ? radix_opt.value() : 2;
432  const std::size_t upper_bound =
433  max_printed_string_length(arg.type(), radix_value);
434  const exprt negative_arg =
435  binary_relation_exprt(arg, ID_le, from_integer(0, type));
436  const exprt absolute_arg =
437  if_exprt(negative_arg, unary_minus_exprt(arg), arg);
438 
439  exprt size_expr = from_integer(1, type);
440  exprt min_int_with_current_size = from_integer(1, radix.type());
441  for(std::size_t current_size = 2; current_size <= upper_bound + 1;
442  ++current_size)
443  {
444  min_int_with_current_size = mult_exprt(radix, min_int_with_current_size);
445  const exprt at_least_current_size =
446  binary_relation_exprt(absolute_arg, ID_ge, min_int_with_current_size);
447  size_expr = if_exprt(
448  at_least_current_size, from_integer(current_size, type), size_expr);
449  }
450 
451  const exprt size_expr_with_sign = if_exprt(
452  negative_arg, plus_exprt(size_expr, from_integer(1, type)), size_expr);
453  return equal_exprt(
454  array_pool.get_or_create_length(result), size_expr_with_sign);
455 }
456 
458  const exprt &return_code,
460  array_poolt &array_pool)
461  : string_builtin_functiont(return_code, array_pool), function_application(f)
462 {
463  const std::vector<exprt> &fun_args = f.arguments();
464  std::size_t i = 0;
465  if(fun_args.size() >= 2 && fun_args[1].type().id() == ID_pointer)
466  {
467  string_res = array_pool.find(fun_args[1], fun_args[0]);
468  i = 2;
469  }
470 
471  for(; i < fun_args.size(); ++i)
472  {
473  const auto arg = expr_try_dynamic_cast<struct_exprt>(fun_args[i]);
474  // TODO: use is_refined_string_type ?
475  if(
476  arg && arg->operands().size() == 2 &&
477  arg->op1().type().id() == ID_pointer)
478  {
479  INVARIANT(is_refined_string_type(arg->type()), "should be a string");
480  string_args.push_back(array_pool.find(arg->op1(), arg->op0()));
481  }
482  else
483  args.push_back(fun_args[i]);
484  }
485 }
486 
488  string_constraint_generatort &generator,
489  message_handlert &message_handler) const
490 {
491  auto pair =
493  pair.second.existential.push_back(equal_exprt(pair.first, return_code));
494  return pair.second;
495 }
static abstract_object_pointert transform(const exprt &expr, const std::vector< abstract_object_pointert > &operands, const abstract_environmentt &environment, const namespacet &ns)
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
bitvector_typet char_type()
Definition: c_types.cpp:106
Boolean AND.
Definition: std_expr.h:2120
Array constructor from list of elements.
Definition: std_expr.h:1616
Correspondance between arrays and pointers string representations.
Definition: array_pool.h:42
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
Definition: array_pool.cpp:26
array_string_exprt find(const exprt &pointer, const exprt &length)
Creates a new array if the pointer is not pointing to an array.
Definition: array_pool.cpp:184
const typet & length_type() const
Definition: string_expr.h:70
exprt & content()
Definition: string_expr.h:75
Arrays with given size.
Definition: std_types.h:807
const typet & element_type() const
The type of the elements of the array.
Definition: std_types.h:827
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:762
Equality.
Definition: std_expr.h:1361
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
bool is_true() const
Return whether the expression is a constant representing true.
Definition: expr.cpp:27
typet & type()
Return the type of the expression.
Definition: expr.h:84
bool is_constant() const
Return whether the expression is a constant.
Definition: expr.h:212
Application of (mathematical) function.
The trinary if-then-else operator.
Definition: std_expr.h:2370
exprt & true_case()
Definition: std_expr.h:2397
exprt & false_case()
Definition: std_expr.h:2407
exprt & cond()
Definition: std_expr.h:2387
Boolean implication.
Definition: std_expr.h:2183
const irep_idt & id() const
Definition: irep.h:388
Binary minus.
Definition: std_expr.h:1061
Binary multiplication Associativity is not specified.
Definition: std_expr.h:1107
Boolean OR.
Definition: std_expr.h:2228
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
std::vector< array_string_exprt > string_args
std::optional< array_string_exprt > string_res
string_builtin_function_with_no_evalt(const exprt &return_code, const function_application_exprt &f, array_poolt &array_pool)
string_constraintst constraints(string_constraint_generatort &generator, message_handlert &message_handler) const override
Add constraints ensuring that the value of result expression of the builtin function corresponds to t...
function_application_exprt function_application
Base class for string functions that are built in the solver.
std::optional< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
string_constraintst constraints(string_constraint_generatort &generator, message_handlert &message_handlert) const override
Set of constraints enforcing that result is the concatenation of input with character.
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
std::pair< exprt, string_constraintst > add_axioms_for_string_of_int_with_radix(const array_string_exprt &res, const exprt &input_int, const exprt &radix, size_t max_size)
Add axioms enforcing that the string corresponds to the result of String.valueOf(II) or String....
std::pair< exprt, string_constraintst > add_axioms_for_function_application(const function_application_exprt &expr)
strings contained in this call are converted to objects of type string_exprt, through adding axioms.
string_creation_builtin_functiont(const exprt &return_code, const std::vector< exprt > &fun_args, array_poolt &array_pool)
Constructor from arguments of a function application.
string_constraintst constraints(string_constraint_generatort &generator, message_handlert &message_handler) const override
Add constraints ensuring that the value of result expression of the builtin function corresponds to t...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
std::optional< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
string_constraintst constraints(string_constraint_generatort &generator, message_handlert &message_handler) const override
Set of constraints ensuring that result is similar to input where the character at index position is ...
std::optional< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
string_constraintst constraints(string_constraint_generatort &generator, message_handlert &message_handler) const override
Set of constraints ensuring result corresponds to input in which uppercase characters have been conve...
std::optional< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
string_constraintst constraints(class symbol_generatort &fresh_symbol, message_handlert &message_handler) const
Set of constraints ensuring result corresponds to input in which lowercase characters of Basic Latin ...
std::optional< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
string_transformation_builtin_functiont(exprt return_code, array_string_exprt result, array_string_exprt input, array_poolt &array_pool)
Expression to hold a symbol (variable)
Definition: std_expr.h:131
Generation of fresh symbols of a given type.
Definition: array_pool.h:22
const typet & subtype() const
Definition: type.h:187
The type of an expression, extends irept.
Definition: type.h:29
The unary minus expression.
Definition: std_expr.h:484
bool is_refined_string_type(const typet &type)
BigInt mp_integer
Definition: smt_terms.h:17
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:71
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:2450
static exprt is_lower_case(const exprt &character)
Expression which is true for lower_case characters of the Basic Latin and Latin-1 supplement of unico...
static array_string_exprt make_string(Iter begin, Iter end, const array_typet &array_type)
static exprt is_upper_case(const exprt &character)
Expression which is true for uppercase characters of the Basic Latin and Latin-1 supplement of unicod...
std::optional< std::vector< mp_integer > > eval_string(const array_string_exprt &a, const std::function< exprt(const exprt &)> &get_value)
Given a function get_value which gives a valuation to expressions, attempt to find the current value ...
static bool eval_is_upper_case(const mp_integer &c)
#define CHARACTER_FOR_UNKNOWN
Module: String solver Author: Diffblue Ltd.
exprt length_constraint_for_concat_char(const array_string_exprt &res, const array_string_exprt &s1, array_poolt &array_pool)
Add axioms enforcing that the length of res is that of the concatenation of s1 with.
Generates string constraints to link results from string functions with their arguments.
exprt minimum(const exprt &a, const exprt &b)
exprt zero_if_negative(const exprt &expr)
Returns a non-negative version of the argument.
size_t max_printed_string_length(const typet &type, unsigned long ul_radix)
Calculate the string length needed to represent any value of the given type using the given radix.
array_string_exprt & to_array_string_expr(exprt &expr)
Definition: string_expr.h:96
Collection of constraints of different types: existential formulas, universal formulas,...
std::vector< exprt > existential
std::vector< string_constraintt > universal
const type_with_subtypet & to_type_with_subtype(const typet &type)
Definition: type.h:208