CBMC
string_constraint_generator_main.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints to link results from string functions
4  with their arguments. This is inspired by the PASS paper at HVC'13:
5  "PASS: String Solving with Parameterized Array and Interval Automaton"
6  by Guodong Li and Indradeep Ghosh, which gives examples of constraints
7  for several functions.
8 
9 Author: Romain Brenguier, romain.brenguier@diffblue.com
10 
11 \*******************************************************************/
12 
19 
22 
23 #include <iterator>
24 
25 #include <util/arith_tools.h>
26 #include <util/deprecate.h>
28 #include <util/mathematical_expr.h>
29 #include <util/simplify_expr.h>
30 #include <util/ssa_expr.h>
31 #include <util/string_constant.h>
32 
34  const namespacet &ns,
35  message_handlert &message_handler)
36  : array_pool(fresh_symbol), ns(ns), message_handler(message_handler)
37 {
38 }
39 
41 {
42  PRECONDITION(sum.operands().size() == 2);
43  const exprt zero = from_integer(0, sum.op0().type());
44  const binary_relation_exprt op0_negative(sum.op0(), ID_lt, zero);
45  const binary_relation_exprt op1_negative(sum.op1(), ID_lt, zero);
46  const binary_relation_exprt sum_negative(sum, ID_lt, zero);
47 
48  // overflow happens when we add two values of same sign but their sum has a
49  // different sign
50  return and_exprt(
51  equal_exprt(op0_negative, op1_negative),
52  notequal_exprt(op1_negative, sum_negative));
53 }
54 
64  const exprt &return_code,
66 {
67  PRECONDITION(f.arguments().size() == 2);
68 
72  f.arguments()[0].id() == ID_index ? to_index_expr(f.arguments()[0]).array()
73  : f.arguments()[0]);
74 
75  const exprt &pointer_expr = f.arguments()[1];
76  array_pool.insert(simplify_expr(pointer_expr, ns), array_expr);
77  // created_strings.emplace(to_array_string_expr(array_expr));
78  return equal_exprt{return_code, from_integer(0, f.type())};
79 }
80 
88  const exprt &return_code,
90 {
91  PRECONDITION(f.arguments().size() == 2);
93  const exprt &new_length = f.arguments()[1];
94 
95  const auto &length = array_pool.get_or_create_length(array_expr);
96  return and_exprt{equal_exprt{return_code, from_integer(0, f.type())},
97  equal_exprt(length, new_length)};
98 }
99 
102 {
103  std::move(
104  other.existential.begin(),
105  other.existential.end(),
106  std::back_inserter(result.existential));
107  std::move(
108  other.universal.begin(),
109  other.universal.end(),
110  std::back_inserter(result.universal));
111  std::move(
112  other.not_contains.begin(),
113  other.not_contains.end(),
114  std::back_inserter(result.not_contains));
115 }
116 
130  const array_string_exprt &s,
131  const exprt &start,
132  const exprt &end,
133  const std::string &char_set)
134 {
135  // Parse char_set
136  PRECONDITION(char_set.length() == 3);
137  PRECONDITION(char_set[1] == '-');
138  const integer_intervalt char_range(char_set[0], char_set[2]);
139 
140  // Add constraint
141  const symbol_exprt qvar = fresh_symbol("char_constr", s.length_type());
142  const exprt chr = s[qvar];
143  const string_constraintt sc(
144  qvar,
145  zero_if_negative(start),
146  zero_if_negative(end),
147  interval_constraint(chr, char_range),
149  return {{}, {sc}, {}};
150 }
151 
162 std::pair<exprt, string_constraintst>
165 {
166  const auto &args = f.arguments();
167  PRECONDITION(3 <= args.size() && args.size() <= 5);
168  PRECONDITION(args[2].type().id() == ID_string);
169  PRECONDITION(args[2].is_constant());
170 
171  const array_string_exprt s = array_pool.find(args[1], args[0]);
172  const irep_idt &char_set_string = to_constant_expr(args[2]).get_value();
173  const exprt &start =
174  args.size() >= 4 ? args[3] : from_integer(0, s.length_type());
175  const exprt &end =
176  args.size() >= 5 ? args[4] : array_pool.get_or_create_length(s);
177  auto constraints =
178  add_constraint_on_characters(s, start, end, char_set_string.c_str());
179  return {from_integer(0, get_return_code_type()), std::move(constraints)};
180 }
181 
183 {
184  return signedbv_typet(32);
185 }
186 
188 {
189  const exprt &name = expr.function();
190  PRECONDITION(name.id() == ID_symbol);
191  PRECONDITION(!is_ssa_expr(name));
192  return to_symbol_expr(name).get_identifier();
193 }
194 
195 std::optional<exprt>
197  const exprt &return_code,
198  const function_application_exprt &expr)
199 {
200  const irep_idt &id = get_function_name(expr);
201  if(id == ID_cprover_associate_array_to_pointer_func)
202  return associate_array_to_pointer(return_code, expr);
203  else if(id == ID_cprover_associate_length_to_array_func)
204  return associate_length_to_array(return_code, expr);
205  return {};
206 }
207 
213 std::pair<exprt, string_constraintst>
215  const function_application_exprt &expr)
216 {
217  const irep_idt &id = get_function_name(expr);
218 
219  if(id == ID_cprover_char_literal_func)
220  return add_axioms_for_char_literal(expr);
221  else if(id == ID_cprover_string_length_func)
222  return add_axioms_for_length(expr);
223  else if(id == ID_cprover_string_equal_func)
224  return add_axioms_for_equals(expr);
225  else if(id == ID_cprover_string_equals_ignore_case_func)
227  else if(id == ID_cprover_string_is_empty_func)
228  return add_axioms_for_is_empty(expr);
229  else if(id == ID_cprover_string_char_at_func)
230  return add_axioms_for_char_at(expr);
231  else if(id == ID_cprover_string_is_prefix_func)
232  return add_axioms_for_is_prefix(expr, false);
233  else if(id == ID_cprover_string_is_suffix_func)
234  return add_axioms_for_is_suffix(expr, false);
235  else if(id == ID_cprover_string_startswith_func)
236  return add_axioms_for_is_prefix(expr, true);
237  else if(id == ID_cprover_string_endswith_func)
238  return add_axioms_for_is_suffix(expr, true);
239  else if(id == ID_cprover_string_contains_func)
240  return add_axioms_for_contains(expr);
241  else if(id == ID_cprover_string_index_of_func)
242  return add_axioms_for_index_of(expr);
243  else if(id == ID_cprover_string_last_index_of_func)
244  return add_axioms_for_last_index_of(expr);
245  else if(
246  id == ID_cprover_string_is_valid_int_func ||
247  id == ID_cprover_string_is_valid_long_func)
248  return add_axioms_for_is_valid_int(expr);
249  else if(id == ID_cprover_string_parse_int_func)
250  return add_axioms_for_parse_int(expr);
251  else if(id == ID_cprover_string_code_point_at_func)
252  return add_axioms_for_code_point_at(expr);
253  else if(id == ID_cprover_string_code_point_before_func)
255  else if(id == ID_cprover_string_code_point_count_func)
256  return add_axioms_for_code_point_count(expr);
257  else if(id == ID_cprover_string_offset_by_code_point_func)
259  else if(id == ID_cprover_string_compare_to_func)
260  return add_axioms_for_compare_to(expr);
261  else if(id == ID_cprover_string_literal_func)
262  return add_axioms_from_literal(expr);
263  else if(id == ID_cprover_string_concat_code_point_func)
265  else if(id == ID_cprover_string_substring_func)
266  return add_axioms_for_substring(expr);
267  else if(id == ID_cprover_string_trim_func)
268  return add_axioms_for_trim(expr);
269  else if(id == ID_cprover_string_empty_string_func)
270  return add_axioms_for_empty_string(expr);
271  else if(id == ID_cprover_string_copy_func)
272  return add_axioms_for_copy(expr);
273  else if(id == ID_cprover_string_of_int_hex_func)
274  return add_axioms_from_int_hex(expr);
275  else if(id == ID_cprover_string_of_float_func)
276  return add_axioms_for_string_of_float(expr);
277  else if(id == ID_cprover_string_of_float_scientific_notation_func)
279  else if(id == ID_cprover_string_of_double_func)
280  return add_axioms_from_double(expr);
281  else if(id == ID_cprover_string_of_long_func)
282  return add_axioms_from_long(expr);
283  else if(id == ID_cprover_string_set_length_func)
284  return add_axioms_for_set_length(expr);
285  else if(id == ID_cprover_string_delete_func)
286  return add_axioms_for_delete(expr);
287  else if(id == ID_cprover_string_delete_char_at_func)
288  return add_axioms_for_delete_char_at(expr);
289  else if(id == ID_cprover_string_replace_func)
290  return add_axioms_for_replace(expr);
291  else if(id == ID_cprover_string_constrain_characters_func)
293  else
294  {
295  std::string msg(
296  "string_constraint_generator::function_application: unknown symbol :");
297  msg += id2string(id);
299  }
300  UNREACHABLE;
301 }
302 
309 DEPRECATED(SINCE(2017, 10, 5, "should use substring instead"))
310 std::pair<exprt, string_constraintst>
311 string_constraint_generatort::add_axioms_for_copy(
313 {
314  const auto &args = f.arguments();
315  PRECONDITION(args.size() == 3 || args.size() == 5);
316  const array_string_exprt res = array_pool.find(args[1], args[0]);
317  const array_string_exprt str = get_string_expr(array_pool, args[2]);
318  const typet &index_type = str.length_type();
319  const exprt offset = args.size() == 3 ? from_integer(0, index_type) : args[3];
320  const exprt count =
321  args.size() == 3 ? array_pool.get_or_create_length(str) : args[4];
322  return add_axioms_for_substring(res, str, offset, plus_exprt(offset, count));
323 }
324 
330 std::pair<exprt, string_constraintst>
333 {
334  PRECONDITION(f.arguments().size() == 1);
336  return {array_pool.get_or_create_length(str), {}};
337 }
338 
342 {
343  return binary_relation_exprt(x, ID_ge, from_integer(0, x.type()));
344 }
345 
349 std::pair<exprt, string_constraintst>
352 {
354  PRECONDITION(
355  args.size() == 1); // there should be exactly 1 arg to char literal
356 
357  const exprt &arg = args[0];
358  // for C programs argument to char literal should be one string constant
359  // of size 1.
360  if(
361  arg.operands().size() == 1 &&
362  to_unary_expr(arg).op().operands().size() == 1 &&
363  to_unary_expr(to_unary_expr(arg).op()).op().operands().size() == 2 &&
364  to_binary_expr(to_unary_expr(to_unary_expr(arg).op()).op()).op0().id() ==
365  ID_string_constant)
366  {
368  to_binary_expr(to_unary_expr(to_unary_expr(arg).op()).op()).op0());
369  const std::string &sval = id2string(s.value());
370  CHECK_RETURN(sval.size() == 1);
371  return {from_integer(unsigned(sval[0]), arg.type()), {}};
372  }
373  else
374  {
376  }
377 }
378 
386 std::pair<exprt, string_constraintst>
389 {
390  PRECONDITION(f.arguments().size() == 2);
392  symbol_exprt char_sym =
393  fresh_symbol("char", to_array_type(str.type()).element_type());
394  string_constraintst constraints;
395  constraints.existential = {equal_exprt(char_sym, str[f.arguments()[1]])};
396  return {std::move(char_sym), std::move(constraints)};
397 }
398 
399 exprt minimum(const exprt &a, const exprt &b)
400 {
401  return if_exprt(binary_relation_exprt(a, ID_le, b), a, b);
402 }
403 
404 exprt maximum(const exprt &a, const exprt &b)
405 {
406  return if_exprt(binary_relation_exprt(a, ID_le, b), b, a);
407 }
408 
413 {
414  return maximum(from_integer(0, expr.type()), expr);
415 }
416 
419 std::pair<exprt, string_constraintst>
421  std::pair<exprt, string_constraintst> result1,
422  std::pair<exprt, string_constraintst> result2)
423 {
424  const exprt return_code = maximum(result1.first, result2.first);
425  merge(result2.second, std::move(result1.second));
426  return {simplify_expr(return_code, ns), std::move(result2.second)};
427 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
array_string_exprt get_string_expr(array_poolt &array_pool, const exprt &expr)
Fetch the string_exprt corresponding to the given refined_string_exprt.
Definition: array_pool.cpp:199
Boolean AND.
Definition: std_expr.h:2125
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
Definition: array_pool.cpp:26
void insert(const exprt &pointer_expr, const array_string_exprt &array)
Definition: array_pool.cpp:160
array_string_exprt find(const exprt &pointer, const exprt &length)
Creates a new array if the pointer is not pointing to an array.
Definition: array_pool.cpp:184
const typet & length_type() const
Definition: string_expr.h:70
const typet & element_type() const
The type of the elements of the array.
Definition: std_types.h:827
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:762
const irep_idt & get_value() const
Definition: std_expr.h:3003
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
const char * c_str() const
Definition: dstring.h:116
Equality.
Definition: std_expr.h:1366
Base class for all expressions.
Definition: expr.h:56
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
Application of (mathematical) function.
The trinary if-then-else operator.
Definition: std_expr.h:2375
exprt & array()
Definition: std_expr.h:1500
const irept & find(const irep_idt &name) const
Definition: irep.cpp:93
const irep_idt & id() const
Definition: irep.h:388
exprt & op1()
Definition: std_expr.h:938
exprt & op0()
Definition: std_expr.h:932
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
Disequality.
Definition: std_expr.h:1425
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
Fixed-width bit-vector with two's complement interpretation.
void value(const irep_idt &)
std::pair< exprt, string_constraintst > add_axioms_for_empty_string(const function_application_exprt &f)
Add axioms to say that the returned string expression is empty.
std::pair< exprt, string_constraintst > add_axioms_for_compare_to(const function_application_exprt &f)
Lexicographic comparison of two strings.
std::pair< exprt, string_constraintst > add_axioms_for_equals_ignore_case(const function_application_exprt &f)
Equality of the content ignoring case of characters.
std::pair< exprt, string_constraintst > add_axioms_for_delete(const array_string_exprt &res, const array_string_exprt &str, const exprt &start, const exprt &end)
Add axioms stating that res corresponds to the input str where we removed characters between the posi...
std::pair< exprt, string_constraintst > add_axioms_for_last_index_of(const array_string_exprt &str, const exprt &c, const exprt &from_index)
Add axioms stating that the returned value is the index within haystack (str) of the last occurrence ...
std::pair< exprt, string_constraintst > add_axioms_from_double(const function_application_exprt &f)
Add axioms corresponding to the String.valueOf(D) java function.
exprt associate_array_to_pointer(const exprt &return_code, const function_application_exprt &f)
Associate a char array to a char pointer.
std::pair< exprt, string_constraintst > add_axioms_for_substring(const array_string_exprt &res, const array_string_exprt &str, const exprt &start, const exprt &end)
Add axioms ensuring that res corresponds to the substring of str between indexes ‘start’ = max(start,...
std::pair< exprt, string_constraintst > add_axioms_for_equals(const function_application_exprt &f)
Equality of the content of two strings.
std::pair< exprt, string_constraintst > add_axioms_for_is_valid_int(const function_application_exprt &f)
Check a string is a valid integer, using the same rules as Java Integer.parseInt.
std::pair< exprt, string_constraintst > add_axioms_from_int_hex(const array_string_exprt &res, const exprt &i)
Add axioms stating that the string res corresponds to the integer argument written in hexadecimal.
std::pair< exprt, string_constraintst > add_axioms_for_is_empty(const function_application_exprt &f)
Add axioms stating that the returned value is true exactly when the argument string is empty.
std::optional< exprt > make_array_pointer_association(const exprt &return_code, const function_application_exprt &expr)
Associate array to pointer, and array to length.
std::pair< exprt, string_constraintst > add_axioms_for_index_of(const array_string_exprt &str, const exprt &c, const exprt &from_index)
Add axioms stating that the returned value is the index within haystack (str) of the first occurrence...
std::pair< exprt, string_constraintst > add_axioms_for_constrain_characters(const function_application_exprt &f)
Add axioms to ensure all characters of a string belong to a given set.
std::pair< exprt, string_constraintst > add_axioms_for_function_application(const function_application_exprt &expr)
strings contained in this call are converted to objects of type string_exprt, through adding axioms.
std::pair< exprt, string_constraintst > add_axioms_for_set_length(const function_application_exprt &f)
Reduce or extend a string to have the given length.
std::pair< exprt, string_constraintst > add_axioms_for_offset_by_code_point(const function_application_exprt &f)
Add axioms corresponding the String.offsetByCodePointCount java function.
std::pair< exprt, string_constraintst > add_axioms_for_is_prefix(const array_string_exprt &prefix, const array_string_exprt &str, const exprt &offset)
Add axioms stating that the returned expression is true exactly when the offset is greater or equal t...
std::pair< exprt, string_constraintst > add_axioms_for_trim(const function_application_exprt &f)
Remove leading and trailing whitespaces.
std::pair< exprt, string_constraintst > add_axioms_for_replace(const function_application_exprt &f)
Replace a character by another in a string.
std::pair< exprt, string_constraintst > add_axioms_for_parse_int(const function_application_exprt &f)
Integer value represented by a string.
exprt associate_length_to_array(const exprt &return_code, const function_application_exprt &f)
Associate an integer length to a char array.
std::pair< exprt, string_constraintst > add_axioms_from_literal(const function_application_exprt &f)
String corresponding to an internal cprover string.
std::pair< exprt, string_constraintst > add_axioms_for_is_suffix(const function_application_exprt &f, bool swap_arguments)
Test if the target is a suffix of the string.
std::pair< exprt, string_constraintst > add_axioms_for_contains(const function_application_exprt &f)
Test whether a string contains another.
string_constraint_generatort(const namespacet &ns, message_handlert &message_handler)
string_constraintst add_constraint_on_characters(const array_string_exprt &s, const exprt &start, const exprt &end, const std::string &char_set)
Add constraint on characters of a string.
std::pair< exprt, string_constraintst > add_axioms_from_float_scientific_notation(const array_string_exprt &res, const exprt &f)
Add axioms to write the float in scientific notation.
std::pair< exprt, string_constraintst > add_axioms_for_copy(const function_application_exprt &f)
add axioms to say that the returned string expression is equal to the argument of the function applic...
std::pair< exprt, string_constraintst > add_axioms_for_string_of_float(const function_application_exprt &f)
String representation of a float value.
std::pair< exprt, string_constraintst > add_axioms_from_long(const function_application_exprt &f)
Add axioms corresponding to the String.valueOf(J) java function.
std::pair< exprt, string_constraintst > add_axioms_for_code_point_count(const function_application_exprt &f)
Add axioms corresponding the String.codePointCount java function.
std::pair< exprt, string_constraintst > add_axioms_for_concat_code_point(const function_application_exprt &f)
Add axioms corresponding to the StringBuilder.appendCodePoint(I) function.
std::pair< exprt, string_constraintst > add_axioms_for_code_point_at(const function_application_exprt &f)
add axioms corresponding to the String.codePointAt java function
std::pair< exprt, string_constraintst > add_axioms_for_char_at(const function_application_exprt &f)
Character at a given position.
std::pair< exprt, string_constraintst > add_axioms_for_code_point_before(const function_application_exprt &f)
add axioms corresponding to the String.codePointBefore java function
std::pair< exprt, string_constraintst > add_axioms_for_delete_char_at(const function_application_exprt &expr)
add axioms corresponding to the StringBuilder.deleteCharAt java function
std::pair< exprt, string_constraintst > add_axioms_for_length(const function_application_exprt &f)
Length of a string.
std::pair< exprt, string_constraintst > add_axioms_for_char_literal(const function_application_exprt &f)
add axioms stating that the returned value is equal to the argument
std::pair< exprt, string_constraintst > combine_results(std::pair< exprt, string_constraintst > result1, std::pair< exprt, string_constraintst > result2)
Combine the results of two add_axioms function by taking the maximum of the return codes and merging ...
Expression to hold a symbol (variable)
Definition: std_expr.h:131
const irep_idt & get_identifier() const
Definition: std_expr.h:160
The type of an expression, extends irept.
Definition: type.h:29
#define SINCE(year, month, day, msg)
Definition: deprecate.h:26
#define DEPRECATED(msg)
Definition: deprecate.h:23
exprt interval_constraint(const exprt &expr, const integer_intervalt &interval)
Given an exprt and an integer interval return an exprt that represents restricting the expression to ...
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
API to expression classes for 'mathematical' expressions.
exprt simplify_expr(exprt src, const namespacet &ns)
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:525
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
#define UNIMPLEMENTED
Definition: invariant.h:558
bool is_ssa_expr(const exprt &expr)
Definition: ssa_expr.h:125
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition: std_expr.h:3050
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const binary_exprt & to_binary_expr(const exprt &expr)
Cast an exprt to a binary_exprt.
Definition: std_expr.h:715
const unary_exprt & to_unary_expr(const exprt &expr)
Cast an exprt to a unary_exprt.
Definition: std_expr.h:426
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1538
bool is_constant(const typet &type)
This method tests, if the given typet is a constant.
Definition: std_types.h:29
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:888
const string_constantt & to_string_constant(const exprt &expr)
Generates string constraints to link results from string functions with their arguments.
exprt is_positive(const exprt &x)
static irep_idt get_function_name(const function_application_exprt &expr)
exprt sum_overflows(const plus_exprt &sum)
exprt maximum(const exprt &a, const exprt &b)
void merge(string_constraintst &result, string_constraintst other)
Merge two sets of constraints by appending to the first one.
exprt minimum(const exprt &a, const exprt &b)
exprt zero_if_negative(const exprt &expr)
Returns a non-negative version of the argument.
signedbv_typet get_return_code_type()
array_string_exprt & to_array_string_expr(exprt &expr)
Definition: string_expr.h:96
#define string_refinement_invariantt(reason)
Collection of constraints of different types: existential formulas, universal formulas,...
std::vector< string_not_contains_constraintt > not_contains
std::vector< exprt > existential
std::vector< string_constraintt > universal