CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
string_constraint_generator_valueof.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Generates string constraints for functions generating strings
4 from other types, in particular int, long, float, double, char, bool
5
6Author: Romain Brenguier, romain.brenguier@diffblue.com
7
8\*******************************************************************/
9
13
15
16#include <util/deprecate.h>
18#include <util/simplify_expr.h>
19
20#include <cmath>
21
28static unsigned long to_integer_or_default(
29 const exprt &expr,
30 unsigned long def,
31 const namespacet &ns)
32{
33 if(const auto i = numeric_cast<unsigned long>(simplify_expr(expr, ns)))
34 return *i;
35 return def;
36}
37
42DEPRECATED(SINCE(2017, 10, 5, "use add_axioms_for_string_of_int instead"))
44string_constraint_generatort::add_axioms_from_long(
46{
47 PRECONDITION(f.arguments().size() == 3 || f.arguments().size() == 4);
49 array_pool.find(f.arguments()[1], f.arguments()[0]);
50 if(f.arguments().size() == 4)
51 return add_axioms_for_string_of_int_with_radix(
52 res, f.arguments()[2], f.arguments()[3], 0);
53 else
54 return add_axioms_for_string_of_int(res, f.arguments()[2], 0);
55}
56
63DEPRECATED(SINCE(2017, 10, 5, "Java specific, should be implemented in Java"))
65string_constraint_generatort::add_axioms_from_bool(
67 const exprt &b)
68{
69 const typet &char_type = to_type_with_subtype(res.content().type()).subtype();
70 PRECONDITION(b.type() == bool_typet() || b.type().id() == ID_c_bool);
71 string_constraintst constraints;
73
74 // We add axioms:
75 // a1 : eq => res = |"true"|
76 // a2 : forall i < |"true"|. eq => res[i]="true"[i]
77 // a3 : !eq => res = |"false"|
78 // a4 : forall i < |"false"|. !eq => res[i]="false"[i]
79
80 std::string str_true = "true";
81 const implies_exprt a1(
82 eq, equal_to(array_pool.get_or_create_length(res), str_true.length()));
83 constraints.existential.push_back(a1);
84
85 for(std::size_t i = 0; i < str_true.length(); i++)
86 {
89 constraints.existential.push_back(a2);
90 }
91
92 std::string str_false = "false";
93 const implies_exprt a3(
95 equal_to(array_pool.get_or_create_length(res), str_false.length()));
96 constraints.existential.push_back(a3);
97
98 for(std::size_t i = 0; i < str_false.length(); i++)
99 {
102 constraints.existential.push_back(a4);
103 }
104
105 return {from_integer(0, get_return_code_type()), std::move(constraints)};
106}
107
116std::pair<exprt, string_constraintst>
126
136std::pair<exprt, string_constraintst>
138 const array_string_exprt &res,
139 const exprt &input_int,
140 const exprt &radix,
141 size_t max_size)
142{
143 PRECONDITION(max_size < std::numeric_limits<size_t>::max());
144 const typet &type = input_int.type();
145 PRECONDITION(type.id() == ID_signedbv);
146
149 const unsigned long radix_ul = to_integer_or_default(radix, 0, ns);
150 CHECK_RETURN((radix_ul >= 2 && radix_ul <= 36) || radix_ul == 0);
151
152 if(max_size == 0)
153 {
154 max_size = max_printed_string_length(type, radix_ul);
155 CHECK_RETURN(max_size < std::numeric_limits<size_t>::max());
156 }
157
158 const typet &char_type = to_type_with_subtype(res.content().type()).subtype();
160 const typecast_exprt radix_input_type(radix, type);
161 const bool strict_formatting = true;
162
166 input_int,
167 type,
169 res,
170 max_size,
172 radix_ul);
173 result2.existential.push_back(conjunction(std::move(result1)));
174 return {from_integer(0, get_return_code_type()), std::move(result2)};
175}
176
182{
183 const exprt zero_char = from_integer('0', chr.type());
184 const exprt nine_char = from_integer('9', chr.type());
185 const exprt a_char = from_integer('a', chr.type());
186 return if_exprt(
190}
191
198DEPRECATED(SINCE(2017, 10, 5, "use add_axioms_for_string_of_int_with_radix"))
200string_constraint_generatort::add_axioms_from_int_hex(
202 const exprt &i)
203{
204 const typet &type = i.type();
205 PRECONDITION(type.id() == ID_signedbv);
206 string_constraintst constraints;
207 const typet &index_type = res.length_type();
208 const typet &char_type = to_type_with_subtype(res.content().type()).subtype();
209 exprt sixteen = from_integer(16, index_type);
215
216 size_t max_size = 8;
217 constraints.existential.push_back(and_exprt(
218 greater_than(array_pool.get_or_create_length(res), 0),
219 less_than_or_equal_to(array_pool.get_or_create_length(res), max_size)));
220
221 for(size_t size = 1; size <= max_size; size++)
222 {
223 exprt sum = from_integer(0, type);
225 exprt chr = res[0];
226
227 for(size_t j = 0; j < size; j++)
228 {
229 chr = res[j];
233 and_exprt(
236 and_exprt(
240 }
241
242 const equal_exprt premise =
243 equal_to(array_pool.get_or_create_length(res), size);
244 constraints.existential.push_back(
246
247 // disallow 0s at the beginning
248 if(size > 1)
249 constraints.existential.push_back(
251 }
252 return {from_integer(0, get_return_code_type()), std::move(constraints)};
253}
254
258std::pair<exprt, string_constraintst>
267
278std::vector<exprt>
280 const array_string_exprt &str,
281 const exprt &radix_as_char,
282 const unsigned long radix_ul,
283 const std::size_t max_size,
284 const bool strict_formatting)
285{
286 std::vector<exprt> conjuncts;
287 const typet &char_type = to_type_with_subtype(str.content().type()).subtype();
288 const typet &index_type = str.length_type();
289
290 const exprt &chr = str[0];
295
296 // |str| > 0
298 array_pool.get_or_create_length(str), from_integer(1, index_type));
299 conjuncts.push_back(non_empty);
300
302 {
303 // str[0] = '-' || is_digit_with_radix(str[0], radix)
305 conjuncts.push_back(correct_first);
306 }
307 else
308 {
309 // str[0] = '-' || str[0] = '+' || is_digit_with_radix(str[0], radix)
312 conjuncts.push_back(correct_first);
313 }
314
315 // str[0]='+' or '-' ==> |str| > 1
319 array_pool.get_or_create_length(str), from_integer(2, index_type)));
320 conjuncts.push_back(contains_digit);
321
322 // |str| <= max_size
323 conjuncts.push_back(
325
326 // forall 1 <= i < |str| . is_digit_with_radix(str[i], radix)
327 // We unfold the above because we know that it will be used for all i up to
328 // |str|, and |str| <= max_size.
329 for(std::size_t index = 1; index < max_size; ++index)
330 {
335 from_integer(index + 1, index_type)),
337 str[index], strict_formatting, radix_as_char, radix_ul));
339 }
340
342 {
344
345 // no_leading_zero : str[0] = '0' => |str| = 1
348 equal_to(
349 array_pool.get_or_create_length(str), from_integer(1, index_type)));
350 conjuncts.push_back(no_leading_zero);
351
352 // no_leading_zero_after_minus : str[0]='-' => str[1]!='0'
356 }
357 return conjuncts;
358}
359
374 const exprt &input_int,
375 const typet &type,
376 const bool strict_formatting,
377 const array_string_exprt &str,
378 const std::size_t max_string_length,
379 const exprt &radix,
380 const unsigned long radix_ul)
381{
382 string_constraintst constraints;
383 const typet &char_type = to_type_with_subtype(str.content().type()).subtype();
384
386 const constant_exprt zero_expr = from_integer(0, type);
387
389 str[0], char_type, type, strict_formatting, radix_ul);
390
394 constraints.existential.push_back(implies_exprt(
397
398 for(size_t size = 2; size <= max_string_length; size++)
399 {
400 // sum_0 := numeric value of res[0] (which is 0 if res[0] is '-')
401 // For each 1<=j<max_string_length, we have:
402 // sum_j := radix * sum_{j-1} + numeric value of res[j]
403 // no_overflow_j := sum_{j-1} == (radix * sum_{j-1} / radix)
404 // && sum_j >= sum_{j - 1}
405 // (the first part avoid overflows in the multiplication and the second
406 // one in the addition of the definition of sum_j)
407 // For all 1<=size<=max_string_length we add axioms:
408 // a5 : |res| >= size => no_overflow_j (only added when overflow possible)
409 // a6 : |res| == size && res[0] is a digit for radix =>
410 // input_int == sum_{size-1}
411 // a7 : |res| == size && res[0] == '-' => input_int == -sum_{size-1}
412
413 const mult_exprt radix_sum(sum, radix);
414 // new_sum = radix * sum + (numeric value of res[j])
415 const exprt new_sum = plus_exprt(
416 radix_sum,
418 str[size - 1], char_type, type, strict_formatting, radix_ul));
419
420 // An overflow can happen when reaching the last index which can contain
421 // a digit, which is `max_string_length - 2` because of the space left for
422 // a minus sign. That assumes that we were able to identify the radix. If we
423 // weren't then we check for overflow on every index.
424 std::optional<exprt> no_overflow;
425 if(size - 1 >= max_string_length - 2 || radix_ul == 0)
426 {
429 }
430 sum = new_sum;
431
433
434 if(no_overflow.has_value())
435 {
437 length_expr, ID_ge, from_integer(size, length_expr.type())};
439 constraints.existential.push_back(a5);
440 }
441
443
444 const implies_exprt a6(
447 constraints.existential.push_back(a6);
448
449 const implies_exprt a7(
452 constraints.existential.push_back(a7);
453 }
454 return constraints;
455}
456
460 const typet &target_int_type)
461{
462 PRECONDITION(f.arguments().size() == 1 || f.arguments().size() == 2);
465
466 const exprt radix =
467 f.arguments().size() == 1
468 ? static_cast<exprt>(from_integer(10, target_int_type))
469 : static_cast<exprt>(typecast_exprt(f.arguments()[1], target_int_type));
470 // Most of the time we can evaluate radix as an integer. The value 0 is used
471 // to indicate when we can't tell what the radix is.
472 const unsigned long radix_ul = to_integer_or_default(radix, 0, ns);
473 PRECONDITION((radix_ul >= 2 && radix_ul <= 36) || radix_ul == 0);
474
475 const std::size_t max_string_length =
477
478 return {str, radix, radix_ul, max_string_length};
479}
480
488std::pair<exprt, string_constraintst>
491{
492 irep_idt called_function = to_symbol_expr(f.function()).get_identifier();
494 called_function == ID_cprover_string_is_valid_int_func ||
495 called_function == ID_cprover_string_is_valid_long_func);
496 const signedbv_typet target_int_type{static_cast<size_t>(
497 called_function == ID_cprover_string_is_valid_int_func ? 32 : 64)};
499
500 const typet &char_type =
501 to_type_with_subtype(args.str.content().type()).subtype();
502 const typecast_exprt radix_as_char(args.radix, char_type);
503 const bool strict_formatting = false;
504
508 args.str,
510 args.radix_ul,
511 args.max_string_length,
513
514 return {typecast_exprt{conjunction(conjuncts), f.type()}, {}};
515}
516
524std::pair<exprt, string_constraintst>
527{
528 auto args = unpack_parseint_arguments(f, f.type());
529
530 const typet &type = f.type();
531
532 const symbol_exprt input_int = fresh_symbol("parsed_int", type);
533 const bool strict_formatting = false;
534
536 input_int,
537 type,
539 args.str,
540 args.max_string_length,
541 args.radix,
542 args.radix_ul);
543
544 return {input_int, std::move(constraints2)};
545}
546
556 const exprt &chr,
557 const bool strict_formatting,
558 const exprt &radix_as_char,
559 const unsigned long radix_ul)
560{
561 PRECONDITION((radix_ul >= 2 && radix_ul <= 36) || radix_ul == 0);
562 const typet &char_type = chr.type();
564
568
569 if(radix_ul <= 10 && radix_ul != 0)
570 {
572 }
573 else
574 {
576 const exprt a_char = from_integer('a', char_type);
578
580
582 and_exprt(
585 and_exprt(
589
591 {
593 is_digit_when_radix_gt_10.copy_to_operands(and_exprt(
597 }
598
599 if(radix_ul == 0)
600 {
601 return if_exprt(
605 }
606 else
607 {
608 return std::move(is_digit_when_radix_gt_10);
609 }
610 }
611}
612
624 const exprt &chr,
625 const typet &char_type,
626 const typet &type,
627 const bool strict_formatting,
628 const unsigned long radix_ul)
629{
631
636
637 if(radix_ul <= 10 && radix_ul != 0)
638 {
640 return typecast_exprt(
641 if_exprt(
645 type);
646 }
647 else
648 {
654
656 {
659 return typecast_exprt(
660 if_exprt(
663 if_exprt(
667 type);
668 }
669 else
670 {
674 return typecast_exprt(
675 if_exprt(
678 if_exprt(
681 if_exprt(
684 from_integer(0, char_type)))),
685 type);
686 }
687 }
688}
689
697size_t max_printed_string_length(const typet &type, unsigned long radix_ul)
698{
699 if(radix_ul == 0)
700 {
701 radix_ul = 2;
702 }
703 double n_bits = static_cast<double>(to_bitvector_type(type).get_width());
704 double radix = static_cast<double>(radix_ul);
705 bool signed_type = type.id() == ID_signedbv;
724 double max = signed_type
725 ? floor(static_cast<double>(n_bits - 1) / log2(radix)) + 2.0
726 : ceil(static_cast<double>(n_bits) / log2(radix));
727 return static_cast<size_t>(max);
728}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
array_string_exprt get_string_expr(array_poolt &array_pool, const exprt &expr)
Fetch the string_exprt corresponding to the given refined_string_exprt.
const bitvector_typet & to_bitvector_type(const typet &type)
Cast a typet to a bitvector_typet.
bitvector_typet char_type()
Definition c_types.cpp:106
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Boolean AND.
Definition std_expr.h:2125
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
array_string_exprt find(const exprt &pointer, const exprt &length)
Creates a new array if the pointer is not pointing to an array.
const typet & length_type() const
Definition string_expr.h:70
A base class for expressions that are predicates, i.e., Boolean-typed, and that take exactly two argu...
Definition std_expr.h:731
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition std_expr.h:762
The Boolean type.
Definition std_types.h:36
A constant literal expression.
Definition std_expr.h:3117
Division.
Definition std_expr.h:1157
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
Equality.
Definition std_expr.h:1366
Base class for all expressions.
Definition expr.h:56
typet & type()
Return the type of the expression.
Definition expr.h:84
Application of (mathematical) function.
The trinary if-then-else operator.
Definition std_expr.h:2497
Boolean implication.
Definition std_expr.h:2225
const irep_idt & id() const
Definition irep.h:388
Binary minus.
Definition std_expr.h:1061
Binary multiplication Associativity is not specified.
Definition std_expr.h:1107
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition namespace.h:91
Boolean negation.
Definition std_expr.h:2454
Boolean OR.
Definition std_expr.h:2270
The plus expression Associativity is not specified.
Definition std_expr.h:1002
Fixed-width bit-vector with two's complement interpretation.
std::pair< exprt, string_constraintst > add_axioms_for_is_valid_int(const function_application_exprt &f)
Check a string is a valid integer, using the same rules as Java Integer.parseInt.
std::pair< exprt, string_constraintst > add_axioms_from_int_hex(const array_string_exprt &res, const exprt &i)
Add axioms stating that the string res corresponds to the integer argument written in hexadecimal.
std::vector< exprt > get_conjuncts_for_correct_number_format(const array_string_exprt &str, const exprt &radix_as_char, const unsigned long radix_ul, const std::size_t max_size, const bool strict_formatting)
Add axioms making the return value true if the given string is a correct number in the given radix.
parseint_argumentst unpack_parseint_arguments(const function_application_exprt &f, const typet &target_int_type)
std::pair< exprt, string_constraintst > add_axioms_for_string_of_int_with_radix(const array_string_exprt &res, const exprt &input_int, const exprt &radix, size_t max_size)
Add axioms enforcing that the string corresponds to the result of String.valueOf(II) or String....
std::pair< exprt, string_constraintst > add_axioms_for_string_of_int(const array_string_exprt &res, const exprt &input_int, size_t max_size)
Add axioms enforcing that the string corresponds to the result of String.valueOf(I) or String....
string_constraintst add_axioms_for_characters_in_integer_string(const exprt &input_int, const typet &type, const bool strict_formatting, const array_string_exprt &str, const std::size_t max_string_length, const exprt &radix, const unsigned long radix_ul)
Add axioms connecting the characters in the input string to the value of the output integer.
std::pair< exprt, string_constraintst > add_axioms_for_parse_int(const function_application_exprt &f)
Integer value represented by a string.
Expression to hold a symbol (variable)
Definition std_expr.h:131
The Boolean constant true.
Definition std_expr.h:3190
Semantic type conversion.
Definition std_expr.h:2073
The type of an expression, extends irept.
Definition type.h:29
The unary minus expression.
Definition std_expr.h:484
#define SINCE(year, month, day, msg)
Definition deprecate.h:26
#define DEPRECATED(msg)
Definition deprecate.h:23
double ceil(double x)
Definition math.c:1265
double log2(double x)
Definition math.c:2660
double floor(double x)
Definition math.c:1323
API to expression classes for 'mathematical' expressions.
bool is_number(const typet &type)
Returns true if the type is a rational, real, integer, natural, complex, unsignedbv,...
STL namespace.
exprt simplify_expr(exprt src, const namespacet &ns)
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
#define PRECONDITION(CONDITION)
Definition invariant.h:463
exprt conjunction(const exprt::operandst &op)
1) generates a conjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition std_expr.cpp:83
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition std_expr.h:272
Generates string constraints to link results from string functions with their arguments.
exprt get_numeric_value_from_character(const exprt &chr, const typet &char_type, const typet &type, const bool strict_formatting, unsigned long radix_ul)
Get the numeric value of a character, assuming that the radix is large enough.
exprt is_digit_with_radix(const exprt &chr, const bool strict_formatting, const exprt &radix_as_char, const unsigned long radix_ul)
Check if a character is a digit with respect to the given radix, e.g.
size_t max_printed_string_length(const typet &type, unsigned long ul_radix)
Calculate the string length needed to represent any value of the given type using the given radix.
signedbv_typet get_return_code_type()
size_t max_printed_string_length(const typet &type, unsigned long radix_ul)
Calculate the string length needed to represent any value of the given type using the given radix.
static unsigned long to_integer_or_default(const exprt &expr, unsigned long def, const namespacet &ns)
If the expression is a constant expression then we get the value of it as an unsigned long.
static exprt int_of_hex_char(const exprt &chr)
Returns the integer value represented by the character.
exprt get_numeric_value_from_character(const exprt &chr, const typet &char_type, const typet &type, const bool strict_formatting, const unsigned long radix_ul)
Get the numeric value of a character, assuming that the radix is large enough.
exprt is_digit_with_radix(const exprt &chr, const bool strict_formatting, const exprt &radix_as_char, const unsigned long radix_ul)
Check if a character is a digit with respect to the given radix, e.g.
binary_relation_exprt less_than_or_equal_to(exprt lhs, exprt rhs)
Definition string_expr.h:37
binary_relation_exprt greater_or_equal_to(exprt lhs, exprt rhs)
Definition string_expr.h:20
binary_relation_exprt greater_than(exprt lhs, exprt rhs)
Definition string_expr.h:26
equal_exprt equal_to(exprt lhs, exprt rhs)
Definition string_expr.h:55
Argument block for parseInt and cousins, common to parseInt itself and CProverString....
Collection of constraints of different types: existential formulas, universal formulas,...
std::vector< exprt > existential
const type_with_subtypet & to_type_with_subtype(const typet &type)
Definition type.h:208