CBMC
character_refine_preprocess.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Preprocess a goto-programs so that calls to the java Character
4  library are replaced by simple expressions.
5 
6 Author: Romain Brenguier
7 
8 Date: March 2017
9 
10 \*******************************************************************/
11 
15 
17 
18 #include <util/arith_tools.h>
19 #include <util/bitvector_expr.h>
20 #include <util/std_expr.h>
21 
23 
28  exprt (*expr_function)(const exprt &chr, const typet &type),
29  conversion_inputt &target)
30 {
31  const code_function_callt &function_call=target;
32  PRECONDITION(function_call.arguments().size() == 1);
33  const exprt &arg=function_call.arguments()[0];
34  const exprt &result=function_call.lhs();
35  const typet &type=result.type();
36  return code_assignt(result, expr_function(arg, type));
37 }
38 
46  const exprt &chr,
47  const mp_integer &lower_bound,
48  const mp_integer &upper_bound)
49 {
50  return and_exprt(
51  binary_relation_exprt(chr, ID_ge, from_integer(lower_bound, chr.type())),
52  binary_relation_exprt(chr, ID_le, from_integer(upper_bound, chr.type())));
53 }
54 
61  const exprt &chr, const std::list<mp_integer> &list)
62 {
63  exprt::operandst ops;
64  for(const auto &i : list)
65  ops.push_back(equal_exprt(chr, from_integer(i, chr.type())));
66  return disjunction(ops);
67 }
68 
75  const exprt &chr, const typet &type)
76 {
77  exprt u010000=from_integer(0x010000, type);
78  binary_relation_exprt small(chr, ID_lt, u010000);
79  return if_exprt(small, from_integer(1, type), from_integer(2, type));
80 }
81 
86  conversion_inputt &target)
87 {
88  return convert_char_function(
90 }
91 
95  const exprt &chr, const typet &type)
96 {
97  return typecast_exprt(chr, type);
98 }
99 
104  conversion_inputt &target)
105 {
106  return convert_char_function(
108 }
109 
114 {
115  const code_function_callt &function_call=target;
116  PRECONDITION(function_call.arguments().size() == 2);
117  const exprt &char1=function_call.arguments()[0];
118  const exprt &char2=function_call.arguments()[1];
119  const exprt &result=function_call.lhs();
120  const typet &type=result.type();
121  binary_relation_exprt smaller(char1, ID_lt, char2);
122  binary_relation_exprt greater(char1, ID_gt, char2);
123  if_exprt expr(
124  smaller,
125  from_integer(-1, type),
126  if_exprt(greater, from_integer(1, type), from_integer(0, type)));
127 
128  return code_assignt(result, expr);
129 }
130 
139  conversion_inputt &target)
140 {
141  const code_function_callt &function_call=target;
142  const std::size_t nb_args=function_call.arguments().size();
143  PRECONDITION(nb_args==1 || nb_args==2);
144  const exprt &arg=function_call.arguments()[0];
145  // If there is no radix argument we set it to 36 which is the maximum possible
146  const exprt &radix=
147  nb_args>1?function_call.arguments()[1]:from_integer(36, arg.type());
148  const exprt &result=function_call.lhs();
149  const typet &type=result.type();
150 
151  // TODO: If the radix is not in the range MIN_RADIX <= radix <= MAX_RADIX or
152  // if the value of ch is not a valid digit in the specified radix,
153  // -1 is returned.
154 
155  // Case 1: The method isDigit is true of the character and the Unicode
156  // decimal digit value of the character (or its single-character
157  // decomposition) is less than the specified radix.
158  exprt invalid=from_integer(-1, arg.type());
159  exprt c0=from_integer('0', arg.type());
160  exprt latin_digit=in_interval_expr(arg, '0', '9');
161  minus_exprt value1(arg, c0);
162  // TODO: this is only valid for latin digits
163  if_exprt case1(
164  binary_relation_exprt(value1, ID_lt, radix), value1, invalid);
165 
166  // Case 2: The character is one of the uppercase Latin letters 'A'
167  // through 'Z' and its code is less than radix + 'A' - 10,
168  // then ch - 'A' + 10 is returned.
169  exprt cA=from_integer('A', arg.type());
170  exprt i10=from_integer(10, arg.type());
171  exprt upper_case=in_interval_expr(arg, 'A', 'Z');
172  plus_exprt value2(minus_exprt(arg, cA), i10);
173  if_exprt case2(
174  binary_relation_exprt(value2, ID_lt, radix), value2, invalid);
175 
176  // The character is one of the lowercase Latin letters 'a' through 'z' and
177  // its code is less than radix + 'a' - 10, then ch - 'a' + 10 is returned.
178  exprt ca=from_integer('a', arg.type());
179  exprt lower_case=in_interval_expr(arg, 'a', 'z');
180  plus_exprt value3(minus_exprt(arg, ca), i10);
181  if_exprt case3(
182  binary_relation_exprt(value3, ID_lt, radix), value3, invalid);
183 
184 
185  // The character is one of the fullwidth uppercase Latin letters A ('\uFF21')
186  // through Z ('\uFF3A') and its code is less than radix + '\uFF21' - 10.
187  // In this case, ch - '\uFF21' + 10 is returned.
188  exprt uFF21=from_integer(0xFF21, arg.type());
189  exprt fullwidth_upper_case=in_interval_expr(arg, 0xFF21, 0xFF3A);
190  plus_exprt value4(minus_exprt(arg, uFF21), i10);
191  if_exprt case4(
192  binary_relation_exprt(value4, ID_lt, radix), value4, invalid);
193 
194  // The character is one of the fullwidth lowercase Latin letters a ('\uFF41')
195  // through z ('\uFF5A') and its code is less than radix + '\uFF41' - 10.
196  // In this case, ch - '\uFF41' + 10 is returned.
197  exprt uFF41=from_integer(0xFF41, arg.type());
198  plus_exprt value5(minus_exprt(arg, uFF41), i10);
199  if_exprt case5(
200  binary_relation_exprt(value5, ID_lt, radix), value5, invalid);
201 
202  if_exprt fullwidth_cases(fullwidth_upper_case, case4, case5);
203  if_exprt expr(
204  latin_digit,
205  case1,
206  if_exprt(upper_case, case2, if_exprt(lower_case, case3, fullwidth_cases)));
207  typecast_exprt tc_expr(expr, type);
208 
209  return code_assignt(result, tc_expr);
210 }
211 
216 {
217  return convert_digit_char(target);
218 }
219 
226 {
227  const code_function_callt &function_call=target;
228  PRECONDITION(function_call.arguments().size() == 2);
229  const exprt &digit=function_call.arguments()[0];
230  const exprt &result=function_call.lhs();
231  const typet &type=result.type();
232  typecast_exprt casted_digit(digit, type);
233 
234  exprt d10=from_integer(10, type);
235  binary_relation_exprt small(casted_digit, ID_le, d10);
236  plus_exprt value1(casted_digit, from_integer('0', type));
237  plus_exprt value2(minus_exprt(casted_digit, d10), from_integer('a', type));
238  return code_assignt(result, if_exprt(small, value1, value2));
239 }
240 
245  conversion_inputt &target)
246 {
247  // TODO: This is unimplemented for now as it requires analyzing
248  // the UnicodeData file to find characters directionality.
249  return target;
250 }
251 
256  conversion_inputt &target)
257 {
258  return convert_get_directionality_char(target);
259 }
260 
266  conversion_inputt &target)
267 {
268  return convert_digit_char(target);
269 }
270 
277  conversion_inputt &target)
278 {
279  return convert_digit_int(target);
280 }
281 
286  conversion_inputt &target)
287 {
288  // TODO: This is unimplemented for now as it requires analyzing
289  // the UnicodeData file to categorize characters.
290  return target;
291 }
292 
297  conversion_inputt &target)
298 {
299  return convert_get_type_char(target);
300 }
301 
306 {
307  return convert_char_value(target);
308 }
309 
317  const exprt &chr, const typet &type)
318 {
319  exprt u10000=from_integer(0x010000, type);
320  exprt uD800=from_integer(0xD800, type);
321  exprt u400=from_integer(0x0400, type);
322 
323  plus_exprt high_surrogate(uD800, div_exprt(minus_exprt(chr, u10000), u400));
324  return std::move(high_surrogate);
325 }
326 
332  const exprt &chr, const typet &type)
333 {
334  (void)type; // unused parameter
335  return in_interval_expr(chr, 'a', 'z');
336 }
337 
343  const exprt &chr, const typet &type)
344 {
345  (void)type; // unused parameter
346  return in_interval_expr(chr, 'A', 'Z');
347 }
348 
358  const exprt &chr, const typet &type)
359 {
360  return or_exprt(
361  expr_of_is_ascii_upper_case(chr, type),
362  expr_of_is_ascii_lower_case(chr, type));
363 }
364 
376  const exprt &chr, const typet &type)
377 {
378  return expr_of_is_letter(chr, type);
379 }
380 
385  conversion_inputt &target)
386 {
387  return convert_char_function(
389 }
390 
398  const exprt &chr, const typet &type)
399 {
400  (void)type; // unused parameter
401  return and_exprt(
402  binary_relation_exprt(chr, ID_le, from_integer(0xFFFF, chr.type())),
403  binary_relation_exprt(chr, ID_ge, from_integer(0, chr.type())));
404 }
405 
410  conversion_inputt &target)
411 {
412  return convert_char_function(
414 }
415 
421  const exprt &chr, const typet &type)
422 {
423  (void)type; // unused parameter
424  // The following intervals are undefined in unicode, according to
425  // the Unicode Character Database: http://www.unicode.org/Public/UCD/latest/
426  exprt::operandst intervals;
427  intervals.push_back(in_interval_expr(chr, 0x0750, 0x077F));
428  intervals.push_back(in_interval_expr(chr, 0x07C0, 0x08FF));
429  intervals.push_back(in_interval_expr(chr, 0x1380, 0x139F));
430  intervals.push_back(in_interval_expr(chr, 0x18B0, 0x18FF));
431  intervals.push_back(in_interval_expr(chr, 0x1980, 0x19DF));
432  intervals.push_back(in_interval_expr(chr, 0x1A00, 0x1CFF));
433  intervals.push_back(in_interval_expr(chr, 0x1D80, 0x1DFF));
434  intervals.push_back(in_interval_expr(chr, 0x2C00, 0x2E7F));
435  intervals.push_back(in_interval_expr(chr, 0x2FE0, 0x2FEF));
436  intervals.push_back(in_interval_expr(chr, 0x31C0, 0x31EF));
437  intervals.push_back(in_interval_expr(chr, 0x9FB0, 0x9FFF));
438  intervals.push_back(in_interval_expr(chr, 0xA4D0, 0xABFF));
439  intervals.push_back(in_interval_expr(chr, 0xD7B0, 0xD7FF));
440  intervals.push_back(in_interval_expr(chr, 0xFE10, 0xFE1F));
441  intervals.push_back(in_interval_expr(chr, 0x10140, 0x102FF));
442  intervals.push_back(in_interval_expr(chr, 0x104B0, 0x107FF));
443  intervals.push_back(in_interval_expr(chr, 0x10840, 0x1CFFF));
444  intervals.push_back(in_interval_expr(chr, 0x1D200, 0x1D2FF));
445  intervals.push_back(in_interval_expr(chr, 0x1D360, 0x1D3FF));
446  intervals.push_back(
447  binary_relation_exprt(chr, ID_ge, from_integer(0x1D800, chr.type())));
448 
449  return not_exprt(disjunction(intervals));
450 }
451 
456  conversion_inputt &target)
457 {
458  return convert_char_function(
460 }
461 
466  conversion_inputt &target)
467 {
468  return convert_is_defined_char(target);
469 }
470 
486  const exprt &chr, const typet &type)
487 {
488  (void)type; // unused parameter
489  exprt latin_digit=in_interval_expr(chr, '0', '9');
490  exprt arabic_indic_digit=in_interval_expr(chr, 0x660, 0x669);
491  exprt extended_digit=in_interval_expr(chr, 0x6F0, 0x6F9);
492  exprt devanagari_digit=in_interval_expr(chr, 0x966, 0x96F);
493  exprt fullwidth_digit=in_interval_expr(chr, 0xFF10, 0xFF19);
494  or_exprt digit(
495  or_exprt(latin_digit, or_exprt(arabic_indic_digit, extended_digit)),
496  or_exprt(devanagari_digit, fullwidth_digit));
497  return std::move(digit);
498 }
499 
504  conversion_inputt &target)
505 {
506  return convert_char_function(
508 }
509 
514  conversion_inputt &target)
515 {
516  return convert_is_digit_char(target);
517 }
518 
525  const exprt &chr, const typet &type)
526 {
527  (void)type; // unused parameter
528  return in_interval_expr(chr, 0xD800, 0xDBFF);
529 }
530 
535  conversion_inputt &target)
536 {
537  return convert_char_function(
539 }
540 
550  const exprt &chr, const typet &type)
551 {
552  (void)type; // unused parameter
553  or_exprt ignorable(
554  in_interval_expr(chr, 0x0000, 0x0008),
555  or_exprt(
556  in_interval_expr(chr, 0x000E, 0x001B),
557  in_interval_expr(chr, 0x007F, 0x009F)));
558  return std::move(ignorable);
559 }
560 
567  conversion_inputt &target)
568 {
569  return convert_char_function(
571 }
572 
579  conversion_inputt &target)
580 {
582 }
583 
588  conversion_inputt &target)
589 {
590  const code_function_callt &function_call=target;
591  PRECONDITION(function_call.arguments().size() == 1);
592  const exprt &arg=function_call.arguments()[0];
593  const exprt &result=function_call.lhs();
594  exprt is_ideograph=in_interval_expr(arg, 0x4E00, 0x9FFF);
595  return code_assignt(result, is_ideograph);
596 }
597 
602  conversion_inputt &target)
603 {
604  const code_function_callt &function_call=target;
605  PRECONDITION(function_call.arguments().size() == 1);
606  const exprt &arg=function_call.arguments()[0];
607  const exprt &result=function_call.lhs();
608  or_exprt iso(
609  in_interval_expr(arg, 0x00, 0x1F), in_interval_expr(arg, 0x7F, 0x9F));
610  return code_assignt(result, iso);
611 }
612 
617  conversion_inputt &target)
618 {
619  return convert_is_ISO_control_char(target);
620 }
621 
629  conversion_inputt &target)
630 {
631  return convert_char_function(
633 }
634 
639  conversion_inputt &target)
640 {
642 }
643 
652  conversion_inputt &target)
653 {
654  return convert_char_function(
656 }
657 
662  conversion_inputt &target)
663 {
665 }
666 
671  conversion_inputt &target)
672 {
674 }
675 
680  conversion_inputt &target)
681 {
683 }
684 
689  conversion_inputt &target)
690 {
691  return convert_char_function(
693 }
694 
699  conversion_inputt &target)
700 {
701  return convert_is_letter_char(target);
702 }
703 
709  const exprt &chr, const typet &type)
710 {
711  return or_exprt(expr_of_is_letter(chr, type), expr_of_is_digit(chr, type));
712 }
713 
718  conversion_inputt &target)
719 {
720  return convert_char_function(
722 }
723 
728  conversion_inputt &target)
729 {
730  return convert_is_letter_or_digit_char(target);
731 }
732 
739  conversion_inputt &target)
740 {
741  return convert_char_function(
743 }
744 
751  conversion_inputt &target)
752 {
753  return convert_is_lower_case_char(target);
754 }
755 
760  conversion_inputt &target)
761 {
762  const code_function_callt &function_call=target;
763  PRECONDITION(function_call.arguments().size() == 1);
764  const exprt &arg=function_call.arguments()[0];
765  const exprt &result=function_call.lhs();
766  exprt is_low_surrogate=in_interval_expr(arg, 0xDC00, 0xDFFF);
767  return code_assignt(result, is_low_surrogate);
768 }
769 
778  const exprt &chr, const typet &type)
779 {
780  (void)type; // unused parameter
781  return in_list_expr(chr, {0x28, 0x29, 0x3C, 0x3E, 0x5B, 0x5D, 0x7B, 0x7D});
782 }
783 
790  conversion_inputt &target)
791 {
792  return convert_char_function(
794 }
795 
802  conversion_inputt &target)
803 {
804  return convert_is_mirrored_char(target);
805 }
806 
811 {
812  return convert_is_whitespace_char(target);
813 }
814 
821  const exprt &chr, const typet &type)
822 {
823  (void)type; // unused parameter
824  std::list<mp_integer> space_characters=
825  {0x20, 0x00A0, 0x1680, 0x202F, 0x205F, 0x3000, 0x2028, 0x2029};
826  exprt condition0=in_list_expr(chr, space_characters);
827  exprt condition1=in_interval_expr(chr, 0x2000, 0x200A);
828  return or_exprt(condition0, condition1);
829 }
830 
835  conversion_inputt &target)
836 {
837  return convert_char_function(
839 }
840 
845  conversion_inputt &target)
846 {
847  return convert_is_space_char(target);
848 }
849 
856  const exprt &chr, const typet &type)
857 {
858  (void)type; // unused parameter
859  return binary_relation_exprt(chr, ID_gt, from_integer(0xFFFF, chr.type()));
860 }
861 
866  conversion_inputt &target)
867 {
868  return convert_char_function(
870 }
871 
877  const exprt &chr, const typet &type)
878 {
879  (void)type; // unused parameter
880  return in_interval_expr(chr, 0xD800, 0xDFFF);
881 }
882 
887  conversion_inputt &target)
888 {
889  return convert_char_function(
891 }
892 
897  conversion_inputt &target)
898 {
899  const code_function_callt &function_call=target;
900  PRECONDITION(function_call.arguments().size() == 2);
901  const exprt &arg0=function_call.arguments()[0];
902  const exprt &arg1=function_call.arguments()[1];
903  const exprt &result=function_call.lhs();
904  exprt is_low_surrogate=in_interval_expr(arg1, 0xDC00, 0xDFFF);
905  exprt is_high_surrogate=in_interval_expr(arg0, 0xD800, 0xDBFF);
907 }
908 
914  const exprt &chr, const typet &type)
915 {
916  (void)type; // unused parameter
917  std::list<mp_integer>title_case_chars=
918  {0x01C5, 0x01C8, 0x01CB, 0x01F2, 0x1FBC, 0x1FCC, 0x1FFC};
919  exprt::operandst conditions;
920  conditions.push_back(in_list_expr(chr, title_case_chars));
921  conditions.push_back(in_interval_expr(chr, 0x1F88, 0x1F8F));
922  conditions.push_back(in_interval_expr(chr, 0x1F98, 0x1F9F));
923  conditions.push_back(in_interval_expr(chr, 0x1FA8, 0x1FAF));
924  return disjunction(conditions);
925 }
926 
931  conversion_inputt &target)
932 {
933  return convert_char_function(
935 }
936 
941  conversion_inputt &target)
942 {
943  return convert_is_title_case_char(target);
944 }
945 
952  const exprt &chr, const typet &type)
953 {
954  (void)type; // unused parameter
955  // The following set of characters is the general category "Nl" in the
956  // Unicode specification.
957  exprt cond0=in_interval_expr(chr, 0x16EE, 0x16F0);
958  exprt cond1=in_interval_expr(chr, 0x2160, 0x2188);
959  exprt cond2=in_interval_expr(chr, 0x3021, 0x3029);
960  exprt cond3=in_interval_expr(chr, 0x3038, 0x303A);
961  exprt cond4=in_interval_expr(chr, 0xA6E6, 0xA6EF);
962  exprt cond5=in_interval_expr(chr, 0x10140, 0x10174);
963  exprt cond6=in_interval_expr(chr, 0x103D1, 0x103D5);
964  exprt cond7=in_interval_expr(chr, 0x12400, 0x1246E);
965  exprt cond8=in_list_expr(chr, {0x3007, 0x10341, 0x1034A});
966  return or_exprt(
967  or_exprt(or_exprt(cond0, cond1), or_exprt(cond2, cond3)),
968  or_exprt(or_exprt(cond4, cond5), or_exprt(cond6, or_exprt(cond7, cond8))));
969 }
970 
971 
980  const exprt &chr, const typet &type)
981 {
982  exprt::operandst conditions;
983  conditions.push_back(expr_of_is_unicode_identifier_start(chr, type));
984  conditions.push_back(expr_of_is_digit(chr, type));
985  conditions.push_back(expr_of_is_identifier_ignorable(chr, type));
986  return disjunction(conditions);
987 }
988 
993  conversion_inputt &target)
994 {
995  return convert_char_function(
997 }
998 
1003  conversion_inputt &target)
1004 {
1006 }
1007 
1014  const exprt &chr, const typet &type)
1015 {
1016  return or_exprt(
1017  expr_of_is_letter(chr, type), expr_of_is_letter_number(chr, type));
1018 }
1019 
1024  conversion_inputt &target)
1025 {
1026  return convert_char_function(
1028 }
1029 
1034  conversion_inputt &target)
1035 {
1037 }
1038 
1045  conversion_inputt &target)
1046 {
1047  return convert_char_function(
1049 }
1050 
1055  conversion_inputt &target)
1056 {
1057  return convert_is_upper_case_char(target);
1058 }
1059 
1066  const exprt &chr, const typet &type)
1067 {
1068  (void)type; // unused parameter
1069  return binary_relation_exprt(chr, ID_le, from_integer(0x10FFFF, chr.type()));
1070 }
1071 
1076  conversion_inputt &target)
1077 {
1078  return convert_char_function(
1080 }
1081 
1091  const exprt &chr, const typet &type)
1092 {
1093  (void)type; // unused parameter
1094  exprt::operandst conditions;
1095  std::list<mp_integer> space_characters=
1096  {0x20, 0x1680, 0x205F, 0x3000, 0x2028, 0x2029};
1097  conditions.push_back(in_list_expr(chr, space_characters));
1098  conditions.push_back(in_interval_expr(chr, 0x2000, 0x2006));
1099  conditions.push_back(in_interval_expr(chr, 0x2008, 0x200A));
1100  conditions.push_back(in_interval_expr(chr, 0x09, 0x0D));
1101  conditions.push_back(in_interval_expr(chr, 0x1C, 0x1F));
1102  return disjunction(conditions);
1103 }
1104 
1109  conversion_inputt &target)
1110 {
1111  return convert_char_function(
1113 }
1114 
1119  conversion_inputt &target)
1120 {
1121  return convert_is_whitespace_char(target);
1122 }
1123 
1131  const exprt &chr, const typet &type)
1132 {
1133  exprt uDC00=from_integer(0xDC00, type);
1134  exprt u0400=from_integer(0x0400, type);
1135  return plus_exprt(uDC00, mod_exprt(chr, u0400));
1136 }
1137 
1144  const exprt &chr, const typet &type)
1145 {
1146  shl_exprt first_byte(chr, from_integer(8, type));
1147  lshr_exprt second_byte(chr, from_integer(8, type));
1148  return plus_exprt(first_byte, second_byte);
1149 }
1150 
1155  conversion_inputt &target)
1156 {
1157  return convert_char_function(
1159 }
1160 
1170  const exprt &chr, const typet &type)
1171 {
1172  minus_exprt transformed(
1173  plus_exprt(chr, from_integer('a', type)), from_integer('A', type));
1174 
1175  return if_exprt(expr_of_is_ascii_upper_case(chr, type), transformed, chr);
1176 }
1177 
1182  conversion_inputt &target)
1183 {
1184  return convert_char_function(
1186 }
1187 
1192  conversion_inputt &target)
1193 {
1194  return convert_to_lower_case_char(target);
1195 }
1196 
1202  const exprt &chr, const typet &type)
1203 {
1204  std::list<mp_integer> increment_list={0x01C4, 0x01C7, 0x01CA, 0x01F1};
1205  std::list<mp_integer> decrement_list={0x01C6, 0x01C9, 0x01CC, 0x01F3};
1206  exprt plus_8_interval1=in_interval_expr(chr, 0x1F80, 0x1F87);
1207  exprt plus_8_interval2=in_interval_expr(chr, 0x1F90, 0x1F97);
1208  exprt plus_8_interval3=in_interval_expr(chr, 0x1FA0, 0x1FA7);
1209  std::list<mp_integer> plus_9_list={0x1FB3, 0x1FC3, 0x1FF3};
1210  minus_exprt minus_1(chr, from_integer(1, type));
1211  plus_exprt plus_1(chr, from_integer(1, type));
1212  plus_exprt plus_8(chr, from_integer(8, type));
1213  plus_exprt plus_9(chr, from_integer(9, type));
1214  or_exprt plus_8_set(
1215  plus_8_interval1, or_exprt(plus_8_interval2, plus_8_interval3));
1216 
1217  return if_exprt(
1218  in_list_expr(chr, increment_list),
1219  plus_1,
1220  if_exprt(
1221  in_list_expr(chr, decrement_list),
1222  minus_1,
1223  if_exprt(
1224  plus_8_set,
1225  plus_8,
1226  if_exprt(in_list_expr(chr, plus_9_list), plus_9, chr))));
1227 }
1228 
1233  conversion_inputt &target)
1234 {
1235  return convert_char_function(
1237 }
1238 
1243  conversion_inputt &target)
1244 {
1245  return convert_to_title_case_char(target);
1246 }
1247 
1257  const exprt &chr, const typet &type)
1258 {
1259  minus_exprt transformed(
1260  plus_exprt(chr, from_integer('A', type)), from_integer('a', type));
1261 
1262  return if_exprt(expr_of_is_ascii_lower_case(chr, type), transformed, chr);
1263 }
1264 
1269  conversion_inputt &target)
1270 {
1271  return convert_char_function(
1273 }
1274 
1279  conversion_inputt &target)
1280 {
1281  return convert_to_upper_case_char(target);
1282 }
1283 
1291  const code_function_callt &code) const
1292 {
1293  if(code.function().id()==ID_symbol)
1294  {
1295  const irep_idt &function_id=
1297  auto it=conversion_table.find(function_id);
1298  if(it!=conversion_table.end())
1299  return (it->second)(code);
1300  }
1301 
1302  return code;
1303 }
1304 
1307 {
1308  // All methods are listed here in alphabetic order
1309  // The ones that are not supported by this module (though they may be
1310  // supported by the string solver) have no entry in the conversion
1311  // table and are marked in this way:
1312  // Not supported "java::java.lang.Character.<init>()"
1313 
1314  conversion_table["java::java.lang.Character.charCount:(I)I"]=
1316  conversion_table["java::java.lang.Character.charValue:()C"]=
1318 
1319  // Not supported "java::java.lang.Character.codePointAt:([CI)I
1320  // Not supported "java::java.lang.Character.codePointAt:([CII)I"
1321  // Not supported "java::java.lang.Character.codePointAt:"
1322  // "(Ljava.lang.CharSequence;I)I"
1323  // Not supported "java::java.lang.Character.codePointBefore:([CI)I"
1324  // Not supported "java::java.lang.Character.codePointBefore:([CII)I"
1325  // Not supported "java::java.lang.Character.codePointBefore:"
1326  // "(Ljava.lang.CharSequence;I)I"
1327  // Not supported "java::java.lang.Character.codePointCount:([CII)I"
1328  // Not supported "java::java.lang.Character.codePointCount:"
1329  // "(Ljava.lang.CharSequence;II)I"
1330  // Not supported "java::java.lang.Character.compareTo:"
1331  // "(Ljava.lang.Character;)I"
1332 
1333  conversion_table["java::java.lang.Character.compare:(CC)I"]=
1335  conversion_table["java::java.lang.Character.digit:(CI)I"]=
1337  conversion_table["java::java.lang.Character.digit:(II)I"]=
1339 
1340  // Not supported "java::java.lang.Character.equals:(Ljava.lang.Object;)Z"
1341 
1342  conversion_table["java::java.lang.Character.forDigit:(II)C"]=
1344  conversion_table["java::java.lang.Character.getDirectionality:(C)B"]=
1346  conversion_table["java::java.lang.Character.getDirectionality:(I)B"]=
1348 
1349  // Not supported "java::java.lang.Character.getName:(I)Ljava.lang.String;"
1350 
1351  conversion_table["java::java.lang.Character.getNumericValue:(C)I"]=
1353  conversion_table["java::java.lang.Character.getNumericValue:(I)I"]=
1355  conversion_table["java::java.lang.Character.getType:(C)I"]=
1357  conversion_table["java::java.lang.Character.getType:(I)I"]=
1359  conversion_table["java::java.lang.Character.hashCode:()I"]=
1361  conversion_table["java::java.lang.Character.isAlphabetic:(I)Z"]=
1363  conversion_table["java::java.lang.Character.isBmpCodePoint:(I)Z"]=
1365  conversion_table["java::java.lang.Character.isDefined:(C)Z"]=
1367  conversion_table["java::java.lang.Character.isDefined:(I)Z"]=
1369  conversion_table["java::java.lang.Character.isDigit:(C)Z"]=
1371  conversion_table["java::java.lang.Character.isDigit:(I)Z"]=
1373  conversion_table["java::java.lang.Character.isHighSurrogate:(C)Z"]=
1375  conversion_table["java::java.lang.Character.isIdentifierIgnorable:(C)Z"]=
1377  conversion_table["java::java.lang.Character.isIdentifierIgnorable:(I)Z"]=
1379  conversion_table["java::java.lang.Character.isIdeographic:(I)Z"]=
1381  conversion_table["java::java.lang.Character.isISOControl:(C)Z"]=
1383  conversion_table["java::java.lang.Character.isISOControl:(I)Z"]=
1385  conversion_table["java::java.lang.Character.isJavaIdentifierPart:(C)Z"]=
1387  conversion_table["java::java.lang.Character.isJavaIdentifierPart:(I)Z"]=
1389  conversion_table["java::java.lang.Character.isJavaIdentifierStart:(C)Z"]=
1391  conversion_table["java::java.lang.Character.isJavaIdentifierStart:(I)Z"]=
1393  conversion_table["java::java.lang.Character.isJavaLetter:(C)Z"]=
1395  conversion_table["java::java.lang.Character.isJavaLetterOrDigit:(C)Z"]=
1397  conversion_table["java::java.lang.Character.isLetter:(C)Z"]=
1399  conversion_table["java::java.lang.Character.isLetter:(I)Z"]=
1401  conversion_table["java::java.lang.Character.isLetterOrDigit:(C)Z"]=
1403  conversion_table["java::java.lang.Character.isLetterOrDigit:(I)Z"]=
1405  conversion_table["java::java.lang.Character.isLowerCase:(C)Z"]=
1407  conversion_table["java::java.lang.Character.isLowerCase:(I)Z"]=
1409  conversion_table["java::java.lang.Character.isLowSurrogate:(C)Z"]=
1411  conversion_table["java::java.lang.Character.isMirrored:(C)Z"]=
1413  conversion_table["java::java.lang.Character.isMirrored:(I)Z"]=
1415  conversion_table["java::java.lang.Character.isSpace:(C)Z"]=
1417  conversion_table["java::java.lang.Character.isSpaceChar:(C)Z"]=
1419  conversion_table["java::java.lang.Character.isSpaceChar:(I)Z"]=
1421  conversion_table["java::java.lang.Character.isSupplementaryCodePoint:(I)Z"]=
1423  conversion_table["java::java.lang.Character.isSurrogate:(C)Z"]=
1425  conversion_table["java::java.lang.Character.isSurrogatePair:(CC)Z"]=
1427  conversion_table["java::java.lang.Character.isTitleCase:(C)Z"]=
1429  conversion_table["java::java.lang.Character.isTitleCase:(I)Z"]=
1431  conversion_table["java::java.lang.Character.isUnicodeIdentifierPart:(C)Z"]=
1433  conversion_table["java::java.lang.Character.isUnicodeIdentifierPart:(I)Z"]=
1435  conversion_table["java::java.lang.Character.isUnicodeIdentifierStart:(C)Z"]=
1437  conversion_table["java::java.lang.Character.isUnicodeIdentifierStart:(I)Z"]=
1439  conversion_table["java::java.lang.Character.isUpperCase:(C)Z"]=
1441  conversion_table["java::java.lang.Character.isUpperCase:(I)Z"]=
1443  conversion_table["java::java.lang.Character.isValidCodePoint:(I)Z"]=
1445  conversion_table["java::java.lang.Character.isWhitespace:(C)Z"]=
1447  conversion_table["java::java.lang.Character.isWhitespace:(I)Z"]=
1449 
1450  // Not supported "java::java.lang.Character.offsetByCodePoints:([CIIII)I"
1451  // Not supported "java::java.lang.Character.offsetByCodePoints:"
1452  // "(Ljava.lang.CharacterSequence;II)I"
1453 
1454  conversion_table["java::java.lang.Character.reverseBytes:(C)C"]=
1456 
1457  // Not supported "java::java.lang.Character.toChars:(I[CI)I"
1458 
1459  conversion_table["java::java.lang.Character.toLowerCase:(C)C"]=
1461  conversion_table["java::java.lang.Character.toLowerCase:(I)I"]=
1463 
1464  // Not supported "java::java.lang.Character.toString:()Ljava.lang.String;"
1465  // Not supported "java::java.lang.Character.toString:(C)Ljava.lang.String;"
1466 
1467  conversion_table["java::java.lang.Character.toTitleCase:(C)C"]=
1469  conversion_table["java::java.lang.Character.toTitleCase:(I)I"]=
1471  conversion_table["java::java.lang.Character.toUpperCase:(C)C"]=
1473  conversion_table["java::java.lang.Character.toUpperCase:(I)I"]=
1475 
1476  // Not supported "java::java.lang.Character.valueOf:(C)Ljava.lang.Character;"
1477 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
API to expression classes for bitvectors.
Preprocess a goto-programs so that calls to the java Character library are replaced by simple express...
Boolean AND.
Definition: std_expr.h:2125
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:762
static codet convert_is_upper_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_digit_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_lower_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_high_surrogate(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_char_function(exprt(*expr_function)(const exprt &chr, const typet &type), conversion_inputt &target)
converts based on a function on expressions
static codet convert_is_whitespace_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_letter_number(const exprt &chr, const typet &type)
Determines if the specified character is in the LETTER_NUMBER category of Unicode.
static exprt expr_of_is_alphabetic(const exprt &chr, const typet &type)
Determines if the specified character (Unicode code point) is alphabetic.
static codet convert_is_ISO_control_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_lower_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_numeric_value_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_identifier_ignorable(const exprt &chr, const typet &type)
Determines if the character is one of ignorable in a Java identifier, that is, it is in one of these ...
static codet convert_is_title_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_unicode_identifier_part_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_ascii_upper_case(const exprt &chr, const typet &type)
Determines if the specified character is an ASCII uppercase character.
static exprt expr_of_is_defined(const exprt &chr, const typet &type)
Determines if a character is defined in Unicode.
static exprt expr_of_is_unicode_identifier_start(const exprt &chr, const typet &type)
Determines if the specified character is permissible as the first character in a Unicode identifier.
std::unordered_map< irep_idt, conversion_functiont > conversion_table
static codet convert_is_surrogate_pair(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_title_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_compare(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_space_char_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_char_count(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_char_value(const exprt &chr, const typet &type)
Casts the given expression to the given type.
static exprt expr_of_is_high_surrogate(const exprt &chr, const typet &type)
Determines if the given char value is a Unicode high-surrogate code unit (also known as leading-surro...
static codet convert_is_space(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_space_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_valid_code_point(const exprt &chr, const typet &type)
Determines whether the specified code point is a valid Unicode code point value.
static exprt in_interval_expr(const exprt &chr, const mp_integer &lower_bound, const mp_integer &upper_bound)
The returned expression is true when the first argument is in the interval defined by the lower and u...
void initialize_conversion_table()
fill maps with correspondance from java method names to conversion functions
static exprt expr_of_is_letter_or_digit(const exprt &chr, const typet &type)
Determines if the specified character is a letter or digit.
static codet convert_is_unicode_identifier_part_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_mirrored(const exprt &chr, const typet &type)
Determines whether the character is mirrored according to the Unicode specification.
static codet convert_to_upper_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_mirrored_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_letter(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_defined_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_identifier_ignorable_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_whitespace(const exprt &chr, const typet &type)
Determines if the specified character is white space according to Java.
codet replace_character_call(const code_function_callt &call) const
replace function calls to functions of the Character by an affectation if possible,...
static codet convert_is_letter_or_digit_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_unicode_identifier_part(const exprt &chr, const typet &type)
Determines if the character may be part of a Unicode identifier.
static codet convert_is_ideographic(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_type_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_to_lower_case(const exprt &chr, const typet &type)
Converts the character argument to lowercase.
static codet convert_is_bmp_code_point(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_supplementary_code_point(const exprt &chr, const typet &type)
Determines whether the specified character (Unicode code point) is in the supplementary character ran...
static exprt expr_of_reverse_bytes(const exprt &chr, const typet &type)
Returns the value obtained by reversing the order of the bytes in the specified char value.
static codet convert_is_defined_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_to_title_case(const exprt &chr, const typet &type)
Converts the character argument to titlecase.
static codet convert_is_supplementary_code_point(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_surrogate(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_char_value(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_lower_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_mirrored_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_title_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_title_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_low_surrogate(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_directionality_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_ascii_lower_case(const exprt &chr, const typet &type)
Determines if the specified character is an ASCII lowercase character.
static codet convert_hash_code(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_unicode_identifier_start_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt in_list_expr(const exprt &chr, const std::list< mp_integer > &list)
The returned expression is true when the given character is equal to one of the element in the list.
static codet convert_is_java_identifier_start_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaIdent...
static codet convert_for_digit(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_type_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_high_surrogate(const exprt &chr, const typet &type)
Returns the leading surrogate (a high surrogate code unit) of the surrogate pair representing the spe...
static codet convert_is_upper_case_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_digit_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_identifier_part_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_identifier_start_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaIdent...
static codet convert_is_identifier_ignorable_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_ISO_control_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_unicode_identifier_start_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_char_count(const exprt &chr, const typet &type)
Determines the number of char values needed to represent the specified character (Unicode code point)...
static codet convert_is_java_letter_or_digit(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaLette...
static exprt expr_of_is_space_char(const exprt &chr, const typet &type)
Determines if the specified character is white space according to Unicode (SPACE_SEPARATOR,...
static exprt expr_of_is_digit(const exprt &chr, const typet &type)
Determines if the specified character is a digit.
static exprt expr_of_is_bmp_code_point(const exprt &chr, const typet &type)
Determines whether the specified character (Unicode code point) is in the Basic Multilingual Plane (B...
static codet convert_reverse_bytes(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_surrogate(const exprt &chr, const typet &type)
Determines if the given char value is a Unicode surrogate code unit.
static codet convert_get_numeric_value_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_upper_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_letter_or_digit_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_letter_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_digit_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_get_directionality_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_to_lower_case_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_letter_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_whitespace_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_is_letter(const exprt &chr, const typet &type)
Determines if the specified character is a letter.
static exprt expr_of_is_title_case(const exprt &chr, const typet &type)
Determines if the specified character is a titlecase character.
static codet convert_digit_char(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static exprt expr_of_to_upper_case(const exprt &chr, const typet &type)
Converts the character argument to uppercase.
static codet convert_is_valid_code_point(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_alphabetic(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method Character....
static codet convert_is_java_identifier_part_int(conversion_inputt &target)
Converts function call to an assignment of an expression corresponding to the java method isJavaIdent...
static exprt expr_of_low_surrogate(const exprt &chr, const typet &type)
Returns the trailing surrogate (a low surrogate code unit) of the surrogate pair representing the spe...
A goto_instruction_codet representing an assignment in the program.
goto_instruction_codet representation of a function call statement.
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
Division.
Definition: std_expr.h:1157
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Equality.
Definition: std_expr.h:1366
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
typet & type()
Return the type of the expression.
Definition: expr.h:84
The trinary if-then-else operator.
Definition: std_expr.h:2380
const irep_idt & id() const
Definition: irep.h:388
Logical right shift.
Binary minus.
Definition: std_expr.h:1061
Modulo defined as lhs-(rhs * truncate(lhs/rhs)).
Definition: std_expr.h:1228
Boolean negation.
Definition: std_expr.h:2337
Boolean OR.
Definition: std_expr.h:2233
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
Left shift.
const irep_idt & get_identifier() const
Definition: std_expr.h:160
Semantic type conversion.
Definition: std_expr.h:2073
The type of an expression, extends irept.
Definition: type.h:29
BigInt mp_integer
Definition: smt_terms.h:17
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:71
API to expression classes.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
static exprt is_low_surrogate(const exprt &chr)
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en....
static exprt is_high_surrogate(const exprt &chr)
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en....