CBMC
goto_symex.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Symbolic Execution
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "goto_symex.h"
13 
14 #include "expr_skeleton.h"
15 #include "symex_assign.h"
16 
17 #include <util/arith_tools.h>
18 #include <util/c_types.h>
19 #include <util/format_expr.h>
20 #include <util/fresh_symbol.h>
21 #include <util/mathematical_expr.h>
24 #include <util/simplify_expr.h>
25 #include <util/simplify_utils.h>
26 #include <util/std_code.h>
27 #include <util/string_expr.h>
28 #include <util/string_utils.h>
29 
30 #include <climits>
31 
33 {
35  simplify(expr, ns);
36 }
37 
39  statet &state,
40  const exprt &o_lhs,
41  const exprt &o_rhs)
42 {
43  exprt lhs = clean_expr(o_lhs, state, true);
44  exprt rhs = clean_expr(o_rhs, state, false);
45 
47  lhs.type() == rhs.type(),
48  "assignments must be type consistent, got",
49  lhs.type().pretty(),
50  rhs.type().pretty(),
51  state.source.pc->source_location());
52 
54  log.debug(), [this, &lhs](messaget::mstreamt &mstream) {
55  mstream << "Assignment to " << format(lhs) << " ["
56  << pointer_offset_bits(lhs.type(), ns).value_or(0) << " bits]"
57  << messaget::eom;
58  });
59 
60  // rvalues present within the lhs (for example, "some_array[this_rvalue]" or
61  // "byte_extract <type> from an_lvalue offset this_rvalue") can affect whether
62  // we use field-sensitive symbols or not, so L2-rename them up front:
63  lhs = state.l2_rename_rvalues(lhs, ns);
64  do_simplify(lhs);
65  lhs = state.field_sensitivity.apply(ns, state, std::move(lhs), true);
66 
67  if(rhs.id() == ID_side_effect)
68  {
69  const side_effect_exprt &side_effect_expr = to_side_effect_expr(rhs);
70  const irep_idt &statement = side_effect_expr.get_statement();
71 
72  if(
73  statement == ID_cpp_new || statement == ID_cpp_new_array ||
74  statement == ID_java_new_array_data)
75  {
76  symex_cpp_new(state, lhs, side_effect_expr);
77  }
78  else if(statement == ID_allocate)
79  symex_allocate(state, lhs, side_effect_expr);
80  else if(statement == ID_va_start)
81  symex_va_start(state, lhs, side_effect_expr);
82  else
84  }
85  else
86  {
88 
89  // Let's hide return value assignments.
90  if(
91  lhs.id() == ID_symbol &&
92  id2string(to_symbol_expr(lhs).get_identifier()).find("#return_value!") !=
93  std::string::npos)
94  {
96  }
97 
98  // We hide if we are in a hidden function.
99  if(state.call_stack().top().hidden_function)
100  assignment_type = symex_targett::assignment_typet::HIDDEN;
101 
102  // We hide if we are executing a hidden instruction.
103  if(state.source.pc->source_location().get_hide())
104  assignment_type = symex_targett::assignment_typet::HIDDEN;
105 
107  shadow_memory, state, assignment_type, ns, symex_config, target};
108 
109  // Try to constant propagate potential side effects of the assignment, when
110  // simplification is turned on and there is one thread only. Constant
111  // propagation is only sound for sequential code as here we do not take into
112  // account potential writes from other threads when propagating the values.
113  if(symex_config.simplify_opt && state.threads.size() <= 1)
114  {
116  state, symex_assign, lhs, rhs))
117  return;
118  }
119 
120  // We may end up reading (and writing) all components of an object composed
121  // of multiple fields. In such cases, we must do so atomically to avoid
122  // overwriting components modified by another thread. Note that this also
123  // implies multiple shared reads on the rhs being treated as atomic.
124  const bool maybe_divisible =
125  lhs.id() == ID_index ||
126  (is_ssa_expr(lhs) &&
127  state.field_sensitivity.is_divisible(to_ssa_expr(lhs), false));
128  const bool need_atomic_section = maybe_divisible &&
129  state.threads.size() > 1 &&
130  state.atomic_section_id == 0;
131 
132  if(need_atomic_section)
133  symex_atomic_begin(state);
134 
135  exprt::operandst lhs_if_then_else_conditions;
137  shadow_memory, state, assignment_type, ns, symex_config, target}
138  .assign_rec(lhs, expr_skeletont{}, rhs, lhs_if_then_else_conditions);
139 
140  if(need_atomic_section)
141  symex_atomic_end(state);
142  }
143 }
144 
152 static std::string get_alnum_string(const array_exprt &char_array)
153 {
154  const auto &ibv_type =
156 
157  const std::size_t n_bits = ibv_type.get_width();
158  CHECK_RETURN(n_bits % 8 == 0);
159 
160  static_assert(CHAR_BIT == 8, "bitwidth of char assumed to be 8");
161 
162  const std::size_t n_chars = n_bits / 8;
163 
164  INVARIANT(
165  sizeof(std::size_t) >= n_chars,
166  "size_t shall be large enough to represent a character");
167 
168  std::string result;
169 
170  for(const auto &c : char_array.operands())
171  {
172  const std::size_t c_val = numeric_cast_v<std::size_t>(to_constant_expr(c));
173 
174  for(std::size_t i = 0; i < n_chars; i++)
175  {
176  const char c_chunk = static_cast<char>((c_val >> (i * 8)) & 0xff);
177  result.push_back(c_chunk);
178  }
179  }
180 
181  return escape_non_alnum(result);
182 }
183 
185  statet &state,
186  symex_assignt &symex_assign,
187  const exprt &lhs,
188  const exprt &rhs)
189 {
190  if(rhs.id() == ID_function_application)
191  {
193 
194  if(f_l1.function().id() == ID_symbol)
195  {
196  const irep_idt &func_id =
198 
199  if(func_id == ID_cprover_string_concat_func)
200  {
201  return constant_propagate_string_concat(state, symex_assign, f_l1);
202  }
203  else if(func_id == ID_cprover_string_empty_string_func)
204  {
205  // constant propagating the empty string always succeeds as it does
206  // not depend on potentially non-constant inputs
208  return true;
209  }
210  else if(func_id == ID_cprover_string_substring_func)
211  {
213  }
214  else if(
215  func_id == ID_cprover_string_of_int_func ||
216  func_id == ID_cprover_string_of_long_func)
217  {
219  }
220  else if(func_id == ID_cprover_string_delete_char_at_func)
221  {
222  return constant_propagate_delete_char_at(state, symex_assign, f_l1);
223  }
224  else if(func_id == ID_cprover_string_delete_func)
225  {
226  return constant_propagate_delete(state, symex_assign, f_l1);
227  }
228  else if(func_id == ID_cprover_string_set_length_func)
229  {
230  return constant_propagate_set_length(state, symex_assign, f_l1);
231  }
232  else if(func_id == ID_cprover_string_char_set_func)
233  {
234  return constant_propagate_set_char_at(state, symex_assign, f_l1);
235  }
236  else if(func_id == ID_cprover_string_trim_func)
237  {
238  return constant_propagate_trim(state, symex_assign, f_l1);
239  }
240  else if(func_id == ID_cprover_string_to_lower_case_func)
241  {
242  return constant_propagate_case_change(state, symex_assign, f_l1, false);
243  }
244  else if(func_id == ID_cprover_string_to_upper_case_func)
245  {
246  return constant_propagate_case_change(state, symex_assign, f_l1, true);
247  }
248  else if(func_id == ID_cprover_string_replace_func)
249  {
250  return constant_propagate_replace(state, symex_assign, f_l1);
251  }
252  }
253  }
254 
255  return false;
256 }
257 
259  statet &state,
260  symex_assignt &symex_assign,
261  const ssa_exprt &length,
262  const constant_exprt &new_length,
263  const ssa_exprt &char_array,
264  const array_exprt &new_char_array)
265 {
266  // We need to make sure that the length of the previous array isn't
267  // unconstrained, otherwise it could be arbitrarily large which causes
268  // invalid traces
269  symex_assume(state, equal_exprt{length, from_integer(0, length.type())});
270 
271  // assign length of string
272  symex_assign.assign_symbol(length, expr_skeletont{}, new_length, {});
273 
274  const std::string aux_symbol_name =
275  get_alnum_string(new_char_array) + "_constant_char_array";
276 
277  const bool string_constant_exists =
278  state.symbol_table.has_symbol(aux_symbol_name);
279 
280  const symbolt &aux_symbol =
281  string_constant_exists
282  ? state.symbol_table.lookup_ref(aux_symbol_name)
284  state, symex_assign, aux_symbol_name, char_array, new_char_array);
285 
286  INVARIANT(
287  aux_symbol.value == new_char_array,
288  "symbol shall have value derived from char array content");
289 
290  const address_of_exprt string_data(
291  index_exprt(aux_symbol.symbol_expr(), from_integer(0, c_index_type())));
292 
293  symex_assign.assign_symbol(char_array, expr_skeletont{}, string_data, {});
294 
295  if(!string_constant_exists)
296  {
298  state, symex_assign, new_char_array, string_data);
299  }
300 }
301 
303  statet &state,
304  symex_assignt &symex_assign,
305  const std::string &aux_symbol_name,
306  const ssa_exprt &char_array,
307  const array_exprt &new_char_array)
308 {
309  array_typet new_char_array_type = new_char_array.type();
310  new_char_array_type.set(ID_C_constant, true);
311 
312  symbolt &new_aux_symbol = get_fresh_aux_symbol(
313  new_char_array_type,
314  "",
315  aux_symbol_name,
317  ns.lookup(to_symbol_expr(char_array.get_original_expr())).mode,
318  ns,
319  state.symbol_table);
320 
321  CHECK_RETURN(new_aux_symbol.is_state_var);
322  CHECK_RETURN(new_aux_symbol.is_lvalue);
323 
324  new_aux_symbol.is_static_lifetime = true;
325  new_aux_symbol.is_file_local = false;
326  new_aux_symbol.is_thread_local = false;
327 
328  new_aux_symbol.value = new_char_array;
329 
330  const exprt arr = state.rename(new_aux_symbol.symbol_expr(), ns).get();
331 
332  symex_assign.assign_symbol(
333  to_ssa_expr(arr).get_l1_object(), expr_skeletont{}, new_char_array, {});
334 
335  return new_aux_symbol;
336 }
337 
339  statet &state,
340  symex_assignt &symex_assign,
341  const array_exprt &new_char_array,
342  const address_of_exprt &string_data)
343 {
344  const symbolt &function_symbol =
345  ns.lookup(ID_cprover_associate_array_to_pointer_func);
346 
347  const function_application_exprt array_to_pointer_app{
348  function_symbol.symbol_expr(), {new_char_array, string_data}};
349 
350  symbol_exprt return_symbol_expr =
352  to_mathematical_function_type(function_symbol.type).codomain(),
353  "",
354  "return_value",
356  function_symbol.mode,
357  ns,
358  state.symbol_table)
359  .symbol_expr();
360 
361  const ssa_exprt ssa_expr(return_symbol_expr);
362 
363  symex_assign.assign_symbol(
364  ssa_expr, expr_skeletont{}, array_to_pointer_app, {});
365 }
366 
367 std::optional<std::reference_wrapper<const array_exprt>>
369  const statet &state,
370  const exprt &content)
371 {
372  if(content.id() != ID_symbol)
373  {
374  return {};
375  }
376 
377  const auto s_pointer_opt =
378  state.propagation.find(to_symbol_expr(content).get_identifier());
379 
380  if(!s_pointer_opt)
381  {
382  return {};
383  }
384 
385  return try_get_string_data_array(s_pointer_opt->get(), ns);
386 }
387 
388 std::optional<std::reference_wrapper<const constant_exprt>>
390 {
391  if(expr.id() != ID_symbol)
392  {
393  return {};
394  }
395 
396  const auto constant_expr_opt =
397  state.propagation.find(to_symbol_expr(expr).get_identifier());
398 
399  if(!constant_expr_opt || !constant_expr_opt->get().is_constant())
400  {
401  return {};
402  }
403 
404  return std::optional<std::reference_wrapper<const constant_exprt>>(
405  to_constant_expr(constant_expr_opt->get()));
406 }
407 
409  statet &state,
410  symex_assignt &symex_assign,
411  const function_application_exprt &f_l1)
412 {
413  const auto &f_type = f_l1.function_type();
414  const auto &length_type = f_type.domain().at(0);
415  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
416 
417  const constant_exprt length = from_integer(0, length_type);
418 
419  const array_typet char_array_type(char_type, length);
420 
422  f_l1.arguments().size() >= 2,
423  "empty string primitive requires two output arguments");
424 
425  const array_exprt char_array({}, char_array_type);
426 
428  state,
429  symex_assign,
430  to_ssa_expr(f_l1.arguments().at(0)),
431  length,
432  to_ssa_expr(f_l1.arguments().at(1)),
433  char_array);
434 }
435 
437  statet &state,
438  symex_assignt &symex_assign,
439  const function_application_exprt &f_l1)
440 {
441  const auto &f_type = f_l1.function_type();
442  const auto &length_type = f_type.domain().at(0);
443  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
444 
445  const refined_string_exprt &s1 = to_string_expr(f_l1.arguments().at(2));
446  const auto s1_data_opt = try_evaluate_constant_string(state, s1.content());
447 
448  if(!s1_data_opt)
449  return false;
450 
451  const array_exprt &s1_data = s1_data_opt->get();
452 
453  const refined_string_exprt &s2 = to_string_expr(f_l1.arguments().at(3));
454  const auto s2_data_opt = try_evaluate_constant_string(state, s2.content());
455 
456  if(!s2_data_opt)
457  return false;
458 
459  const array_exprt &s2_data = s2_data_opt->get();
460 
461  const std::size_t new_size =
462  s1_data.operands().size() + s2_data.operands().size();
463 
464  const constant_exprt new_char_array_length =
465  from_integer(new_size, length_type);
466 
467  const array_typet new_char_array_type(char_type, new_char_array_length);
468 
469  exprt::operandst operands(s1_data.operands());
470  operands.insert(
471  operands.end(), s2_data.operands().begin(), s2_data.operands().end());
472 
473  const array_exprt new_char_array(std::move(operands), new_char_array_type);
474 
476  state,
477  symex_assign,
478  to_ssa_expr(f_l1.arguments().at(0)),
479  new_char_array_length,
480  to_ssa_expr(f_l1.arguments().at(1)),
481  new_char_array);
482 
483  return true;
484 }
485 
487  statet &state,
488  symex_assignt &symex_assign,
489  const function_application_exprt &f_l1)
490 {
491  const std::size_t num_operands = f_l1.arguments().size();
492 
493  PRECONDITION(num_operands >= 4);
494  PRECONDITION(num_operands <= 5);
495 
496  const auto &f_type = f_l1.function_type();
497  const auto &length_type = f_type.domain().at(0);
498  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
499 
500  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
501  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
502 
503  if(!s_data_opt)
504  return false;
505 
506  const array_exprt &s_data = s_data_opt->get();
507 
508  mp_integer end_index;
509 
510  if(num_operands == 5)
511  {
512  const auto end_index_expr_opt =
513  try_evaluate_constant(state, f_l1.arguments().at(4));
514 
515  if(!end_index_expr_opt)
516  {
517  return false;
518  }
519 
520  end_index =
521  numeric_cast_v<mp_integer>(to_constant_expr(*end_index_expr_opt));
522 
523  if(end_index < 0 || end_index > s_data.operands().size())
524  {
525  return false;
526  }
527  }
528  else
529  {
530  end_index = s_data.operands().size();
531  }
532 
533  const auto start_index_expr_opt =
534  try_evaluate_constant(state, f_l1.arguments().at(3));
535 
536  if(!start_index_expr_opt)
537  {
538  return false;
539  }
540 
541  const mp_integer start_index =
542  numeric_cast_v<mp_integer>(to_constant_expr(*start_index_expr_opt));
543 
544  if(start_index < 0 || start_index > end_index)
545  {
546  return false;
547  }
548 
549  const constant_exprt new_char_array_length =
550  from_integer(end_index - start_index, length_type);
551 
552  const array_typet new_char_array_type(char_type, new_char_array_length);
553 
554  exprt::operandst operands(
555  std::next(
556  s_data.operands().begin(), numeric_cast_v<std::size_t>(start_index)),
557  std::next(
558  s_data.operands().begin(), numeric_cast_v<std::size_t>(end_index)));
559 
560  const array_exprt new_char_array(std::move(operands), new_char_array_type);
561 
563  state,
564  symex_assign,
565  to_ssa_expr(f_l1.arguments().at(0)),
566  new_char_array_length,
567  to_ssa_expr(f_l1.arguments().at(1)),
568  new_char_array);
569 
570  return true;
571 }
572 
574  statet &state,
575  symex_assignt &symex_assign,
576  const function_application_exprt &f_l1)
577 {
578  // The function application expression f_l1 takes the following arguments:
579  // - result string length (output parameter)
580  // - result string data array (output parameter)
581  // - integer to convert to a string
582  // - radix (optional, default 10)
583  const std::size_t num_operands = f_l1.arguments().size();
584 
585  PRECONDITION(num_operands >= 3);
586  PRECONDITION(num_operands <= 4);
587 
588  const auto &f_type = f_l1.function_type();
589  const auto &length_type = f_type.domain().at(0);
590  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
591 
592  const auto &integer_opt =
593  try_evaluate_constant(state, f_l1.arguments().at(2));
594 
595  if(!integer_opt)
596  {
597  return false;
598  }
599 
600  const mp_integer integer = numeric_cast_v<mp_integer>(integer_opt->get());
601 
602  unsigned base = 10;
603 
604  if(num_operands == 4)
605  {
606  const auto &base_constant_opt =
607  try_evaluate_constant(state, f_l1.arguments().at(3));
608 
609  if(!base_constant_opt)
610  {
611  return false;
612  }
613 
614  const auto base_opt = numeric_cast<unsigned>(base_constant_opt->get());
615 
616  if(!base_opt)
617  {
618  return false;
619  }
620 
621  base = *base_opt;
622  }
623 
624  std::string s = integer2string(integer, base);
625 
626  const constant_exprt new_char_array_length =
627  from_integer(s.length(), length_type);
628 
629  const array_typet new_char_array_type(char_type, new_char_array_length);
630 
631  exprt::operandst operands;
632 
634  s.begin(),
635  s.end(),
636  std::back_inserter(operands),
637  [&char_type](const char c) { return from_integer(tolower(c), char_type); });
638 
639  const array_exprt new_char_array(std::move(operands), new_char_array_type);
640 
642  state,
643  symex_assign,
644  to_ssa_expr(f_l1.arguments().at(0)),
645  new_char_array_length,
646  to_ssa_expr(f_l1.arguments().at(1)),
647  new_char_array);
648 
649  return true;
650 }
651 
653  statet &state,
654  symex_assignt &symex_assign,
655  const function_application_exprt &f_l1)
656 {
657  // The function application expression f_l1 takes the following arguments:
658  // - result string length (output parameter)
659  // - result string data array (output parameter)
660  // - string to delete char from
661  // - index of char to delete
662  PRECONDITION(f_l1.arguments().size() == 4);
663 
664  const auto &f_type = f_l1.function_type();
665  const auto &length_type = f_type.domain().at(0);
666  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
667 
668  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
669  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
670 
671  if(!s_data_opt)
672  {
673  return false;
674  }
675 
676  const array_exprt &s_data = s_data_opt->get();
677 
678  const auto &index_opt = try_evaluate_constant(state, f_l1.arguments().at(3));
679 
680  if(!index_opt)
681  {
682  return false;
683  }
684 
685  const mp_integer index = numeric_cast_v<mp_integer>(index_opt->get());
686 
687  if(index < 0 || index >= s_data.operands().size())
688  {
689  return false;
690  }
691 
692  const constant_exprt new_char_array_length =
693  from_integer(s_data.operands().size() - 1, length_type);
694 
695  const array_typet new_char_array_type(char_type, new_char_array_length);
696 
697  exprt::operandst operands;
698  operands.reserve(s_data.operands().size() - 1);
699 
700  const std::size_t i = numeric_cast_v<std::size_t>(index);
701 
702  operands.insert(
703  operands.end(),
704  s_data.operands().begin(),
705  std::next(s_data.operands().begin(), i));
706 
707  operands.insert(
708  operands.end(),
709  std::next(s_data.operands().begin(), i + 1),
710  s_data.operands().end());
711 
712  const array_exprt new_char_array(std::move(operands), new_char_array_type);
713 
715  state,
716  symex_assign,
717  to_ssa_expr(f_l1.arguments().at(0)),
718  new_char_array_length,
719  to_ssa_expr(f_l1.arguments().at(1)),
720  new_char_array);
721 
722  return true;
723 }
724 
726  statet &state,
727  symex_assignt &symex_assign,
728  const function_application_exprt &f_l1)
729 {
730  // The function application expression f_l1 takes the following arguments:
731  // - result string length (output parameter)
732  // - result string data array (output parameter)
733  // - string to delete substring from
734  // - index of start of substring to delete (inclusive)
735  // - index of end of substring to delete (exclusive)
736  PRECONDITION(f_l1.arguments().size() == 5);
737 
738  const auto &f_type = f_l1.function_type();
739  const auto &length_type = f_type.domain().at(0);
740  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
741 
742  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
743  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
744 
745  if(!s_data_opt)
746  {
747  return false;
748  }
749 
750  const array_exprt &s_data = s_data_opt->get();
751 
752  const auto &start_opt = try_evaluate_constant(state, f_l1.arguments().at(3));
753 
754  if(!start_opt)
755  {
756  return false;
757  }
758 
759  const mp_integer start = numeric_cast_v<mp_integer>(start_opt->get());
760 
761  if(start < 0 || start > s_data.operands().size())
762  {
763  return false;
764  }
765 
766  const auto &end_opt = try_evaluate_constant(state, f_l1.arguments().at(4));
767 
768  if(!end_opt)
769  {
770  return false;
771  }
772 
773  const mp_integer end = numeric_cast_v<mp_integer>(end_opt->get());
774 
775  if(start > end)
776  {
777  return false;
778  }
779 
780  const std::size_t start_index = numeric_cast_v<std::size_t>(start);
781 
782  const std::size_t end_index =
783  std::min(numeric_cast_v<std::size_t>(end), s_data.operands().size());
784 
785  const std::size_t new_size =
786  s_data.operands().size() - end_index + start_index;
787 
788  const constant_exprt new_char_array_length =
789  from_integer(new_size, length_type);
790 
791  const array_typet new_char_array_type(char_type, new_char_array_length);
792 
793  exprt::operandst operands;
794  operands.reserve(new_size);
795 
796  operands.insert(
797  operands.end(),
798  s_data.operands().begin(),
799  std::next(s_data.operands().begin(), start_index));
800 
801  operands.insert(
802  operands.end(),
803  std::next(s_data.operands().begin(), end_index),
804  s_data.operands().end());
805 
806  const array_exprt new_char_array(std::move(operands), new_char_array_type);
807 
809  state,
810  symex_assign,
811  to_ssa_expr(f_l1.arguments().at(0)),
812  new_char_array_length,
813  to_ssa_expr(f_l1.arguments().at(1)),
814  new_char_array);
815 
816  return true;
817 }
818 
820  statet &state,
821  symex_assignt &symex_assign,
822  const function_application_exprt &f_l1)
823 {
824  // The function application expression f_l1 takes the following arguments:
825  // - result string length (output parameter)
826  // - result string data array (output parameter)
827  // - current string
828  // - new length of the string
829  PRECONDITION(f_l1.arguments().size() == 4);
830 
831  const auto &f_type = f_l1.function_type();
832  const auto &length_type = f_type.domain().at(0);
833  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
834 
835  const auto &new_length_opt =
836  try_evaluate_constant(state, f_l1.arguments().at(3));
837 
838  if(!new_length_opt)
839  {
840  return false;
841  }
842 
843  const mp_integer new_length =
844  numeric_cast_v<mp_integer>(new_length_opt->get());
845 
846  if(new_length < 0)
847  {
848  return false;
849  }
850 
851  const std::size_t new_size = numeric_cast_v<std::size_t>(new_length);
852 
853  const constant_exprt new_char_array_length =
854  from_integer(new_size, length_type);
855 
856  const array_typet new_char_array_type(char_type, new_char_array_length);
857 
858  exprt::operandst operands;
859 
860  if(new_size != 0)
861  {
862  operands.reserve(new_size);
863 
864  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
865  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
866 
867  if(!s_data_opt)
868  {
869  return false;
870  }
871 
872  const array_exprt &s_data = s_data_opt->get();
873 
874  operands.insert(
875  operands.end(),
876  s_data.operands().begin(),
877  std::next(
878  s_data.operands().begin(),
879  std::min(new_size, s_data.operands().size())));
880 
881  operands.insert(
882  operands.end(),
883  new_size - std::min(new_size, s_data.operands().size()),
884  from_integer(0, char_type));
885  }
886 
887  const array_exprt new_char_array(std::move(operands), new_char_array_type);
888 
890  state,
891  symex_assign,
892  to_ssa_expr(f_l1.arguments().at(0)),
893  new_char_array_length,
894  to_ssa_expr(f_l1.arguments().at(1)),
895  new_char_array);
896 
897  return true;
898 }
899 
901  statet &state,
902  symex_assignt &symex_assign,
903  const function_application_exprt &f_l1)
904 {
905  // The function application expression f_l1 takes the following arguments:
906  // - result string length (output parameter)
907  // - result string data array (output parameter)
908  // - current string
909  // - index of char to set
910  // - new char
911  PRECONDITION(f_l1.arguments().size() == 5);
912 
913  const auto &f_type = f_l1.function_type();
914  const auto &length_type = f_type.domain().at(0);
915  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
916 
917  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
918  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
919 
920  if(!s_data_opt)
921  {
922  return false;
923  }
924 
925  array_exprt s_data = s_data_opt->get();
926 
927  const auto &index_opt = try_evaluate_constant(state, f_l1.arguments().at(3));
928 
929  if(!index_opt)
930  {
931  return false;
932  }
933 
934  const mp_integer index = numeric_cast_v<mp_integer>(index_opt->get());
935 
936  if(index < 0 || index >= s_data.operands().size())
937  {
938  return false;
939  }
940 
941  const auto &new_char_opt =
942  try_evaluate_constant(state, f_l1.arguments().at(4));
943 
944  if(!new_char_opt)
945  {
946  return false;
947  }
948 
949  const constant_exprt new_char_array_length =
950  from_integer(s_data.operands().size(), length_type);
951 
952  const array_typet new_char_array_type(char_type, new_char_array_length);
953 
954  s_data.operands()[numeric_cast_v<std::size_t>(index)] = new_char_opt->get();
955 
956  const array_exprt new_char_array(
957  std::move(s_data.operands()), new_char_array_type);
958 
960  state,
961  symex_assign,
962  to_ssa_expr(f_l1.arguments().at(0)),
963  new_char_array_length,
964  to_ssa_expr(f_l1.arguments().at(1)),
965  new_char_array);
966 
967  return true;
968 }
969 
971  statet &state,
972  symex_assignt &symex_assign,
973  const function_application_exprt &f_l1,
974  bool to_upper)
975 {
976  const auto &f_type = f_l1.function_type();
977  const auto &length_type = f_type.domain().at(0);
978  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
979 
980  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
981  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
982 
983  if(!s_data_opt)
984  return false;
985 
986  array_exprt string_data = s_data_opt->get();
987 
988  auto &operands = string_data.operands();
989  for(auto &operand : operands)
990  {
991  auto &constant_value = to_constant_expr(operand);
992  auto character = numeric_cast_v<unsigned int>(constant_value);
993 
994  // Can't guarantee matches against non-ASCII characters.
995  if(character >= 128)
996  return false;
997 
998  if(isalpha(character))
999  {
1000  if(to_upper)
1001  {
1002  if(islower(character))
1003  constant_value =
1004  from_integer(toupper(character), constant_value.type());
1005  }
1006  else
1007  {
1008  if(isupper(character))
1009  constant_value =
1010  from_integer(tolower(character), constant_value.type());
1011  }
1012  }
1013  }
1014 
1015  const constant_exprt new_char_array_length =
1016  from_integer(operands.size(), length_type);
1017 
1018  const array_typet new_char_array_type(char_type, new_char_array_length);
1019  const array_exprt new_char_array(std::move(operands), new_char_array_type);
1020 
1022  state,
1023  symex_assign,
1024  to_ssa_expr(f_l1.arguments().at(0)),
1025  new_char_array_length,
1026  to_ssa_expr(f_l1.arguments().at(1)),
1027  new_char_array);
1028 
1029  return true;
1030 }
1031 
1033  statet &state,
1034  symex_assignt &symex_assign,
1035  const function_application_exprt &f_l1)
1036 {
1037  const auto &f_type = f_l1.function_type();
1038  const auto &length_type = f_type.domain().at(0);
1039  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
1040 
1041  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
1042  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
1043 
1044  if(!s_data_opt)
1045  return false;
1046 
1047  auto &new_data = f_l1.arguments().at(4);
1048  auto &old_data = f_l1.arguments().at(3);
1049 
1050  array_exprt::operandst characters_to_find;
1051  array_exprt::operandst characters_to_replace;
1052 
1053  // Two main ways to perform a replace: characters or strings.
1054  bool is_single_character = new_data.type().id() == ID_unsignedbv &&
1055  old_data.type().id() == ID_unsignedbv;
1056  if(is_single_character)
1057  {
1058  const auto new_char_pointer = try_evaluate_constant(state, new_data);
1059  const auto old_char_pointer = try_evaluate_constant(state, old_data);
1060 
1061  if(!new_char_pointer || !old_char_pointer)
1062  {
1063  return {};
1064  }
1065 
1066  characters_to_find.emplace_back(old_char_pointer->get());
1067  characters_to_replace.emplace_back(new_char_pointer->get());
1068  }
1069  else
1070  {
1071  auto &new_char_array = to_string_expr(new_data);
1072  auto &old_char_array = to_string_expr(old_data);
1073 
1074  const auto new_char_array_opt =
1075  try_evaluate_constant_string(state, new_char_array.content());
1076 
1077  const auto old_char_array_opt =
1078  try_evaluate_constant_string(state, old_char_array.content());
1079 
1080  if(!new_char_array_opt || !old_char_array_opt)
1081  {
1082  return {};
1083  }
1084 
1085  characters_to_find = old_char_array_opt->get().operands();
1086  characters_to_replace = new_char_array_opt->get().operands();
1087  }
1088 
1089  // Copy data, then do initial search for a replace sequence.
1090  array_exprt existing_data = s_data_opt->get();
1091  auto found_pattern = std::search(
1092  existing_data.operands().begin(),
1093  existing_data.operands().end(),
1094  characters_to_find.begin(),
1095  characters_to_find.end());
1096 
1097  // If we've found a match, proceed to perform a replace on all instances.
1098  while(found_pattern != existing_data.operands().end())
1099  {
1100  // Find the difference between our first/last match iterator.
1101  auto match_end = found_pattern + characters_to_find.size();
1102 
1103  // Erase them.
1104  found_pattern = existing_data.operands().erase(found_pattern, match_end);
1105 
1106  // Insert our replacement characters, then move the iterator to the end of
1107  // our new sequence.
1108  found_pattern = existing_data.operands().insert(
1109  found_pattern,
1110  characters_to_replace.begin(),
1111  characters_to_replace.end()) +
1112  characters_to_replace.size();
1113 
1114  // Then search from there for any additional matches.
1115  found_pattern = std::search(
1116  found_pattern,
1117  existing_data.operands().end(),
1118  characters_to_find.begin(),
1119  characters_to_find.end());
1120  }
1121 
1122  const constant_exprt new_char_array_length =
1123  from_integer(existing_data.operands().size(), length_type);
1124 
1125  const array_typet new_char_array_type(char_type, new_char_array_length);
1126  const array_exprt new_char_array(
1127  std::move(existing_data.operands()), new_char_array_type);
1128 
1130  state,
1131  symex_assign,
1132  to_ssa_expr(f_l1.arguments().at(0)),
1133  new_char_array_length,
1134  to_ssa_expr(f_l1.arguments().at(1)),
1135  new_char_array);
1136 
1137  return true;
1138 }
1139 
1141  statet &state,
1142  symex_assignt &symex_assign,
1143  const function_application_exprt &f_l1)
1144 {
1145  const auto &f_type = f_l1.function_type();
1146  const auto &length_type = f_type.domain().at(0);
1147  const auto &char_type = to_pointer_type(f_type.domain().at(1)).base_type();
1148 
1149  const refined_string_exprt &s = to_string_expr(f_l1.arguments().at(2));
1150  const auto s_data_opt = try_evaluate_constant_string(state, s.content());
1151 
1152  if(!s_data_opt)
1153  return false;
1154 
1155  auto is_not_whitespace = [](const exprt &expr) {
1156  auto character = numeric_cast_v<unsigned int>(to_constant_expr(expr));
1157  return character > ' ';
1158  };
1159 
1160  // Note the points where a trim would trim too.
1161  auto &operands = s_data_opt->get().operands();
1162  auto end_iter =
1163  std::find_if(operands.rbegin(), operands.rend(), is_not_whitespace);
1164  auto start_iter =
1165  std::find_if(operands.begin(), operands.end(), is_not_whitespace);
1166 
1167  // Then copy in the string with leading/trailing whitespace removed.
1168  // Note: if start_iter == operands.end it means the entire string is
1169  // whitespace, so we'll trim it to be empty anyway.
1170  exprt::operandst new_operands;
1171  if(start_iter != operands.end())
1172  new_operands = exprt::operandst{start_iter, end_iter.base()};
1173 
1174  const constant_exprt new_char_array_length =
1175  from_integer(new_operands.size(), length_type);
1176 
1177  const array_typet new_char_array_type(char_type, new_char_array_length);
1178  const array_exprt new_char_array(
1179  std::move(new_operands), new_char_array_type);
1180 
1182  state,
1183  symex_assign,
1184  to_ssa_expr(f_l1.arguments().at(0)),
1185  new_char_array_length,
1186  to_ssa_expr(f_l1.arguments().at(1)),
1187  new_char_array);
1188 
1189  return true;
1190 }
static abstract_object_pointert transform(const exprt &expr, const std::vector< abstract_object_pointert > &operands, const abstract_environmentt &environment, const namespacet &ns)
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
const integer_bitvector_typet & to_integer_bitvector_type(const typet &type)
Cast a typet to an integer_bitvector_typet.
int16_t s2
Definition: bytecode_info.h:60
int8_t s1
Definition: bytecode_info.h:59
bitvector_typet char_type()
Definition: c_types.cpp:106
bitvector_typet c_index_type()
Definition: c_types.cpp:16
Operator to return the address of an object.
Definition: pointer_expr.h:540
Array constructor from list of elements.
Definition: std_expr.h:1621
const array_typet & type() const
Definition: std_expr.h:1628
Arrays with given size.
Definition: std_types.h:807
const typet & element_type() const
The type of the elements of the array.
Definition: std_types.h:827
framet & top()
Definition: call_stack.h:17
A constant literal expression.
Definition: std_expr.h:3000
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Equality.
Definition: std_expr.h:1366
Expression in which some part is missing and can be substituted for another expression.
Definition: expr_skeleton.h:26
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
exprt apply(const namespacet &ns, goto_symex_statet &state, exprt expr, bool write) const
Turn an expression expr into a field-sensitive SSA expression.
bool is_divisible(const ssa_exprt &expr, bool disjoined_fields_only) const
Determine whether expr would translate to an atomic SSA expression (returns false) or a composite obj...
bool hidden_function
Definition: frame.h:40
Application of (mathematical) function.
const mathematical_function_typet & function_type() const
This helper method provides the type of the expression returned by function.
unsigned atomic_section_id
Threads.
Definition: goto_state.h:76
sharing_mapt< irep_idt, exprt > propagation
Definition: goto_state.h:71
Central data structure: state.
exprt l2_rename_rvalues(exprt lvalue, const namespacet &ns)
call_stackt & call_stack()
renamedt< exprt, level > rename(exprt expr, const namespacet &ns)
Rewrites symbol expressions in exprt, applying a suffix to each symbol reflecting its most recent ver...
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc.
field_sensitivityt field_sensitivity
symex_targett::sourcet source
std::vector< threadt > threads
virtual void symex_assume(statet &state, const exprt &cond)
Symbolically execute an ASSUME instruction or simulate such an execution for a synthetic assumption.
Definition: symex_main.cpp:199
virtual void symex_atomic_begin(statet &state)
Symbolically execute an ATOMIC_BEGIN instruction.
bool constant_propagate_delete_char_at(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate deleting a character from a string.
Definition: goto_symex.cpp:652
bool constant_propagate_set_char_at(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate setting the char at the given index.
Definition: goto_symex.cpp:900
bool constant_propagate_assignment_with_side_effects(statet &state, symex_assignt &symex_assign, const exprt &lhs, const exprt &rhs)
Attempt to constant propagate side effects of the assignment (if any)
Definition: goto_symex.cpp:184
bool constant_propagate_delete(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate deleting a substring from a string.
Definition: goto_symex.cpp:725
static std::optional< std::reference_wrapper< const constant_exprt > > try_evaluate_constant(const statet &state, const exprt &expr)
Definition: goto_symex.cpp:389
void constant_propagate_empty_string(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Create an empty string constant.
Definition: goto_symex.cpp:408
symex_target_equationt & target
The equation that this execution is building up.
Definition: goto_symex.h:266
bool constant_propagate_case_change(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1, bool to_upper)
Attempt to constant propagate case changes, both upper and lower.
Definition: goto_symex.cpp:970
virtual void symex_allocate(statet &state, const exprt &lhs, const side_effect_exprt &code)
Symbolically execute an assignment instruction that has an allocate on the right hand side.
bool constant_propagate_integer_to_string(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate converting an integer to a string.
Definition: goto_symex.cpp:573
exprt clean_expr(exprt expr, statet &state, bool write)
Clean up an expression.
bool constant_propagate_trim(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate trim operations.
shadow_memoryt shadow_memory
Shadow memory instrumentation API.
Definition: goto_symex.h:839
virtual void symex_va_start(statet &, const exprt &lhs, const side_effect_exprt &)
namespacet ns
Initialized just before symbolic execution begins, to point to both outer_symbol_table and the symbol...
Definition: goto_symex.h:258
void symex_assign(statet &state, const exprt &lhs, const exprt &rhs)
Symbolically execute an ASSIGN instruction or simulate such an execution for a synthetic assignment.
Definition: goto_symex.cpp:38
const symbolt & get_new_string_data_symbol(statet &state, symex_assignt &symex_assign, const std::string &aux_symbol_name, const ssa_exprt &char_array, const array_exprt &new_char_array)
Installs a new symbol in the symbol table to represent the given character array, and assigns the cha...
Definition: goto_symex.cpp:302
void associate_array_to_pointer(statet &state, symex_assignt &symex_assign, const array_exprt &new_char_array, const address_of_exprt &string_data)
Generate array to pointer association primitive.
Definition: goto_symex.cpp:338
virtual void do_simplify(exprt &expr)
Definition: goto_symex.cpp:32
virtual void symex_cpp_new(statet &state, const exprt &lhs, const side_effect_exprt &code)
Handles side effects of type 'new' for C++ and 'new array' for C++ and Java language modes.
bool constant_propagate_set_length(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate setting the length of a string.
Definition: goto_symex.cpp:819
messaget log
The messaget to write log messages to.
Definition: goto_symex.h:278
const symex_configt symex_config
The configuration to use for this symbolic execution.
Definition: goto_symex.h:185
bool constant_propagate_string_substring(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate getting a substring of a string.
Definition: goto_symex.cpp:486
bool constant_propagate_replace(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant proagate character replacement.
bool constant_propagate_string_concat(statet &state, symex_assignt &symex_assign, const function_application_exprt &f_l1)
Attempt to constant propagate string concatenation.
Definition: goto_symex.cpp:436
virtual void symex_atomic_end(statet &state)
Symbolically execute an ATOMIC_END instruction.
void assign_string_constant(statet &state, symex_assignt &symex_assign, const ssa_exprt &length, const constant_exprt &new_length, const ssa_exprt &char_array, const array_exprt &new_char_array)
Assign constant string length and string data given by a char array to given ssa variables.
Definition: goto_symex.cpp:258
std::optional< std::reference_wrapper< const array_exprt > > try_evaluate_constant_string(const statet &state, const exprt &content)
Definition: goto_symex.cpp:368
Array index operator.
Definition: std_expr.h:1470
std::string pretty(unsigned indent=0, unsigned max_indent=0) const
Definition: irep.cpp:482
const irep_idt & get(const irep_idt &name) const
Definition: irep.cpp:44
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:412
const irep_idt & id() const
Definition: irep.h:388
typet & codomain()
Return the codomain, i.e., the set of values that the function maps to (the "target").
void conditional_output(mstreamt &mstream, const std::function< void(mstreamt &)> &output_generator) const
Generate output to message_stream using output_generator if the configured verbosity is at least as h...
Definition: message.cpp:139
mstreamt & debug() const
Definition: message.h:421
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:148
const typet & base_type() const
The type of the data what we point to.
Definition: pointer_expr.h:35
const exprt & content() const
Definition: string_expr.h:139
An expression containing a side effect.
Definition: std_code.h:1450
const irep_idt & get_statement() const
Definition: std_code.h:1472
Expression providing an SSA-renamed symbol of expressions.
Definition: ssa_expr.h:17
const exprt & get_original_expr() const
Definition: ssa_expr.h:33
Expression to hold a symbol (variable)
Definition: std_expr.h:131
const irep_idt & get_identifier() const
Definition: std_expr.h:160
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
Symbol table entry.
Definition: symbol.h:28
bool is_file_local
Definition: symbol.h:73
bool is_static_lifetime
Definition: symbol.h:70
bool is_thread_local
Definition: symbol.h:71
bool is_state_var
Definition: symbol.h:66
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
bool is_lvalue
Definition: symbol.h:72
exprt value
Initial value of symbol.
Definition: symbol.h:34
irep_idt mode
Language mode.
Definition: symbol.h:49
Functor for symex assignment.
Definition: symex_assign.h:28
int isalpha(int c)
Definition: ctype.c:9
int islower(int c)
Definition: ctype.c:34
int toupper(int c)
Definition: ctype.c:134
int tolower(int c)
Definition: ctype.c:109
int isupper(int c)
Definition: ctype.c:90
Expression skeleton.
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, const namespacet &ns, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with respect to the given namespace ns with the requested name pattern ...
Fresh auxiliary symbol creation.
static std::string get_alnum_string(const array_exprt &char_array)
Maps the given array expression containing constant characters to a string containing only alphanumer...
Definition: goto_symex.cpp:152
Symbolic Execution.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
API to expression classes for 'mathematical' expressions.
const function_application_exprt & to_function_application_expr(const exprt &expr)
Cast an exprt to a function_application_exprt.
Mathematical types.
const mathematical_function_typet & to_mathematical_function_type(const typet &type)
Cast a typet to a mathematical_function_typet.
const std::string integer2string(const mp_integer &n, unsigned base)
Definition: mp_arith.cpp:103
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
Pointer Logic.
bool simplify(exprt &expr, const namespacet &ns)
std::optional< std::reference_wrapper< const array_exprt > > try_get_string_data_array(const exprt &content, const namespacet &ns)
Get char sequence from content field of a refined string expression.
BigInt mp_integer
Definition: smt_terms.h:17
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:525
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
#define DATA_INVARIANT_WITH_DIAGNOSTICS(CONDITION, REASON,...)
Definition: invariant.h:535
const ssa_exprt & to_ssa_expr(const exprt &expr)
Cast a generic exprt to an ssa_exprt.
Definition: ssa_expr.h:145
bool is_ssa_expr(const exprt &expr)
Definition: ssa_expr.h:125
side_effect_exprt & to_side_effect_expr(exprt &expr)
Definition: std_code.h:1506
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition: std_expr.h:3055
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:888
String expressions for the string solver.
refined_string_exprt & to_string_expr(exprt &expr)
Definition: string_expr.h:151
std::string escape_non_alnum(const std::string &to_escape)
Replace non-alphanumeric characters with _xx escapes, where xx are hex digits.
goto_programt::const_targett pc
Definition: symex_target.h:42
Symbolic Execution of assignments.