CBMC
remove_java_new.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Remove Java New Operators
4 
5 Author: Peter Schrammel
6 
7 \*******************************************************************/
8 
11 
12 #include "remove_java_new.h"
13 
14 #include <util/arith_tools.h>
15 #include <util/c_types.h>
16 #include <util/expr_initializer.h>
17 #include <util/namespace.h>
19 #include <util/std_code.h>
20 
23 
25 
26 #include "java_types.h"
27 #include "java_utils.h"
28 
30 {
31 public:
34  {
35  }
36 
37  // Lower java_new for a single function
38  bool lower_java_new(
39  const irep_idt &function_identifier,
40  goto_programt &,
42 
43  // Lower java_new for a single instruction
45  const irep_idt &function_identifier,
46  goto_programt &,
49 
50 protected:
53 
55  const irep_idt &function_identifier,
56  const exprt &lhs,
57  const side_effect_exprt &rhs,
58  goto_programt &,
60 
62  const irep_idt &function_identifier,
63  const exprt &lhs,
64  const side_effect_exprt &rhs,
65  goto_programt &,
68 };
69 
83  const irep_idt &function_identifier,
84  const exprt &lhs,
85  const side_effect_exprt &rhs,
86  goto_programt &dest,
88 {
89  PRECONDITION(!lhs.is_nil());
90  PRECONDITION(rhs.operands().empty());
91  PRECONDITION(rhs.type().id() == ID_pointer);
92  source_locationt location = rhs.source_location();
93  typet object_type = to_pointer_type(rhs.type()).base_type();
94 
95  // build size expression
96  const auto object_size = size_of_expr(object_type, ns);
97  CHECK_RETURN(object_size.has_value());
98 
99  // we produce a malloc side-effect, which stays
100  side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
101  malloc_expr.copy_to_operands(*object_size);
102  // could use true and get rid of the code below
103  malloc_expr.copy_to_operands(false_exprt());
104  *target =
105  goto_programt::make_assignment(code_assignt(lhs, malloc_expr), location);
106 
107  // zero-initialize the object
108  dereference_exprt deref(lhs, object_type);
109  auto zero_object = zero_initializer(object_type, location, ns);
110  CHECK_RETURN(zero_object.has_value());
112  to_struct_expr(*zero_object), ns, to_struct_tag_type(object_type));
113  return dest.insert_after(
114  target,
116  code_assignt(deref, *zero_object), location));
117 }
118 
133  const irep_idt &function_identifier,
134  const exprt &lhs,
135  const side_effect_exprt &rhs,
136  goto_programt &dest,
137  goto_programt::targett target,
138  message_handlert &message_handler)
139 {
140  // lower_java_new_array without lhs not implemented
141  PRECONDITION(!lhs.is_nil());
142  PRECONDITION(rhs.operands().size() >= 1); // one per dimension
143  PRECONDITION(rhs.type().id() == ID_pointer);
144 
145  source_locationt location = rhs.source_location();
146  struct_tag_typet object_type =
148 
149  // build size expression
150  const auto object_size = size_of_expr(object_type, ns);
151  CHECK_RETURN(object_size.has_value());
152 
153  // we produce a malloc side-effect, which stays
154  side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
155  malloc_expr.copy_to_operands(*object_size);
156  // code use true and get rid of the code below
157  malloc_expr.copy_to_operands(false_exprt());
158 
159  *target =
160  goto_programt::make_assignment(code_assignt(lhs, malloc_expr), location);
161  goto_programt::targett next = std::next(target);
162 
163  const struct_typet &struct_type = ns.follow_tag(object_type);
164 
165  PRECONDITION(is_valid_java_array(struct_type));
166 
167  // Init base class:
168  dereference_exprt deref(lhs, object_type);
169  auto zero_object = zero_initializer(object_type, location, ns);
170  CHECK_RETURN(zero_object.has_value());
171  set_class_identifier(to_struct_expr(*zero_object), ns, object_type);
172  dest.insert_before(
173  next,
175  code_assignt(deref, *zero_object), location));
176 
177  // If it's a reference array we need to set the dimension and element type
178  // fields. Primitive array types don't have these fields; if the element type
179  // is a void pointer then the element type is statically unknown and the
180  // caller must set these up itself. This happens in array[reference].clone(),
181  // where the type info must be copied over from the input array)
182  const auto underlying_type_and_dimension =
184 
185  bool target_type_is_reference_array =
186  underlying_type_and_dimension.second >= 1 &&
187  can_cast_type<java_reference_typet>(underlying_type_and_dimension.first);
188 
189  if(target_type_is_reference_array)
190  {
191  exprt object_array_dimension = get_array_dimension_field(lhs);
192  dest.insert_before(
193  next,
195  object_array_dimension,
196  from_integer(
197  underlying_type_and_dimension.second, object_array_dimension.type()),
198  location)));
199 
200  exprt object_array_element_type = get_array_element_type_field(lhs);
201  dest.insert_before(
202  next,
204  object_array_element_type,
207  to_pointer_type(underlying_type_and_dimension.first).base_type())
208  .get_identifier(),
209  string_typet()),
210  location)));
211  }
212 
213  // if it's an array, we need to set the length field
214  member_exprt length(
215  deref,
216  struct_type.components()[1].get_name(),
217  struct_type.components()[1].type());
218  dest.insert_before(
219  next,
221  code_assignt(length, to_multi_ary_expr(rhs).op0()), location));
222 
223  // we also need to allocate space for the data
224  member_exprt data(
225  deref,
226  struct_type.components()[2].get_name(),
227  struct_type.components()[2].type());
228 
229  // Allocate a (struct realtype**) instead of a (void**) if possible.
230  const irept &given_element_type = object_type.find(ID_element_type);
231  typet allocate_data_type;
232  if(given_element_type.is_not_nil())
233  {
234  allocate_data_type =
235  pointer_type(static_cast<const typet &>(given_element_type));
236  }
237  else
238  allocate_data_type = data.type();
239 
240  side_effect_exprt data_java_new_expr(
241  ID_java_new_array_data, allocate_data_type, location);
242 
243  // The instruction may specify a (hopefully small) upper bound on the
244  // array size, in which case we allocate a fixed-length array that may
245  // be larger than the `length` member rather than use a true variable-
246  // length array, which produces a more complex formula in the current
247  // backend.
248  const irept size_bound = rhs.find(ID_length_upper_bound);
249  if(size_bound.is_nil())
250  data_java_new_expr.set(ID_size, to_multi_ary_expr(rhs).op0());
251  else
252  data_java_new_expr.set(ID_size, size_bound);
253 
254  // Must directly assign the new array to a temporary
255  // because goto-symex will notice `x=side_effect_exprt` but not
256  // `x=typecast_exprt(side_effect_exprt(...))`
257  symbol_exprt new_array_data_symbol = fresh_java_symbol(
258  data_java_new_expr.type(),
259  "tmp_new_data_array",
260  location,
261  function_identifier,
262  symbol_table)
263  .symbol_expr();
264  code_declt array_decl(new_array_data_symbol);
265  array_decl.add_source_location() = location;
266  dest.insert_before(next, goto_programt::make_decl(array_decl, location));
267  dest.insert_before(
268  next,
270  code_assignt(new_array_data_symbol, data_java_new_expr), location));
271 
272  exprt cast_java_new = new_array_data_symbol;
273  if(cast_java_new.type() != data.type())
274  cast_java_new = typecast_exprt(cast_java_new, data.type());
275  dest.insert_before(
276  next,
278  code_assignt(data, cast_java_new), location));
279 
280  // zero-initialize the data
281  if(!rhs.get_bool(ID_skip_initialize))
282  {
283  const auto zero_element =
284  zero_initializer(to_pointer_type(data.type()).base_type(), location, ns);
285  CHECK_RETURN(zero_element.has_value());
286  codet array_set{ID_array_set, {new_array_data_symbol, *zero_element}};
287  dest.insert_before(next, goto_programt::make_other(array_set, location));
288  }
289 
290  // multi-dimensional?
291 
292  if(rhs.operands().size() >= 2)
293  {
294  // produce
295  // for(int i=0; i<size; i++) tmp[i]=java_new(dim-1);
296  // This will be converted recursively.
297 
298  goto_programt tmp;
299 
300  symbol_exprt tmp_i =
302  length.type(), "tmp_index", location, function_identifier, symbol_table)
303  .symbol_expr();
304  code_declt decl(tmp_i);
305  decl.add_source_location() = location;
306  tmp.insert_before(
307  tmp.instructions.begin(), goto_programt::make_decl(decl, location));
308 
309  side_effect_exprt sub_java_new = rhs;
310  sub_java_new.operands().erase(sub_java_new.operands().begin());
311 
312  // we already know that rhs has pointer type
313  typet sub_type = static_cast<const typet &>(
314  to_pointer_type(rhs.type()).base_type().find(ID_element_type));
315  CHECK_RETURN(sub_type.id() == ID_pointer);
316  sub_java_new.type() = sub_type;
317 
318  plus_exprt(tmp_i, from_integer(1, tmp_i.type()));
320  plus_exprt(data, tmp_i), to_pointer_type(data.type()).base_type());
321 
322  code_blockt for_body;
323  symbol_exprt init_sym =
325  sub_type, "subarray_init", location, function_identifier, symbol_table)
326  .symbol_expr();
327  code_declt init_decl(init_sym);
328  init_decl.add_source_location() = location;
329  for_body.add(std::move(init_decl));
330 
331  code_assignt init_subarray(init_sym, sub_java_new);
332  for_body.add(std::move(init_subarray));
333  code_assignt assign_subarray(
334  deref_expr, typecast_exprt(init_sym, deref_expr.type()));
335  for_body.add(std::move(assign_subarray));
336 
337  const code_fort for_loop = code_fort::from_index_bounds(
338  from_integer(0, tmp_i.type()),
339  to_multi_ary_expr(rhs).op0(),
340  tmp_i,
341  std::move(for_body),
342  location);
343 
344  goto_convert(for_loop, symbol_table, tmp, message_handler, ID_java);
345 
346  // lower new side effects recursively
347  lower_java_new(function_identifier, tmp, message_handler);
348 
349  dest.destructive_insert(next, tmp);
350  }
351 
352  return std::prev(next);
353 }
354 
363  const irep_idt &function_identifier,
364  goto_programt &goto_program,
365  goto_programt::targett target,
366  message_handlert &message_handler)
367 {
368  if(!target->is_assign())
369  return target;
370 
371  if(as_const(*target).assign_rhs().id() == ID_side_effect)
372  {
373  // we make a copy, as we intend to destroy the assignment
374  // inside lower_java_new and lower_java_new_array
375  exprt lhs = target->assign_lhs();
376  exprt rhs = target->assign_rhs();
377 
378  const auto &side_effect_expr = to_side_effect_expr(rhs);
379  const auto &statement = side_effect_expr.get_statement();
380 
381  if(statement == ID_java_new)
382  {
383  return lower_java_new(
384  function_identifier, lhs, side_effect_expr, goto_program, target);
385  }
386  else if(statement == ID_java_new_array)
387  {
388  return lower_java_new_array(
389  function_identifier,
390  lhs,
391  side_effect_expr,
392  goto_program,
393  target,
394  message_handler);
395  }
396  }
397 
398  return target;
399 }
400 
409  const irep_idt &function_identifier,
410  goto_programt &goto_program,
411  message_handlert &message_handler)
412 {
413  bool changed = false;
414  for(goto_programt::instructionst::iterator target =
415  goto_program.instructions.begin();
416  target != goto_program.instructions.end();
417  ++target)
418  {
420  function_identifier, goto_program, target, message_handler);
421  changed = changed || new_target == target;
422  target = new_target;
423  }
424  if(!changed)
425  return false;
426  goto_program.update();
427  return true;
428 }
429 
439  const irep_idt &function_identifier,
440  goto_programt::targett target,
441  goto_programt &goto_program,
442  symbol_table_baset &symbol_table,
443  message_handlert &message_handler)
444 {
445  remove_java_newt rem{symbol_table};
446  rem.lower_java_new(
447  function_identifier, goto_program, target, message_handler);
448 }
449 
458  const irep_idt &function_identifier,
460  symbol_table_baset &symbol_table,
461  message_handlert &message_handler)
462 {
463  remove_java_newt rem{symbol_table};
464  rem.lower_java_new(function_identifier, function.body, message_handler);
465 }
466 
474  goto_functionst &goto_functions,
475  symbol_table_baset &symbol_table,
476  message_handlert &message_handler)
477 {
478  remove_java_newt rem{symbol_table};
479  bool changed = false;
480  for(auto &f : goto_functions.function_map)
481  changed =
482  rem.lower_java_new(f.first, f.second.body, message_handler) || changed;
483  if(changed)
484  goto_functions.compute_location_numbers();
485 }
486 
493 void remove_java_new(goto_modelt &goto_model, message_handlert &message_handler)
494 {
496  goto_model.goto_functions, goto_model.symbol_table, message_handler);
497 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
const T & as_const(T &value)
Return a reference to the same object but ensures the type is const.
Definition: as_const.h:14
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
void set_class_identifier(struct_exprt &expr, const namespacet &ns, const struct_tag_typet &class_type)
If expr has its components filled in then sets the @class_identifier member of the struct.
Extract class identifier.
A goto_instruction_codet representing an assignment in the program.
A codet representing sequential composition of program statements.
Definition: std_code.h:130
void add(const codet &code)
Definition: std_code.h:168
A goto_instruction_codet representing the declaration of a local variable.
codet representation of a for statement.
Definition: std_code.h:734
static code_fort from_index_bounds(exprt start_index, exprt end_index, symbol_exprt loop_index, codet body, source_locationt location)
Produce a code_fort representing:
Definition: std_code.cpp:155
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
A constant literal expression.
Definition: std_expr.h:3000
Operator to dereference a pointer.
Definition: pointer_expr.h:834
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Base class for all expressions.
Definition: expr.h:56
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition: expr.h:163
source_locationt & add_source_location()
Definition: expr.h:236
const source_locationt & source_location() const
Definition: expr.h:231
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
The Boolean constant false.
Definition: std_expr.h:3082
A collection of goto functions.
function_mapt function_map
::goto_functiont goto_functiont
void compute_location_numbers()
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:31
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:34
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
void update()
Update all indices.
void destructive_insert(const_targett target, goto_programt &p)
Inserts the given program p before target.
Definition: goto_program.h:730
instructionst::iterator targett
Definition: goto_program.h:614
static instructiont make_assignment(const code_assignt &_code, const source_locationt &l=source_locationt::nil())
Create an assignment instruction.
targett insert_after(const_targett target)
Insertion after the instruction pointed-to by the given instruction iterator target.
Definition: goto_program.h:708
static instructiont make_other(const goto_instruction_codet &_code, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:957
static instructiont make_decl(const symbol_exprt &symbol, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:964
targett insert_before(const_targett target)
Insertion before the instruction pointed-to by the given instruction iterator target.
Definition: goto_program.h:692
There are a large number of kinds of tree structured or tree-like data in CPROVER.
Definition: irep.h:364
bool get_bool(const irep_idt &name) const
Definition: irep.cpp:57
const irept & find(const irep_idt &name) const
Definition: irep.cpp:93
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:412
bool is_not_nil() const
Definition: irep.h:372
const irep_idt & id() const
Definition: irep.h:388
bool is_nil() const
Definition: irep.h:368
Extract member of struct or union.
Definition: std_expr.h:2854
const union_typet & follow_tag(const union_tag_typet &) const
Follow type tag of union type.
Definition: namespace.cpp:63
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
const typet & base_type() const
The type of the data what we point to.
Definition: pointer_expr.h:35
remove_java_newt(symbol_table_baset &symbol_table)
symbol_table_baset & symbol_table
bool lower_java_new(const irep_idt &function_identifier, goto_programt &, message_handlert &)
Replace every java_new or java_new_array by a malloc side-effect and zero initialization.
goto_programt::targett lower_java_new_array(const irep_idt &function_identifier, const exprt &lhs, const side_effect_exprt &rhs, goto_programt &, goto_programt::targett, message_handlert &)
Replaces the instruction lhs = new java_array_type by the following code: lhs = ALLOCATE(java_type) l...
An expression containing a side effect.
Definition: std_code.h:1450
String type.
Definition: std_types.h:966
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:493
Structure type, corresponds to C style structs.
Definition: std_types.h:231
const componentst & components() const
Definition: std_types.h:147
Expression to hold a symbol (variable)
Definition: std_expr.h:131
The symbol table base class interface.
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
const irep_idt & get_identifier() const
Definition: std_types.h:410
Semantic type conversion.
Definition: std_expr.h:2073
The type of an expression, extends irept.
Definition: type.h:29
std::optional< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
void goto_convert(const codet &code, symbol_table_baset &symbol_table, goto_programt &dest, message_handlert &message_handler, const irep_idt &mode)
Program Transformation.
Symbol Table + CFG.
bool is_valid_java_array(const struct_typet &type)
Programmatic documentation of the structure of a Java array (of either primitives or references) type...
Definition: java_types.cpp:838
std::pair< typet, std::size_t > java_array_dimension_and_element_type(const struct_tag_typet &type)
Returns the underlying element type and array dimensionality of Java struct type.
Definition: java_types.cpp:189
exprt get_array_element_type_field(const exprt &pointer)
Definition: java_types.cpp:218
exprt get_array_dimension_field(const exprt &pointer)
Definition: java_types.cpp:206
bool can_cast_type< java_reference_typet >(const typet &type)
Definition: java_types.h:623
symbolt & fresh_java_symbol(const typet &type, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &function_name, symbol_table_baset &symbol_table)
Definition: java_utils.cpp:555
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
std::optional< exprt > size_of_expr(const typet &type, const namespacet &ns)
Pointer Logic.
exprt object_size(const exprt &pointer)
void remove_java_new(const irep_idt &function_identifier, goto_programt::targett target, goto_programt &goto_program, symbol_table_baset &symbol_table, message_handlert &message_handler)
Replace every java_new or java_new_array by a malloc side-effect and zero initialization.
Remove Java New Operators.
exprt deref_expr(const exprt &expr)
Wraps a given expression into a dereference_exprt unless it is an address_of_exprt in which case it j...
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
side_effect_exprt & to_side_effect_expr(exprt &expr)
Definition: std_code.h:1506
const multi_ary_exprt & to_multi_ary_expr(const exprt &expr)
Cast an exprt to a multi_ary_exprt.
Definition: std_expr.h:987
const struct_exprt & to_struct_expr(const exprt &expr)
Cast an exprt to a struct_exprt.
Definition: std_expr.h:1900
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:518