CBMC
remove_instanceof.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Remove Instance-of Operators
4 
5 Author: Chris Smowton, chris.smowton@diffblue.com
6 
7 \*******************************************************************/
8 
11 
12 #include "remove_instanceof.h"
13 
17 
19 
20 #include <util/arith_tools.h>
21 
23 {
24 public:
33  {
34  }
35 
36  // Lower instanceof for a single function
37  bool lower_instanceof(const irep_idt &function_identifier, goto_programt &);
38 
39  // Lower instanceof for a single instruction
40  bool lower_instanceof(
41  const irep_idt &function_identifier,
42  goto_programt &,
44 
45 protected:
50 
51  bool lower_instanceof(
52  const irep_idt &function_identifier,
53  exprt &,
54  goto_programt &,
56 };
57 
68  const exprt &classid_field,
69  const irep_idt &target_type,
70  const class_hierarchyt &class_hierarchy)
71 {
72  std::vector<irep_idt> children =
73  class_hierarchy.get_children_trans(target_type);
74  children.push_back(target_type);
75  // Sort alphabetically to make order of generated disjuncts
76  // independent of class loading order
77  std::sort(
78  children.begin(), children.end(), [](const irep_idt &a, const irep_idt &b) {
79  return a.compare(b) < 0;
80  });
81 
82  exprt::operandst or_ops;
83  for(const auto &class_name : children)
84  {
85  constant_exprt class_expr(class_name, string_typet());
86  equal_exprt test(classid_field, class_expr);
87  or_ops.push_back(test);
88  }
89 
90  return disjunction(or_ops);
91 }
92 
102  const irep_idt &function_identifier,
103  exprt &expr,
104  goto_programt &goto_program,
105  goto_programt::targett this_inst)
106 {
107  if(expr.id()!=ID_java_instanceof)
108  {
109  bool changed = false;
110  Forall_operands(it, expr)
111  changed |=
112  lower_instanceof(function_identifier, *it, goto_program, this_inst);
113  return changed;
114  }
115 
116  INVARIANT(
117  expr.operands().size()==2,
118  "java_instanceof should have two operands");
119 
120  const exprt &check_ptr = to_binary_expr(expr).op0();
121  INVARIANT(
122  check_ptr.type().id()==ID_pointer,
123  "instanceof first operand should be a pointer");
124 
125  const exprt &target_arg = to_binary_expr(expr).op1();
126  INVARIANT(
127  target_arg.id()==ID_type,
128  "instanceof second operand should be a type");
129 
130  INVARIANT(
131  target_arg.type().id() == ID_struct_tag,
132  "instanceof second operand should have a simple type");
133 
134  const auto &target_type = to_struct_tag_type(target_arg.type());
135 
136  const auto underlying_type_and_dimension =
138 
139  bool target_type_is_reference_array =
140  underlying_type_and_dimension.second >= 1 &&
141  can_cast_type<java_reference_typet>(underlying_type_and_dimension.first);
142 
143  // If we're casting to a reference array type, check
144  // @class_identifier == "java::array[reference]" &&
145  // @array_dimension == target_array_dimension &&
146  // @array_element_type (subtype of) target_array_element_type
147  // Otherwise just check
148  // @class_identifier (subtype of) target_type
149 
150  // Exception: when the target type is Object, nothing needs checking; when
151  // it is Object[], Object[][] etc, then we check that
152  // @array_dimension >= target_array_dimension (because
153  // String[][] (subtype of) Object[] for example) and don't check the element
154  // type.
155 
156  // All tests are made directly against a pointer to the object whose type is
157  // queried. By operating directly on a pointer rather than using a temporary
158  // (x->@clsid == "A" rather than id = x->@clsid; id == "A") we enable symex's
159  // value-set filtering to discard no-longer-viable candidates for "x" (in the
160  // case where 'x' is a symbol, not a general expression like x->y->@clsid).
161  // This means there are more clean dereference operations (ones where there
162  // is no possibility of reading a null, invalid or incorrectly-typed object),
163  // and less pessimistic aliasing assumptions possibly causing goto-symex to
164  // explore in-fact-unreachable paths.
165 
166  // In all cases require the input pointer is not null for any cast to succeed:
167 
168  std::vector<exprt> test_conjuncts;
169  test_conjuncts.push_back(notequal_exprt(
170  check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type()))));
171 
172  auto jlo = to_struct_tag_type(java_lang_object_type().subtype());
173 
174  exprt object_class_identifier_field =
175  get_class_identifier_field(check_ptr, jlo, ns);
176 
177  if(target_type_is_reference_array)
178  {
179  const auto &underlying_type = to_struct_tag_type(
180  to_pointer_type(underlying_type_and_dimension.first).base_type());
181 
182  test_conjuncts.push_back(equal_exprt(
183  object_class_identifier_field,
185 
186  exprt object_array_dimension = get_array_dimension_field(check_ptr);
187  constant_exprt target_array_dimension = from_integer(
188  underlying_type_and_dimension.second, object_array_dimension.type());
189 
190  if(underlying_type == jlo)
191  {
192  test_conjuncts.push_back(binary_relation_exprt(
193  object_array_dimension, ID_ge, target_array_dimension));
194  }
195  else
196  {
197  test_conjuncts.push_back(
198  equal_exprt(object_array_dimension, target_array_dimension));
199  test_conjuncts.push_back(subtype_expr(
200  get_array_element_type_field(check_ptr),
201  underlying_type.get_identifier(),
202  class_hierarchy));
203  }
204  }
205  else if(target_type != jlo)
206  {
207  test_conjuncts.push_back(subtype_expr(
208  get_class_identifier_field(check_ptr, jlo, ns),
209  target_type.get_identifier(),
210  class_hierarchy));
211  }
212 
213  expr = conjunction(test_conjuncts);
214 
215  return true;
216 }
217 
218 static bool contains_instanceof(const exprt &e)
219 {
220  if(e.id() == ID_java_instanceof)
221  return true;
222 
223  for(const exprt &subexpr : e.operands())
224  {
225  if(contains_instanceof(subexpr))
226  return true;
227  }
228 
229  return false;
230 }
231 
240  const irep_idt &function_identifier,
241  goto_programt &goto_program,
242  goto_programt::targett target)
243 {
244  bool changed;
245 
246  if(
247  target->is_target() &&
248  (contains_instanceof(target->code()) ||
249  (target->has_condition() && contains_instanceof(target->condition()))))
250  {
251  // If this is a branch target, add a skip beforehand so we can splice new
252  // GOTO programs before the target instruction without inserting into the
253  // wrong basic block.
254  goto_program.insert_before_swap(target);
255  target->turn_into_skip();
256  // Actually alter the now-moved instruction:
257  ++target;
258  }
259 
260  changed = lower_instanceof(
261  function_identifier, target->code_nonconst(), goto_program, target);
262  changed |=
263  (target->has_condition() ? lower_instanceof(
264  function_identifier,
265  target->condition_nonconst(),
266  goto_program,
267  target)
268  : false);
269  return changed;
270 }
271 
279  const irep_idt &function_identifier,
280  goto_programt &goto_program)
281 {
282  bool changed=false;
283  for(goto_programt::instructionst::iterator target=
284  goto_program.instructions.begin();
285  target!=goto_program.instructions.end();
286  ++target)
287  {
288  changed =
289  lower_instanceof(function_identifier, goto_program, target) || changed;
290  }
291  if(!changed)
292  return false;
293  goto_program.update();
294  return true;
295 }
296 
307  const irep_idt &function_identifier,
308  goto_programt::targett target,
309  goto_programt &goto_program,
310  symbol_table_baset &symbol_table,
311  const class_hierarchyt &class_hierarchy,
312  message_handlert &message_handler)
313 {
314  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
315  rem.lower_instanceof(function_identifier, goto_program, target);
316 }
317 
327  const irep_idt &function_identifier,
329  symbol_table_baset &symbol_table,
330  const class_hierarchyt &class_hierarchy,
331  message_handlert &message_handler)
332 {
333  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
334  rem.lower_instanceof(function_identifier, function.body);
335 }
336 
345  goto_functionst &goto_functions,
346  symbol_table_baset &symbol_table,
347  const class_hierarchyt &class_hierarchy,
348  message_handlert &message_handler)
349 {
350  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
351  bool changed=false;
352  for(auto &f : goto_functions.function_map)
353  changed = rem.lower_instanceof(f.first, f.second.body) || changed;
354  if(changed)
355  goto_functions.compute_location_numbers();
356 }
357 
367  goto_modelt &goto_model,
368  const class_hierarchyt &class_hierarchy,
369  message_handlert &message_handler)
370 {
372  goto_model.goto_functions,
373  goto_model.symbol_table,
374  class_hierarchy,
375  message_handler);
376 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Class Hierarchy.
exprt get_class_identifier_field(const exprt &this_expr_in, const struct_tag_typet &suggested_type, const namespacet &ns)
Extract class identifier.
exprt & op1()
Definition: expr.h:136
exprt & op0()
Definition: expr.h:133
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:762
Non-graph-based representation of the class hierarchy.
idst get_children_trans(const irep_idt &id) const
A constant literal expression.
Definition: std_expr.h:2990
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Equality.
Definition: std_expr.h:1361
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
A collection of goto functions.
function_mapt function_map
::goto_functiont goto_functiont
void compute_location_numbers()
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:31
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:34
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
void update()
Update all indices.
void insert_before_swap(targett target)
Insertion that preserves jumps to "target".
Definition: goto_program.h:643
instructionst::iterator targett
Definition: goto_program.h:614
const irep_idt & id() const
Definition: irep.h:388
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
Disequality.
Definition: std_expr.h:1420
The null pointer constant.
Definition: pointer_expr.h:909
const typet & base_type() const
The type of the data what we point to.
Definition: pointer_expr.h:35
bool lower_instanceof(const irep_idt &function_identifier, goto_programt &)
Replace every instanceof in the passed function body with an explicit class-identifier test.
message_handlert & message_handler
remove_instanceoft(symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, message_handlert &message_handler)
const class_hierarchyt & class_hierarchy
symbol_table_baset & symbol_table
String type.
Definition: std_types.h:966
The symbol table base class interface.
#define Forall_operands(it, expr)
Definition: expr.h:27
Symbol Table + CFG.
std::pair< typet, std::size_t > java_array_dimension_and_element_type(const struct_tag_typet &type)
Returns the underlying element type and array dimensionality of Java struct type.
Definition: java_types.cpp:189
exprt get_array_element_type_field(const exprt &pointer)
Definition: java_types.cpp:218
exprt get_array_dimension_field(const exprt &pointer)
Definition: java_types.cpp:206
java_reference_typet java_lang_object_type()
Definition: java_types.cpp:98
#define JAVA_REFERENCE_ARRAY_CLASSID
Definition: java_types.h:655
bool can_cast_type< java_reference_typet >(const typet &type)
Definition: java_types.h:623
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
static exprt subtype_expr(const exprt &classid_field, const irep_idt &target_type, const class_hierarchyt &class_hierarchy)
Produce an expression of the form classid_field == "A" || classid_field == "B" || ....
static bool contains_instanceof(const exprt &e)
void remove_instanceof(const irep_idt &function_identifier, goto_programt::targett target, goto_programt &goto_program, symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, message_handlert &message_handler)
Replace an instanceof in the expression or guard of the passed instruction of the given function body...
Remove Instance-of Operators.
exprt conjunction(const exprt::operandst &op)
1) generates a conjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:83
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:71
const binary_exprt & to_binary_expr(const exprt &expr)
Cast an exprt to a binary_exprt.
Definition: std_expr.h:715
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:518