CBMC
ci_lazy_methods.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Java Bytecode
4 
5 Author: Diffblue Ltd.
6 
7 \*******************************************************************/
8 
9 #include "ci_lazy_methods.h"
10 
11 #include <util/expr_iterator.h>
12 #include <util/message.h>
13 #include <util/namespace.h>
14 #include <util/suffix.h>
15 #include <util/symbol_table.h>
16 
18 
19 #include "java_bytecode_language.h"
20 #include "java_class_loader.h"
21 #include "java_entry_point.h"
22 #include "remove_exceptions.h"
23 
40  const symbol_table_baset &symbol_table,
41  const irep_idt &main_class,
42  const std::vector<irep_idt> &main_jar_classes,
43  const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
44  java_class_loadert &java_class_loader,
45  const std::vector<irep_idt> &extra_instantiated_classes,
46  const select_pointer_typet &pointer_type_selector,
47  const synthetic_methods_mapt &synthetic_methods)
48  : main_class(main_class),
49  main_jar_classes(main_jar_classes),
50  lazy_methods_extra_entry_points(lazy_methods_extra_entry_points),
51  java_class_loader(java_class_loader),
52  extra_instantiated_classes(extra_instantiated_classes),
53  pointer_type_selector(pointer_type_selector),
54  synthetic_methods(synthetic_methods)
55 {
56  // build the class hierarchy
57  class_hierarchy(symbol_table);
58 }
59 
66 static bool references_class_model(const exprt &expr)
67 {
68  static const struct_tag_typet class_type("java::java.lang.Class");
69 
70  for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
71  {
73  it->type() == class_type &&
74  has_suffix(
77  {
78  return true;
79  }
80  }
81 
82  return false;
83 }
84 
102  symbol_table_baset &symbol_table,
103  method_bytecodet &method_bytecode,
104  const method_convertert &method_converter,
105  message_handlert &message_handler)
106 {
107  std::unordered_set<irep_idt> methods_to_convert_later =
108  entry_point_methods(symbol_table, message_handler);
109 
110  // Add any extra entry points specified; we should elaborate these in the
111  // same way as the main function.
112  for(const auto &extra_function_generator : lazy_methods_extra_entry_points)
113  {
114  std::vector<irep_idt> extra_methods =
115  extra_function_generator(symbol_table);
116  methods_to_convert_later.insert(extra_methods.begin(), extra_methods.end());
117  }
118 
119  std::unordered_set<irep_idt> instantiated_classes;
120 
121  {
122  std::unordered_set<irep_idt> initial_callable_methods;
123  ci_lazy_methods_neededt initial_lazy_methods(
124  initial_callable_methods,
125  instantiated_classes,
126  symbol_table,
129  methods_to_convert_later, namespacet(symbol_table), initial_lazy_methods);
130  methods_to_convert_later.insert(
131  initial_callable_methods.begin(), initial_callable_methods.end());
132  }
133 
134  std::unordered_set<irep_idt> methods_already_populated;
135  std::unordered_set<class_method_descriptor_exprt, irep_hash>
136  called_virtual_functions;
137  bool class_initializer_seen = false;
138 
139  messaget log{message_handler};
140 
141  bool any_new_classes = true;
142  while(any_new_classes)
143  {
144  bool any_new_methods = true;
145  while(any_new_methods)
146  {
147  any_new_methods = false;
148  while(!methods_to_convert_later.empty())
149  {
150  std::unordered_set<irep_idt> methods_to_convert;
151  std::swap(methods_to_convert, methods_to_convert_later);
152  for(const auto &mname : methods_to_convert)
153  {
154  const auto conversion_result = convert_and_analyze_method(
155  method_converter,
156  methods_already_populated,
157  class_initializer_seen,
158  mname,
159  symbol_table,
160  methods_to_convert_later,
161  instantiated_classes,
162  called_virtual_functions,
163  message_handler);
164  any_new_methods |= conversion_result.new_method_seen;
165  class_initializer_seen |= conversion_result.class_initializer_seen;
166  }
167  }
168 
169  // Given the object types we now know may be created, populate more
170  // possible virtual function call targets:
171 
172  log.debug() << "CI lazy methods: add virtual method targets ("
173  << called_virtual_functions.size() << " callsites)"
174  << messaget::eom;
175 
176  for(const class_method_descriptor_exprt &called_virtual_function :
177  called_virtual_functions)
178  {
180  called_virtual_function,
181  instantiated_classes,
182  methods_to_convert_later,
183  symbol_table);
184  }
185  }
186 
187  any_new_classes = handle_virtual_methods_with_no_callees(
188  methods_to_convert_later,
189  instantiated_classes,
190  called_virtual_functions,
191  symbol_table);
192  }
193 
194  // Remove symbols for methods that were declared but never used:
195  symbol_tablet keep_symbols;
196  // Manually keep @inflight_exception, as it is unused at this stage
197  // but will become used when the `remove_exceptions` pass is run:
198  keep_symbols.add(symbol_table.lookup_ref(INFLIGHT_EXCEPTION_VARIABLE_NAME));
199 
200  for(const auto &sym : symbol_table.symbols)
201  {
202  // Don't keep global variables (unless they're gathered below from a
203  // function that references them)
204  if(sym.second.is_static_lifetime)
205  continue;
206  if(sym.second.type.id()==ID_code)
207  {
208  // Don't keep functions that belong to this language that we haven't
209  // converted above
210  if(
211  (method_bytecode.contains_method(sym.first) ||
212  synthetic_methods.count(sym.first)) &&
213  !methods_already_populated.count(sym.first))
214  {
215  continue;
216  }
217  // If this is a function then add all the things used in it
218  gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
219  }
220  keep_symbols.add(sym.second);
221  }
222 
223  log.debug() << "CI lazy methods: removed "
224  << symbol_table.symbols.size() - keep_symbols.symbols.size()
225  << " unreachable methods and globals" << messaget::eom;
226 
227  auto sorted_to_keep = keep_symbols.sorted_symbol_names();
228  auto all_sorted = symbol_table.sorted_symbol_names();
229  auto it = sorted_to_keep.cbegin();
230  for(const auto &id : all_sorted)
231  {
232  if(it == sorted_to_keep.cend() || id != *it)
233  symbol_table.remove(id);
234  else
235  ++it;
236  }
237 
238  return false;
239 }
240 
249  std::unordered_set<irep_idt> &methods_to_convert_later,
250  std::unordered_set<irep_idt> &instantiated_classes,
251  const std::unordered_set<class_method_descriptor_exprt, irep_hash>
252  &virtual_functions,
253  symbol_table_baset &symbol_table)
254 {
255  ci_lazy_methods_neededt lazy_methods_loader(
256  methods_to_convert_later,
257  instantiated_classes,
258  symbol_table,
260 
261  bool any_new_classes = false;
262  for(const class_method_descriptor_exprt &virtual_function : virtual_functions)
263  {
264  std::unordered_set<irep_idt> candidate_target_methods;
266  virtual_function,
267  instantiated_classes,
268  candidate_target_methods,
269  symbol_table);
270 
271  if(!candidate_target_methods.empty())
272  continue;
273 
274  const java_method_typet &java_method_type =
275  to_java_method_type(virtual_function.type());
276 
277  // Add the call class to instantiated_classes and assert that it
278  // didn't already exist. It can't be instantiated already, otherwise it
279  // would give a concrete definition of the called method, and
280  // candidate_target_methods would be non-empty.
281  const irep_idt &call_class = virtual_function.class_id();
282  bool was_missing = instantiated_classes.count(call_class) == 0;
283  CHECK_RETURN(was_missing);
284  any_new_classes = true;
285 
286  const typet &this_type = java_method_type.get_this()->type();
287  if(
288  const pointer_typet *this_pointer_type =
289  type_try_dynamic_cast<pointer_typet>(this_type))
290  {
291  lazy_methods_loader.add_all_needed_classes(*this_pointer_type);
292  }
293 
294  // That should in particular have added call_class to the possibly
295  // instantiated types.
296  bool still_missing = instantiated_classes.count(call_class) == 0;
297  CHECK_RETURN(!still_missing);
298 
299  // Make sure we add our return type as required, as we may not have
300  // seen any concrete instances of it being created.
301  const typet &return_type = java_method_type.return_type();
302  if(
303  const pointer_typet *return_pointer_type =
304  type_try_dynamic_cast<pointer_typet>(return_type))
305  {
306  lazy_methods_loader.add_all_needed_classes(*return_pointer_type);
307  }
308 
309  // Check that `get_virtual_method_target` returns a method now
310  const irep_idt &method_name = virtual_function.mangled_method_name();
311  const irep_idt method_id = get_virtual_method_target(
312  instantiated_classes, method_name, call_class, symbol_table);
313  CHECK_RETURN(!method_id.empty());
314 
315  // Add what it returns to methods_to_convert_later
316  methods_to_convert_later.insert(method_id);
317  }
318  return any_new_classes;
319 }
320 
332  const method_convertert &method_converter,
333  std::unordered_set<irep_idt> &methods_already_populated,
334  const bool class_initializer_already_seen,
335  const irep_idt &method_name,
336  symbol_table_baset &symbol_table,
337  std::unordered_set<irep_idt> &methods_to_convert_later,
338  std::unordered_set<irep_idt> &instantiated_classes,
339  std::unordered_set<class_method_descriptor_exprt, irep_hash>
340  &called_virtual_functions,
341  message_handlert &message_handler)
342 {
343  convert_method_resultt result;
344  if(!methods_already_populated.insert(method_name).second)
345  return result;
346 
347  messaget log{message_handler};
348  log.debug() << "CI lazy methods: elaborate " << method_name << messaget::eom;
349 
350  // Note this wraps *references* to methods_to_convert_later &
351  // instantiated_classes
352  ci_lazy_methods_neededt needed_methods(
353  methods_to_convert_later,
354  instantiated_classes,
355  symbol_table,
357 
358  if(method_converter(method_name, needed_methods))
359  return result;
360 
361  const exprt &method_body = symbol_table.lookup_ref(method_name).value;
362  gather_virtual_callsites(method_body, called_virtual_functions);
363 
364  if(!class_initializer_already_seen && references_class_model(method_body))
365  {
366  result.class_initializer_seen = true;
367  const irep_idt initializer_signature =
369  if(symbol_table.has_symbol(initializer_signature))
370  methods_to_convert_later.insert(initializer_signature);
371  }
372  result.new_method_seen = true;
373  return result;
374 }
375 
381 std::unordered_set<irep_idt> ci_lazy_methodst::entry_point_methods(
382  const symbol_table_baset &symbol_table,
383  message_handlert &message_handler)
384 {
385  std::unordered_set<irep_idt> methods_to_convert_later;
386 
387  const main_function_resultt main_function =
388  get_main_symbol(symbol_table, this->main_class, message_handler);
389  if(!main_function.is_success())
390  {
391  // Failed, mark all functions in the given main class(es)
392  // reachable.
393  std::vector<irep_idt> reachable_classes;
394  if(!this->main_class.empty())
395  reachable_classes.push_back(this->main_class);
396  else
397  reachable_classes = this->main_jar_classes;
398  for(const irep_idt &class_name : reachable_classes)
399  {
400  const auto &methods =
401  this->java_class_loader.get_original_class(class_name)
403  for(const auto &method : methods)
404  {
405  const irep_idt methodid = "java::" + id2string(class_name) + "." +
406  id2string(method.name) + ":" +
407  id2string(method.descriptor);
408  methods_to_convert_later.insert(methodid);
409  }
410  }
411  }
412  else
413  methods_to_convert_later.insert(main_function.main_function.name);
414  return methods_to_convert_later;
415 }
416 
426  const std::unordered_set<irep_idt> &entry_points,
427  const namespacet &ns,
428  ci_lazy_methods_neededt &needed_lazy_methods)
429 {
430  for(const auto &mname : entry_points)
431  {
432  const auto &symbol=ns.lookup(mname);
433  const auto &mtype = to_java_method_type(symbol.type);
434  for(const auto &param : mtype.parameters())
435  {
436  if(param.type().id()==ID_pointer)
437  {
438  const pointer_typet &original_pointer = to_pointer_type(param.type());
439  needed_lazy_methods.add_all_needed_classes(original_pointer);
440  }
441  }
442  }
443 
444  // Also add classes whose instances are magically
445  // created by the JVM and so won't be spotted by
446  // looking for constructors and calls as usual:
447  needed_lazy_methods.add_needed_class("java::java.lang.String");
448  needed_lazy_methods.add_needed_class("java::java.lang.Class");
449  needed_lazy_methods.add_needed_class("java::java.lang.Object");
450 
451  // As in class_loader, ensure these classes stay available
452  for(const auto &id : extra_instantiated_classes)
453  needed_lazy_methods.add_needed_class("java::" + id2string(id));
454 }
455 
461  const exprt &e,
462  std::unordered_set<class_method_descriptor_exprt, irep_hash> &result)
463 {
464  if(e.id()!=ID_code)
465  return;
466  const codet &c=to_code(e);
467  if(
468  c.get_statement() == ID_function_call &&
470  to_code_function_call(c).function()))
471  {
472  result.insert(
474  }
475  else
476  {
477  for(const exprt &op : e.operands())
478  gather_virtual_callsites(op, result);
479  }
480 }
481 
493  const class_method_descriptor_exprt &called_function,
494  const std::unordered_set<irep_idt> &instantiated_classes,
495  std::unordered_set<irep_idt> &callable_methods,
496  symbol_table_baset &symbol_table)
497 {
498  const auto &call_class = called_function.class_id();
499  const auto &method_name = called_function.mangled_method_name();
500 
501  class_hierarchyt::idst self_and_child_classes =
503  self_and_child_classes.push_back(call_class);
504 
505  for(const irep_idt &class_name : self_and_child_classes)
506  {
507  const irep_idt method_id = get_virtual_method_target(
508  instantiated_classes, method_name, class_name, symbol_table);
509  if(!method_id.empty())
510  callable_methods.insert(method_id);
511  }
512 }
513 
520  const exprt &e,
521  const symbol_table_baset &symbol_table,
522  symbol_table_baset &needed)
523 {
524  if(e.id()==ID_symbol)
525  {
526  // If the symbol isn't in the symbol table at all, then it is defined
527  // on an opaque type (i.e. we don't have the class definition at this point)
528  // and will be created during the typecheck phase.
529  // We don't mark it as 'needed' as it doesn't exist yet to keep.
530  const auto findit=
531  symbol_table.symbols.find(to_symbol_expr(e).get_identifier());
532  if(findit!=symbol_table.symbols.end() &&
533  findit->second.is_static_lifetime)
534  {
535  needed.add(findit->second);
536  // Gather any globals referenced in the initialiser:
537  gather_needed_globals(findit->second.value, symbol_table, needed);
538  }
539  }
540  else
541  {
542  for(const auto &op : e.operands())
543  gather_needed_globals(op, symbol_table, needed);
544  }
545 }
546 
560  const std::unordered_set<irep_idt> &instantiated_classes,
561  const irep_idt &call_basename,
562  const irep_idt &classname,
563  const symbol_table_baset &symbol_table)
564 {
565  // Program-wide, is this class ever instantiated?
566  if(!instantiated_classes.count(classname))
567  return irep_idt();
568 
569  auto resolved_call =
570  get_inherited_method_implementation(call_basename, classname, symbol_table);
571 
572  if(resolved_call)
573  return resolved_call->get_full_component_identifier();
574  else
575  return irep_idt();
576 }
static bool references_class_model(const exprt &expr)
Checks if an expression refers to any class literals (e.g.
Collect methods needed to be loaded using the lazy method.
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
void add_all_needed_classes(const pointer_typet &pointer_type)
Add to the needed classes all classes specified, the replacement type if it will be replaced,...
bool add_needed_class(const irep_idt &)
Notes class class_symbol_name will be instantiated, or a static field belonging to it will be accesse...
convert_method_resultt convert_and_analyze_method(const method_convertert &method_converter, std::unordered_set< irep_idt > &methods_already_populated, const bool class_initializer_already_seen, const irep_idt &method_name, symbol_table_baset &symbol_table, std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< class_method_descriptor_exprt, irep_hash > &called_virtual_functions, message_handlert &message_handler)
Convert a method, add it to the populated set, add needed methods to methods_to_convert_later and add...
const std::vector< irep_idt > & extra_instantiated_classes
irep_idt get_virtual_method_target(const std::unordered_set< irep_idt > &instantiated_classes, const irep_idt &call_basename, const irep_idt &classname, const symbol_table_baset &symbol_table)
Find a virtual callee, if one is defined and the callee type is known to exist.
void get_virtual_method_targets(const class_method_descriptor_exprt &called_function, const std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< irep_idt > &callable_methods, symbol_table_baset &symbol_table)
Find possible callees, excluding types that are not known to be instantiated.
std::vector< irep_idt > main_jar_classes
const select_pointer_typet & pointer_type_selector
const std::vector< load_extra_methodst > & lazy_methods_extra_entry_points
class_hierarchyt class_hierarchy
void gather_virtual_callsites(const exprt &e, std::unordered_set< class_method_descriptor_exprt, irep_hash > &result)
Get places where virtual functions are called.
java_class_loadert & java_class_loader
bool operator()(symbol_table_baset &symbol_table, method_bytecodet &method_bytecode, const method_convertert &method_converter, message_handlert &message_handler)
Uses a simple context-insensitive ('ci') analysis to determine which methods may be reachable from th...
const synthetic_methods_mapt & synthetic_methods
ci_lazy_methodst(const symbol_table_baset &symbol_table, const irep_idt &main_class, const std::vector< irep_idt > &main_jar_classes, const std::vector< load_extra_methodst > &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector< irep_idt > &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, const synthetic_methods_mapt &synthetic_methods)
Constructor for lazy-method loading.
void gather_needed_globals(const exprt &e, const symbol_table_baset &symbol_table, symbol_table_baset &needed)
See output.
void initialize_instantiated_classes(const std::unordered_set< irep_idt > &entry_points, const namespacet &ns, ci_lazy_methods_neededt &needed_lazy_methods)
Build up a list of methods whose type may be passed around reachable from the entry point.
bool handle_virtual_methods_with_no_callees(std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, const std::unordered_set< class_method_descriptor_exprt, irep_hash > &virtual_functions, symbol_table_baset &symbol_table)
Look for virtual callsites with no candidate targets.
std::unordered_set< irep_idt > entry_point_methods(const symbol_table_baset &symbol_table, message_handlert &message_handler)
Entry point methods are either:
idst get_children_trans(const irep_idt &id) const
std::vector< irep_idt > idst
An expression describing a method on a class.
Definition: std_expr.h:3503
const irep_idt & class_id() const
Unique identifier in the symbol table, of the compile time type of the class which this expression is...
Definition: std_expr.h:3548
const irep_idt & mangled_method_name() const
The method name after mangling it by combining it with the descriptor.
Definition: std_expr.h:3540
const typet & return_type() const
Definition: std_types.h:689
const parametert * get_this() const
Definition: std_types.h:665
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
const irep_idt & get_statement() const
Definition: std_code_base.h:65
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
bool empty() const
Definition: dstring.h:89
Base class for all expressions.
Definition: expr.h:56
depth_iteratort depth_end()
Definition: expr.cpp:249
depth_iteratort depth_begin()
Definition: expr.cpp:247
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
const irep_idt & id() const
Definition: irep.h:388
Class responsible to load .class files.
const java_bytecode_parse_treet & get_original_class(const irep_idt &class_name)
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:154
static eomt eom
Definition: message.h:289
bool contains_method(const irep_idt &method_id) const
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:148
The pointer type These are both 'bitvector_typet' (they have a width) and 'type_with_subtypet' (they ...
Definition: pointer_expr.h:24
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:493
const irep_idt & get_identifier() const
Definition: std_expr.h:160
The symbol table base class interface.
std::vector< irep_idt > sorted_symbol_names() const
Build and return a lexicographically sorted vector of symbol names from all symbols stored in this sy...
bool remove(const irep_idt &name)
Remove a symbol from the symbol table.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
The symbol table.
Definition: symbol_table.h:14
irep_idt name
The unique identifier.
Definition: symbol.h:40
exprt value
Initial value of symbol.
Definition: symbol.h:34
The type of an expression, extends irept.
Definition: type.h:29
Forward depth-first search iterators These iterators' copy operations are expensive,...
const code_function_callt & to_code_function_call(const goto_instruction_codet &code)
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
#define JAVA_CLASS_MODEL_SUFFIX
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
irep_idt get_java_class_literal_initializer_signature()
Get the symbol name of java.lang.Class' initializer method.
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:183
double log(double x)
Definition: math.c:2776
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
Remove function exceptional returns.
#define INFLIGHT_EXCEPTION_VARIABLE_NAME
std::optional< resolve_inherited_componentt::inherited_componentt > get_inherited_method_implementation(const irep_idt &call_basename, const irep_idt &classname, const symbol_table_baset &symbol_table)
Given a class and a component, identify the concrete method it is resolved to.
Given a class and a component (either field or method), find the closest parent that defines that com...
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
const codet & to_code(const exprt &expr)
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
bool can_cast_expr< class_method_descriptor_exprt >(const exprt &base)
Definition: std_expr.h:3601
bool can_cast_expr< symbol_exprt >(const exprt &base)
Definition: std_expr.h:256
const class_method_descriptor_exprt & to_class_method_descriptor_expr(const exprt &expr)
Cast an exprt to a class_method_descriptor_exprt.
Definition: std_expr.h:3593
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:17
Author: Diffblue Ltd.
std::unordered_map< irep_idt, synthetic_method_typet > synthetic_methods_mapt
Maps method names on to a synthetic method kind.
dstringt irep_idt