CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ci_lazy_methods.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Java Bytecode
4
5Author: Diffblue Ltd.
6
7\*******************************************************************/
8
9#include "ci_lazy_methods.h"
10
11#include <util/expr_iterator.h>
12#include <util/message.h>
13#include <util/namespace.h>
14#include <util/suffix.h>
15#include <util/symbol_table.h>
16
18
20#include "java_class_loader.h"
21#include "java_entry_point.h"
22#include "remove_exceptions.h"
23
40 const symbol_table_baset &symbol_table,
41 const irep_idt &main_class,
42 const std::vector<irep_idt> &main_jar_classes,
43 const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
44 java_class_loadert &java_class_loader,
45 const std::vector<irep_idt> &extra_instantiated_classes,
46 const select_pointer_typet &pointer_type_selector,
47 const synthetic_methods_mapt &synthetic_methods)
48 : main_class(main_class),
49 main_jar_classes(main_jar_classes),
50 lazy_methods_extra_entry_points(lazy_methods_extra_entry_points),
51 java_class_loader(java_class_loader),
52 extra_instantiated_classes(extra_instantiated_classes),
53 pointer_type_selector(pointer_type_selector),
54 synthetic_methods(synthetic_methods)
55{
56 // build the class hierarchy
57 class_hierarchy(symbol_table);
58}
59
66static bool references_class_model(const exprt &expr)
67{
68 static const struct_tag_typet class_type("java::java.lang.Class");
69
70 for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
71 {
73 it->type() == class_type &&
75 id2string(to_symbol_expr(*it).get_identifier()),
77 {
78 return true;
79 }
80 }
81
82 return false;
83}
84
102 symbol_table_baset &symbol_table,
103 method_bytecodet &method_bytecode,
105 message_handlert &message_handler)
106{
107 std::unordered_set<irep_idt> methods_to_convert_later =
108 entry_point_methods(symbol_table, message_handler);
109
110 // Add any extra entry points specified; we should elaborate these in the
111 // same way as the main function.
113 {
114 std::vector<irep_idt> extra_methods =
115 extra_function_generator(symbol_table);
116 methods_to_convert_later.insert(extra_methods.begin(), extra_methods.end());
117 }
118
119 std::unordered_set<irep_idt> instantiated_classes;
120
121 {
122 std::unordered_set<irep_idt> initial_callable_methods;
125 instantiated_classes,
126 symbol_table,
132 }
133
134 std::unordered_set<irep_idt> methods_already_populated;
135 std::unordered_set<class_method_descriptor_exprt, irep_hash>
137 bool class_initializer_seen = false;
138
139 messaget log{message_handler};
140
141 bool any_new_classes = true;
142 while(any_new_classes)
143 {
144 bool any_new_methods = true;
145 while(any_new_methods)
146 {
147 any_new_methods = false;
148 while(!methods_to_convert_later.empty())
149 {
150 std::unordered_set<irep_idt> methods_to_convert;
152 for(const auto &mname : methods_to_convert)
153 {
157 class_initializer_seen,
158 mname,
159 symbol_table,
161 instantiated_classes,
163 message_handler);
164 any_new_methods |= conversion_result.new_method_seen;
165 class_initializer_seen |= conversion_result.class_initializer_seen;
166 }
167 }
168
169 // Given the object types we now know may be created, populate more
170 // possible virtual function call targets:
171
172 log.debug() << "CI lazy methods: add virtual method targets ("
173 << called_virtual_functions.size() << " callsites)"
174 << messaget::eom;
175
178 {
181 instantiated_classes,
183 symbol_table);
184 }
185 }
186
189 instantiated_classes,
191 symbol_table);
192 }
193
194 // Remove symbols for methods that were declared but never used:
196 // Manually keep @inflight_exception, as it is unused at this stage
197 // but will become used when the `remove_exceptions` pass is run:
199
200 for(const auto &sym : symbol_table.symbols)
201 {
202 // Don't keep global variables (unless they're gathered below from a
203 // function that references them)
204 if(sym.second.is_static_lifetime)
205 continue;
206 if(sym.second.type.id()==ID_code)
207 {
208 // Don't keep functions that belong to this language that we haven't
209 // converted above
210 if(
211 (method_bytecode.contains_method(sym.first) ||
212 synthetic_methods.count(sym.first)) &&
213 !methods_already_populated.count(sym.first))
214 {
215 continue;
216 }
217 // If this is a function then add all the things used in it
218 gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
219 }
220 keep_symbols.add(sym.second);
221 }
222
223 log.debug() << "CI lazy methods: removed "
224 << symbol_table.symbols.size() - keep_symbols.symbols.size()
225 << " unreachable methods and globals" << messaget::eom;
226
227 auto sorted_to_keep = keep_symbols.sorted_symbol_names();
228 auto all_sorted = symbol_table.sorted_symbol_names();
229 auto it = sorted_to_keep.cbegin();
230 for(const auto &id : all_sorted)
231 {
232 if(it == sorted_to_keep.cend() || id != *it)
233 symbol_table.remove(id);
234 else
235 ++it;
236 }
237
238 return false;
239}
240
249 std::unordered_set<irep_idt> &methods_to_convert_later,
250 std::unordered_set<irep_idt> &instantiated_classes,
251 const std::unordered_set<class_method_descriptor_exprt, irep_hash>
253 symbol_table_baset &symbol_table)
254{
257 instantiated_classes,
258 symbol_table,
260
261 bool any_new_classes = false;
263 {
264 std::unordered_set<irep_idt> candidate_target_methods;
267 instantiated_classes,
269 symbol_table);
270
271 if(!candidate_target_methods.empty())
272 continue;
273
276
277 // Add the call class to instantiated_classes and assert that it
278 // didn't already exist. It can't be instantiated already, otherwise it
279 // would give a concrete definition of the called method, and
280 // candidate_target_methods would be non-empty.
281 const irep_idt &call_class = virtual_function.class_id();
282 bool was_missing = instantiated_classes.count(call_class) == 0;
284 any_new_classes = true;
285
286 const typet &this_type = java_method_type.get_this()->type();
287 if(
290 {
291 lazy_methods_loader.add_all_needed_classes(*this_pointer_type);
292 }
293
294 // That should in particular have added call_class to the possibly
295 // instantiated types.
296 bool still_missing = instantiated_classes.count(call_class) == 0;
298
299 // Make sure we add our return type as required, as we may not have
300 // seen any concrete instances of it being created.
301 const typet &return_type = java_method_type.return_type();
302 if(
305 {
306 lazy_methods_loader.add_all_needed_classes(*return_pointer_type);
307 }
308
309 // Check that `get_virtual_method_target` returns a method now
310 const irep_idt &method_name = virtual_function.mangled_method_name();
311 const irep_idt method_id = get_virtual_method_target(
312 instantiated_classes, method_name, call_class, symbol_table);
313 CHECK_RETURN(!method_id.empty());
314
315 // Add what it returns to methods_to_convert_later
316 methods_to_convert_later.insert(method_id);
317 }
318 return any_new_classes;
319}
320
333 std::unordered_set<irep_idt> &methods_already_populated,
335 const irep_idt &method_name,
336 symbol_table_baset &symbol_table,
337 std::unordered_set<irep_idt> &methods_to_convert_later,
338 std::unordered_set<irep_idt> &instantiated_classes,
339 std::unordered_set<class_method_descriptor_exprt, irep_hash>
341 message_handlert &message_handler)
342{
344 if(!methods_already_populated.insert(method_name).second)
345 return result;
346
347 messaget log{message_handler};
348 log.debug() << "CI lazy methods: elaborate " << method_name << messaget::eom;
349
350 // Note this wraps *references* to methods_to_convert_later &
351 // instantiated_classes
354 instantiated_classes,
355 symbol_table,
357
359 return result;
360
361 const exprt &method_body = symbol_table.lookup_ref(method_name).value;
363
365 {
366 result.class_initializer_seen = true;
369 if(symbol_table.has_symbol(initializer_signature))
371 }
372 result.new_method_seen = true;
373 return result;
374}
375
381std::unordered_set<irep_idt> ci_lazy_methodst::entry_point_methods(
382 const symbol_table_baset &symbol_table,
383 message_handlert &message_handler)
384{
385 std::unordered_set<irep_idt> methods_to_convert_later;
386
387 const main_function_resultt main_function =
388 get_main_symbol(symbol_table, this->main_class, message_handler);
389 if(!main_function.is_success())
390 {
391 // Failed, mark all functions in the given main class(es)
392 // reachable.
393 std::vector<irep_idt> reachable_classes;
394 if(!this->main_class.empty())
395 reachable_classes.push_back(this->main_class);
396 else
397 reachable_classes = this->main_jar_classes;
398 for(const irep_idt &class_name : reachable_classes)
399 {
400 const auto &methods =
401 this->java_class_loader.get_original_class(class_name)
402 .parsed_class.methods;
403 for(const auto &method : methods)
404 {
405 const irep_idt methodid = "java::" + id2string(class_name) + "." +
406 id2string(method.name) + ":" +
407 id2string(method.descriptor);
409 }
410 }
411 }
412 else
413 methods_to_convert_later.insert(main_function.main_function.name);
415}
416
426 const std::unordered_set<irep_idt> &entry_points,
427 const namespacet &ns,
428 ci_lazy_methods_neededt &needed_lazy_methods)
429{
430 for(const auto &mname : entry_points)
431 {
432 const auto &symbol=ns.lookup(mname);
433 const auto &mtype = to_java_method_type(symbol.type);
434 for(const auto &param : mtype.parameters())
435 {
436 if(param.type().id()==ID_pointer)
437 {
439 needed_lazy_methods.add_all_needed_classes(original_pointer);
440 }
441 }
442 }
443
444 // Also add classes whose instances are magically
445 // created by the JVM and so won't be spotted by
446 // looking for constructors and calls as usual:
447 needed_lazy_methods.add_needed_class("java::java.lang.String");
448 needed_lazy_methods.add_needed_class("java::java.lang.Class");
449 needed_lazy_methods.add_needed_class("java::java.lang.Object");
450
451 // As in class_loader, ensure these classes stay available
452 for(const auto &id : extra_instantiated_classes)
453 needed_lazy_methods.add_needed_class("java::" + id2string(id));
454}
455
461 const exprt &e,
462 std::unordered_set<class_method_descriptor_exprt, irep_hash> &result)
463{
464 if(e.id()!=ID_code)
465 return;
466 const codet &c=to_code(e);
467 if(
468 c.get_statement() == ID_function_call &&
470 to_code_function_call(c).function()))
471 {
472 result.insert(
474 }
475 else
476 {
477 for(const exprt &op : e.operands())
478 gather_virtual_callsites(op, result);
479 }
480}
481
493 const class_method_descriptor_exprt &called_function,
494 const std::unordered_set<irep_idt> &instantiated_classes,
495 std::unordered_set<irep_idt> &callable_methods,
496 symbol_table_baset &symbol_table)
497{
498 const auto &call_class = called_function.class_id();
499 const auto &method_name = called_function.mangled_method_name();
500
504
505 for(const irep_idt &class_name : self_and_child_classes)
506 {
507 const irep_idt method_id = get_virtual_method_target(
508 instantiated_classes, method_name, class_name, symbol_table);
509 if(!method_id.empty())
510 callable_methods.insert(method_id);
511 }
512}
513
520 const exprt &e,
521 const symbol_table_baset &symbol_table,
523{
524 if(e.id()==ID_symbol)
525 {
526 // If the symbol isn't in the symbol table at all, then it is defined
527 // on an opaque type (i.e. we don't have the class definition at this point)
528 // and will be created during the typecheck phase.
529 // We don't mark it as 'needed' as it doesn't exist yet to keep.
530 const auto findit=
531 symbol_table.symbols.find(to_symbol_expr(e).get_identifier());
532 if(findit!=symbol_table.symbols.end() &&
533 findit->second.is_static_lifetime)
534 {
535 needed.add(findit->second);
536 // Gather any globals referenced in the initialiser:
537 gather_needed_globals(findit->second.value, symbol_table, needed);
538 }
539 }
540 else
541 {
542 for(const auto &op : e.operands())
543 gather_needed_globals(op, symbol_table, needed);
544 }
545}
546
560 const std::unordered_set<irep_idt> &instantiated_classes,
561 const irep_idt &call_basename,
562 const irep_idt &classname,
563 const symbol_table_baset &symbol_table)
564{
565 // Program-wide, is this class ever instantiated?
566 if(!instantiated_classes.count(classname))
567 return irep_idt();
568
569 auto resolved_call =
571
572 if(resolved_call)
573 return resolved_call->get_full_component_identifier();
574 else
575 return irep_idt();
576}
static bool references_class_model(const exprt &expr)
Checks if an expression refers to any class literals (e.g.
Collect methods needed to be loaded using the lazy method.
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
void add_all_needed_classes(const pointer_typet &pointer_type)
Add to the needed classes all classes specified, the replacement type if it will be replaced,...
bool add_needed_class(const irep_idt &)
Notes class class_symbol_name will be instantiated, or a static field belonging to it will be accesse...
convert_method_resultt convert_and_analyze_method(const method_convertert &method_converter, std::unordered_set< irep_idt > &methods_already_populated, const bool class_initializer_already_seen, const irep_idt &method_name, symbol_table_baset &symbol_table, std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< class_method_descriptor_exprt, irep_hash > &called_virtual_functions, message_handlert &message_handler)
Convert a method, add it to the populated set, add needed methods to methods_to_convert_later and add...
const std::vector< irep_idt > & extra_instantiated_classes
irep_idt get_virtual_method_target(const std::unordered_set< irep_idt > &instantiated_classes, const irep_idt &call_basename, const irep_idt &classname, const symbol_table_baset &symbol_table)
Find a virtual callee, if one is defined and the callee type is known to exist.
void get_virtual_method_targets(const class_method_descriptor_exprt &called_function, const std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< irep_idt > &callable_methods, symbol_table_baset &symbol_table)
Find possible callees, excluding types that are not known to be instantiated.
std::vector< irep_idt > main_jar_classes
const select_pointer_typet & pointer_type_selector
const std::vector< load_extra_methodst > & lazy_methods_extra_entry_points
class_hierarchyt class_hierarchy
void gather_virtual_callsites(const exprt &e, std::unordered_set< class_method_descriptor_exprt, irep_hash > &result)
Get places where virtual functions are called.
java_class_loadert & java_class_loader
bool operator()(symbol_table_baset &symbol_table, method_bytecodet &method_bytecode, const method_convertert &method_converter, message_handlert &message_handler)
Uses a simple context-insensitive ('ci') analysis to determine which methods may be reachable from th...
const synthetic_methods_mapt & synthetic_methods
ci_lazy_methodst(const symbol_table_baset &symbol_table, const irep_idt &main_class, const std::vector< irep_idt > &main_jar_classes, const std::vector< load_extra_methodst > &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector< irep_idt > &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, const synthetic_methods_mapt &synthetic_methods)
Constructor for lazy-method loading.
void gather_needed_globals(const exprt &e, const symbol_table_baset &symbol_table, symbol_table_baset &needed)
See output.
void initialize_instantiated_classes(const std::unordered_set< irep_idt > &entry_points, const namespacet &ns, ci_lazy_methods_neededt &needed_lazy_methods)
Build up a list of methods whose type may be passed around reachable from the entry point.
bool handle_virtual_methods_with_no_callees(std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, const std::unordered_set< class_method_descriptor_exprt, irep_hash > &virtual_functions, symbol_table_baset &symbol_table)
Look for virtual callsites with no candidate targets.
std::unordered_set< irep_idt > entry_point_methods(const symbol_table_baset &symbol_table, message_handlert &message_handler)
Entry point methods are either:
idst get_children_trans(const irep_idt &id) const
std::vector< irep_idt > idst
An expression describing a method on a class.
Definition std_expr.h:3633
const irep_idt & mangled_method_name() const
The method name after mangling it by combining it with the descriptor.
Definition std_expr.h:3670
const irep_idt & class_id() const
Unique identifier in the symbol table, of the compile time type of the class which this expression is...
Definition std_expr.h:3678
Data structure for representing an arbitrary statement in a program.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
bool empty() const
Definition dstring.h:89
Base class for all expressions.
Definition expr.h:56
depth_iteratort depth_end()
Definition expr.cpp:249
depth_iteratort depth_begin()
Definition expr.cpp:247
operandst & operands()
Definition expr.h:94
const irep_idt & id() const
Definition irep.h:388
Class responsible to load .class files.
const java_bytecode_parse_treet & get_original_class(const irep_idt &class_name)
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:154
static eomt eom
Definition message.h:289
bool contains_method(const irep_idt &method_id) const
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
The pointer type These are both 'bitvector_typet' (they have a width) and 'type_with_subtypet' (they ...
A struct tag type, i.e., struct_typet with an identifier.
Definition std_types.h:493
The symbol table base class interface.
std::vector< irep_idt > sorted_symbol_names() const
Build and return a lexicographically sorted vector of symbol names from all symbols stored in this sy...
bool remove(const irep_idt &name)
Remove a symbol from the symbol table.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
The symbol table.
irep_idt name
The unique identifier.
Definition symbol.h:40
The type of an expression, extends irept.
Definition type.h:29
Forward depth-first search iterators These iterators' copy operations are expensive,...
const code_function_callt & to_code_function_call(const goto_instruction_codet &code)
const std::string & id2string(const irep_idt &d)
Definition irep.h:44
#define JAVA_CLASS_MODEL_SUFFIX
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
irep_idt get_java_class_literal_initializer_signature()
Get the symbol name of java.lang.Class' initializer method.
const java_method_typet & to_java_method_type(const typet &type)
Definition java_types.h:183
double log(double x)
Definition math.c:2449
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Remove function exceptional returns.
#define INFLIGHT_EXCEPTION_VARIABLE_NAME
std::optional< resolve_inherited_componentt::inherited_componentt > get_inherited_method_implementation(const irep_idt &call_basename, const irep_idt &classname, const symbol_table_baset &symbol_table)
Given a class and a component, identify the concrete method it is resolved to.
Given a class and a component (either field or method), find the closest parent that defines that com...
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
const codet & to_code(const exprt &expr)
const class_method_descriptor_exprt & to_class_method_descriptor_expr(const exprt &expr)
Cast an exprt to a class_method_descriptor_exprt.
Definition std_expr.h:3723
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition std_expr.h:272
bool has_suffix(const std::string &s, const std::string &suffix)
Definition suffix.h:17
Author: Diffblue Ltd.
std::unordered_map< irep_idt, synthetic_method_typet > synthetic_methods_mapt
Maps method names on to a synthetic method kind.
dstringt irep_idt