CBMC
java_bytecode_convert_method.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: JAVA Bytecode Language Conversion
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #ifdef DEBUG
13 #include <iostream>
14 #endif
15 
17 
18 #include <util/arith_tools.h>
19 #include <util/bitvector_expr.h>
20 #include <util/c_types.h>
21 #include <util/expr_initializer.h>
22 #include <util/floatbv_expr.h>
23 #include <util/ieee_float.h>
24 #include <util/invariant.h>
25 #include <util/namespace.h>
26 #include <util/prefix.h>
27 #include <util/prefix_filter.h> // IWYU pragma: keep
28 #include <util/std_expr.h>
29 #include <util/symbol_table_base.h>
30 #include <util/threeval.h>
31 
33 
35 
36 #include "bytecode_info.h"
38 #include "java_expr.h"
42 #include "java_types.h"
43 #include "java_utils.h"
44 #include "lambda_synthesis.h"
45 #include "pattern.h"
46 
47 #include <algorithm>
48 #include <limits>
49 
63  java_method_typet &ftype,
64  const irep_idt &name_prefix,
65  symbol_table_baset &symbol_table)
66 {
67  java_method_typet::parameterst &parameters = ftype.parameters();
68 
69  // Mostly borrowed from java_bytecode_convert.cpp; maybe factor this out.
70  // assign names to parameters
71  for(std::size_t i=0; i<parameters.size(); ++i)
72  {
73  irep_idt base_name, identifier;
74 
75  if(i==0 && parameters[i].get_this())
76  base_name = ID_this;
77  else
78  base_name="stub_ignored_arg"+std::to_string(i);
79 
80  identifier=id2string(name_prefix)+"::"+id2string(base_name);
81  parameters[i].set_base_name(base_name);
82  parameters[i].set_identifier(identifier);
83 
84  // add to symbol table
85  parameter_symbolt parameter_symbol;
86  parameter_symbol.base_name=base_name;
87  parameter_symbol.mode=ID_java;
88  parameter_symbol.name=identifier;
89  parameter_symbol.type=parameters[i].type();
90  symbol_table.add(parameter_symbol);
91  }
92 }
93 
95  const irep_idt &identifier,
96  const irep_idt &base_name,
97  const irep_idt &pretty_name,
98  const typet &type,
100  symbol_table_baset &symbol_table,
101  message_handlert &message_handler)
102 {
103  messaget log(message_handler);
104 
105  symbolt symbol{identifier, type, ID_java};
106  symbol.base_name = base_name;
107  symbol.pretty_name = pretty_name;
108  symbol.type.set(ID_access, ID_private);
109  to_java_method_type(symbol.type).set_is_final(true);
111  to_java_method_type(symbol.type), symbol.name, symbol_table);
113 
114  log.debug() << "Generating codet: new opaque symbol: method '" << symbol.name
115  << "'" << messaget::eom;
116  symbol_table.add(symbol);
117 }
118 
119 static bool is_constructor(const irep_idt &method_name)
120 {
121  return id2string(method_name).find("<init>") != std::string::npos;
122 }
123 
125 {
126  if(stack.size()<n)
127  {
128  log.error() << "malformed bytecode (pop too high)" << messaget::eom;
129  throw 0;
130  }
131 
132  exprt::operandst operands;
133  operands.resize(n);
134  for(std::size_t i=0; i<n; i++)
135  operands[i]=stack[stack.size()-n+i];
136 
137  stack.resize(stack.size()-n);
138  return operands;
139 }
140 
143 {
144  std::size_t residue_size=std::min(n, stack.size());
145 
146  stack.resize(stack.size()-residue_size);
147 }
148 
150 {
151  stack.resize(stack.size()+o.size());
152 
153  for(std::size_t i=0; i<o.size(); i++)
154  stack[stack.size()-o.size()+i]=o[i];
155 }
156 
157 // JVM program locations
159 {
160  return "pc"+id2string(address);
161 }
162 
164  const std::string &prefix,
165  const typet &type)
166 {
167  irep_idt base_name=prefix+"_tmp"+std::to_string(tmp_vars.size());
168  irep_idt identifier=id2string(current_method)+"::"+id2string(base_name);
169 
170  auxiliary_symbolt tmp_symbol;
171  tmp_symbol.base_name=base_name;
172  tmp_symbol.is_static_lifetime=false;
173  tmp_symbol.mode=ID_java;
174  tmp_symbol.name=identifier;
175  tmp_symbol.type=type;
176  symbol_table.add(tmp_symbol);
177 
178  symbol_exprt result(identifier, type);
179  result.set(ID_C_base_name, base_name);
180  tmp_vars.push_back(result);
181 
182  return result;
183 }
184 
197  const exprt &arg,
198  char type_char,
199  size_t address)
200 {
201  const std::size_t number_int =
202  numeric_cast_v<std::size_t>(to_constant_expr(arg));
203  variablest &var_list=variables[number_int];
204 
205  // search variable in list for correct frame / address if necessary
206  const variablet &var=
207  find_variable_for_slot(address, var_list);
208 
209  if(!var.symbol_expr.get_identifier().empty())
210  return var.symbol_expr;
211 
212  // an unnamed local variable
213  irep_idt base_name = "anonlocal::" + std::to_string(number_int) + type_char;
214  irep_idt identifier = id2string(current_method) + "::" + id2string(base_name);
215 
216  symbol_exprt result(identifier, java_type_from_char(type_char));
217  result.set(ID_C_base_name, base_name);
218  used_local_names.insert(result);
219  return std::move(result);
220 }
221 
230  const std::string &descriptor,
231  const std::optional<std::string> &signature,
232  const std::string &class_name,
233  const std::string &method_name,
234  message_handlert &message_handler)
235 {
236  // In order to construct the method type, we can either use signature or
237  // descriptor. Since only signature contains the generics info, we want to
238  // use signature whenever possible. We revert to using descriptor if (1) the
239  // signature does not exist, (2) an unsupported generics are present or
240  // (3) in the special case when the number of parameters in the descriptor
241  // does not match the number of parameters in the signature - e.g. for
242  // certain types of inner classes and enums (see unit tests for examples).
243 
244  messaget message(message_handler);
245 
246  auto member_type_from_descriptor = java_type_from_string(descriptor);
247  INVARIANT(
248  member_type_from_descriptor.has_value() &&
249  member_type_from_descriptor->id() == ID_code,
250  "Must be code type");
251  if(signature.has_value())
252  {
253  try
254  {
255  auto member_type_from_signature =
256  java_type_from_string(signature.value(), class_name);
257  INVARIANT(
258  member_type_from_signature.has_value() &&
259  member_type_from_signature->id() == ID_code,
260  "Must be code type");
261  if(
262  to_java_method_type(*member_type_from_signature).parameters().size() ==
263  to_java_method_type(*member_type_from_descriptor).parameters().size())
264  {
265  return to_java_method_type(*member_type_from_signature);
266  }
267  else
268  {
269  message.debug() << "Method: " << class_name << "." << method_name
270  << "\n signature: " << signature.value()
271  << "\n descriptor: " << descriptor
272  << "\n different number of parameters, reverting to "
273  "descriptor"
274  << message.eom;
275  }
276  }
278  {
279  message.debug() << "Method: " << class_name << "." << method_name
280  << "\n could not parse signature: " << signature.value()
281  << "\n " << e.what() << "\n"
282  << " reverting to descriptor: " << descriptor
283  << message.eom;
284  }
285  }
286  return to_java_method_type(*member_type_from_descriptor);
287 }
288 
300  symbolt &class_symbol,
301  const irep_idt &method_identifier,
303  symbol_table_baset &symbol_table,
304  message_handlert &message_handler)
305 {
306  java_method_typet member_type = member_type_lazy(
307  m.descriptor,
308  m.signature,
309  id2string(class_symbol.name),
310  id2string(m.base_name),
311  message_handler);
312 
313  symbolt method_symbol{method_identifier, typet{}, ID_java};
314  method_symbol.base_name=m.base_name;
315  method_symbol.location=m.source_location;
316  method_symbol.location.set_function(method_identifier);
317 
318  if(m.is_public)
319  member_type.set_access(ID_public);
320  else if(m.is_protected)
321  member_type.set_access(ID_protected);
322  else if(m.is_private)
323  member_type.set_access(ID_private);
324  else
325  member_type.set_access(ID_default);
326 
327  if(m.is_synchronized)
328  member_type.set(ID_is_synchronized, true);
329  if(m.is_static)
330  member_type.set(ID_is_static, true);
331  member_type.set_native(m.is_native);
332  member_type.set_is_varargs(m.is_varargs);
333  member_type.set_is_synthetic(m.is_synthetic);
334 
335  if(m.is_bridge)
336  member_type.set(ID_is_bridge_method, m.is_bridge);
337 
338  // do we need to add 'this' as a parameter?
339  if(!m.is_static)
340  {
341  java_method_typet::parameterst &parameters = member_type.parameters();
342  const reference_typet object_ref_type =
344  java_method_typet::parametert this_p(object_ref_type);
345  this_p.set_this();
346  parameters.insert(parameters.begin(), this_p);
347  }
348 
349  const std::string signature_string = pretty_signature(member_type);
350 
351  if(is_constructor(method_symbol.base_name))
352  {
353  // we use full.class_name(...) as pretty name
354  // for constructors -- the idea is that they have
355  // an empty declarator.
356  method_symbol.pretty_name=
357  id2string(class_symbol.pretty_name) + signature_string;
358 
359  member_type.set_is_constructor();
360  }
361  else
362  {
363  method_symbol.pretty_name = id2string(class_symbol.pretty_name) + "." +
364  id2string(m.base_name) + signature_string;
365  }
366 
367  // Load annotations
368  if(!m.annotations.empty())
369  {
371  m.annotations,
372  type_checked_cast<annotated_typet>(static_cast<typet &>(member_type))
373  .get_annotations());
374  }
375 
376  method_symbol.type=member_type;
377  // Not used in jbmc at present, but other codebases that use jbmc as a library
378  // use this information.
379  method_symbol.type.set(ID_C_abstract, m.is_abstract);
380  set_declaring_class(method_symbol, class_symbol.name);
381 
383  m.annotations, "java::org.cprover.MustNotThrow"))
384  {
385  method_symbol.type.set(ID_C_must_not_throw, true);
386  }
387 
388  // Assign names to parameters in the method symbol
389  java_method_typet &method_type = to_java_method_type(method_symbol.type);
390  method_type.set_is_final(m.is_final);
391  java_method_typet::parameterst &parameters = method_type.parameters();
392  java_bytecode_convert_methodt::method_offsett slots_for_parameters =
393  java_method_parameter_slots(method_type);
395  m, method_identifier, parameters, slots_for_parameters);
396 
397  symbol_table.add(method_symbol);
398 
399  if(!m.is_static)
400  {
401  java_class_typet::methodt new_method{method_symbol.name, method_type};
402  new_method.set_base_name(method_symbol.base_name);
403  new_method.set_pretty_name(method_symbol.pretty_name);
404  new_method.set_access(member_type.get_access());
405  new_method.set_descriptor(m.descriptor);
406 
407  to_java_class_type(class_symbol.type)
408  .methods()
409  .emplace_back(std::move(new_method));
410  }
411 }
412 
414  const irep_idt &class_identifier,
416 {
417  return
418  id2string(class_identifier) + "." + id2string(method.name) + ":" +
419  method.descriptor;
420 }
421 
424  const irep_idt &method_identifier,
425  java_method_typet::parameterst &parameters,
426  const java_bytecode_convert_methodt::method_offsett &slots_for_parameters)
427 {
428  auto variables = variablest{};
429  // Find parameter names in the local variable table
430  // and store the result in the external variables vector
431  for(const auto &v : m.local_variable_table)
432  {
433  // Skip this variable if it is not a method parameter
434  if(v.index >= slots_for_parameters)
435  continue;
436 
437  std::ostringstream id_oss;
438  id_oss << method_identifier << "::" << v.name;
439  irep_idt identifier(id_oss.str());
440  symbol_exprt result = symbol_exprt::typeless(identifier);
441  result.set(ID_C_base_name, v.name);
442 
443  // Create a new variablet in the variables vector; in fact this entry will
444  // be rewritten below in the loop that iterates through the method
445  // parameters; the only field that seem to be useful to write here is the
446  // symbol_expr, others will be rewritten
447  variables[v.index].emplace_back(result, v.start_pc, v.length);
448  }
449 
450  // The variables is a expanding_vectort, and the loop above may have expanded
451  // the vector introducing gaps where the entries are empty vectors. We now
452  // make sure that the vector of each LV slot contains exactly one variablet,
453  // which we will add below
454  std::size_t param_index = 0;
455  for(const auto &param : parameters)
456  {
457  INVARIANT(
458  variables[param_index].size() <= 1,
459  "should have at most one entry per index");
460  param_index += java_local_variable_slots(param.type());
461  }
462  INVARIANT(
463  param_index == slots_for_parameters,
464  "java_parameter_count and local computation must agree");
465  param_index = 0;
466  for(auto &param : parameters)
467  {
468  irep_idt base_name, identifier;
469 
470  // Construct a sensible base name (base_name) and a fully qualified name
471  // (identifier) for every parameter of the method under translation,
472  // regardless of whether we have an LVT or not; and assign it to the
473  // parameter object (which is stored in the type of the symbol, not in the
474  // symbol table)
475  if(param_index == 0 && param.get_this())
476  {
477  // my.package.ClassName.myMethodName:(II)I::this
478  base_name = ID_this;
479  identifier = id2string(method_identifier) + "::" + id2string(base_name);
480  }
481  else if(!variables[param_index].empty())
482  {
483  // if already present in the LVT ...
484  base_name = variables[param_index][0].symbol_expr.get(ID_C_base_name);
485  identifier = variables[param_index][0].symbol_expr.get(ID_identifier);
486  }
487  else
488  {
489  // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local
490  // variable slot where the parameter is stored and T is a character
491  // indicating the type
492  char suffix = java_char_from_type(param.type());
493  base_name = "arg" + std::to_string(param_index) + suffix;
494  identifier = id2string(method_identifier) + "::" + id2string(base_name);
495  }
496  param.set_base_name(base_name);
497  param.set_identifier(identifier);
498  param_index += java_local_variable_slots(param.type());
499  }
500  // The parameter slots detected in this function should agree with what
501  // java_parameter_count() thinks about this method
502  INVARIANT(
503  param_index == slots_for_parameters,
504  "java_parameter_count and local computation must agree");
505 }
506 
508  const java_method_typet::parameterst &parameters,
509  expanding_vectort<std::vector<java_bytecode_convert_methodt::variablet>>
510  &variables,
511  symbol_table_baset &symbol_table)
512 {
513  std::size_t param_index = 0;
514  for(const auto &param : parameters)
515  {
516  parameter_symbolt parameter_symbol;
517  parameter_symbol.base_name = param.get_base_name();
518  parameter_symbol.mode = ID_java;
519  parameter_symbol.name = param.get_identifier();
520  parameter_symbol.type = param.type();
521  symbol_table.add(parameter_symbol);
522 
523  // Add as a JVM local variable
524  variables[param_index].clear();
525  variables[param_index].emplace_back(
526  parameter_symbol.symbol_expr(),
527  0,
528  std::numeric_limits<size_t>::max(),
529  true);
530  param_index += java_local_variable_slots(param.type());
531  }
532 }
533 
535  const symbolt &class_symbol,
536  const methodt &m,
537  const std::optional<prefix_filtert> &method_context)
538 {
539  // Construct the fully qualified method name
540  // (e.g. "my.package.ClassName.myMethodName:(II)I") and query the symbol table
541  // to retrieve the symbol (constructed by java_bytecode_convert_method_lazy)
542  // associated to the method
543  const irep_idt method_identifier =
544  get_method_identifier(class_symbol.name, m);
545 
546  method_id = method_identifier;
548  symbol_table.get_writeable_ref(method_identifier), class_symbol.name);
549 
550  // Obtain a std::vector of java_method_typet::parametert objects from the
551  // (function) type of the symbol
552  // Don't try to hang on to this reference into the symbol table, as we're
553  // about to create symbols for the method's parameters, which would invalidate
554  // the reference. Instead, copy the type here, set it up, then assign it back
555  // to the symbol later.
556  java_method_typet method_type =
557  to_java_method_type(symbol_table.lookup_ref(method_identifier).type);
558  method_type.set_is_final(m.is_final);
559  method_return_type = method_type.return_type();
560  java_method_typet::parameterst &parameters = method_type.parameters();
561 
562  // Determine the number of local variable slots used by the JVM to maintain
563  // the formal parameters
565 
566  log.debug() << "Generating codet: class " << class_symbol.name << ", method "
567  << m.name << messaget::eom;
568 
569  // Add parameter symbols to the symbol table
571 
572  symbolt &method_symbol = symbol_table.get_writeable_ref(method_identifier);
573 
574  // Check the fields that can't change are valid
575  INVARIANT(
576  method_symbol.name == method_identifier,
577  "Name of method symbol shouldn't change");
578  INVARIANT(
579  method_symbol.base_name == m.base_name,
580  "Base name of method symbol shouldn't change");
581  INVARIANT(
582  method_symbol.module.empty(),
583  "Method symbol shouldn't have module");
584  // Update the symbol for the method
585  method_symbol.mode=ID_java;
586  method_symbol.location=m.source_location;
587  method_symbol.location.set_function(method_identifier);
588 
589  for(const auto &exception_name : m.throws_exception_table)
590  method_type.add_throws_exception(exception_name);
591 
592  const std::string signature_string = pretty_signature(method_type);
593 
594  // Set up the pretty name for the method entry in the symbol table.
595  // The pretty name of a constructor includes the base name of the class
596  // instead of the internal method name "<init>". For regular methods, it's
597  // just the base name of the method.
598  if(is_constructor(method_symbol.base_name))
599  {
600  // we use full.class_name(...) as pretty name
601  // for constructors -- the idea is that they have
602  // an empty declarator.
603  method_symbol.pretty_name =
604  id2string(class_symbol.pretty_name) + signature_string;
605  INVARIANT(
606  method_type.get_is_constructor(),
607  "Member type should have already been marked as a constructor");
608  }
609  else
610  {
611  method_symbol.pretty_name = id2string(class_symbol.pretty_name) + "." +
612  id2string(m.base_name) + signature_string;
613  }
614 
615  method_symbol.type = method_type;
616  current_method = method_symbol.name;
617  method_has_this = method_type.has_this();
618  if((!m.is_abstract) && (!m.is_native))
619  {
620  // Do not convert if method is not in context
621  if(!method_context || (*method_context)(id2string(method_identifier)))
622  {
623  code_blockt code{convert_parameter_annotations(m, method_type)};
624  code.append(convert_instructions(m));
625  method_symbol.value = std::move(code);
626  }
627  }
628 }
629 
630 static irep_idt get_if_cmp_operator(const u1 bytecode)
631 {
632  if(bytecode == patternt("if_?cmplt"))
633  return ID_lt;
634  if(bytecode == patternt("if_?cmple"))
635  return ID_le;
636  if(bytecode == patternt("if_?cmpgt"))
637  return ID_gt;
638  if(bytecode == patternt("if_?cmpge"))
639  return ID_ge;
640  if(bytecode == patternt("if_?cmpeq"))
641  return ID_equal;
642  if(bytecode == patternt("if_?cmpne"))
643  return ID_notequal;
644 
645  throw "unhandled java comparison instruction";
646 }
647 
657  const exprt &pointer,
658  const fieldref_exprt &field_reference,
659  const namespacet &ns)
660 {
661  struct_tag_typet class_type(field_reference.class_name());
662 
663  const exprt typed_pointer =
665 
666  const irep_idt &component_name = field_reference.component_name();
667 
668  exprt accessed_object = checked_dereference(typed_pointer);
669  const auto type_of = [&ns](const exprt &object) {
670  return ns.follow_tag(to_struct_tag_type(object.type()));
671  };
672 
673  // The field access is described as being against a particular type, but it
674  // may in fact belong to any ancestor type.
675  while(type_of(accessed_object).get_component(component_name).is_nil())
676  {
677  const auto components = type_of(accessed_object).components();
678  INVARIANT(
679  components.size() != 0,
680  "infer_opaque_type_fields should guarantee that a member access has a "
681  "corresponding field");
682 
683  // Base class access is always done through the first component
684  accessed_object = member_exprt(accessed_object, components.front());
685  }
686  return member_exprt(
687  accessed_object, type_of(accessed_object).get_component(component_name));
688 }
689 
696  codet &repl,
697  const irep_idt &old_label,
698  const irep_idt &new_label)
699 {
700  const auto &stmt=repl.get_statement();
701  if(stmt==ID_goto)
702  {
703  auto &g=to_code_goto(repl);
704  if(g.get_destination()==old_label)
705  g.set_destination(new_label);
706  }
707  else
708  {
709  for(auto &op : repl.operands())
710  if(op.id()==ID_code)
711  replace_goto_target(to_code(op), old_label, new_label);
712  }
713 }
714 
730  block_tree_nodet &tree,
731  code_blockt &this_block,
732  method_offsett address_start,
733  method_offsett address_limit,
734  method_offsett next_block_start_address)
735 {
736  address_mapt dummy;
738  tree,
739  this_block,
740  address_start,
741  address_limit,
742  next_block_start_address,
743  dummy,
744  false);
745 }
746 
767  block_tree_nodet &tree,
768  code_blockt &this_block,
769  method_offsett address_start,
770  method_offsett address_limit,
771  method_offsett next_block_start_address,
772  const address_mapt &amap,
773  bool allow_merge)
774 {
775  // Check the tree shape invariant:
776  PRECONDITION(tree.branch.size() == tree.branch_addresses.size());
777 
778  // If there are no child blocks, return this.
779  if(tree.leaf)
780  return this_block;
781  PRECONDITION(!tree.branch.empty());
782 
783  // Find child block starting > address_start:
784  const auto afterstart=
785  std::upper_bound(
786  tree.branch_addresses.begin(),
787  tree.branch_addresses.end(),
788  address_start);
789  CHECK_RETURN(afterstart != tree.branch_addresses.begin());
790  auto findstart=afterstart;
791  --findstart;
792  auto child_offset=
793  std::distance(tree.branch_addresses.begin(), findstart);
794 
795  // Find child block starting >= address_limit:
796  auto findlim=
797  std::lower_bound(
798  tree.branch_addresses.begin(),
799  tree.branch_addresses.end(),
800  address_limit);
801  const auto findlim_block_start_address =
802  findlim == tree.branch_addresses.end() ? next_block_start_address
803  : (*findlim);
804 
805  // If all children are in scope, return this.
806  if(findstart==tree.branch_addresses.begin() &&
807  findlim==tree.branch_addresses.end())
808  return this_block;
809 
810  // Find the child code_blockt where the queried range begins:
811  auto child_iter = this_block.statements().begin();
812  // Skip any top-of-block declarations;
813  // all other children are labelled subblocks.
814  while(child_iter != this_block.statements().end() &&
815  child_iter->get_statement() == ID_decl)
816  ++child_iter;
817  CHECK_RETURN(child_iter != this_block.statements().end());
818  std::advance(child_iter, child_offset);
819  CHECK_RETURN(child_iter != this_block.statements().end());
820  auto &child_label=to_code_label(*child_iter);
821  auto &child_block=to_code_block(child_label.code());
822 
823  bool single_child(afterstart==findlim);
824  if(single_child)
825  {
826  // Range wholly contained within a child block
828  tree.branch[child_offset],
829  child_block,
830  address_start,
831  address_limit,
832  findlim_block_start_address,
833  amap,
834  allow_merge);
835  }
836 
837  // Otherwise we're being asked for a range of subblocks, but not all of them.
838  // If it's legal to draw a new lexical scope around the requested subset,
839  // do so; otherwise just return this block.
840 
841  // This can be a new lexical scope if all incoming edges target the
842  // new block header, or come from within the suggested new block.
843 
844  // If modifying the block tree is forbidden, give up and return this:
845  if(!allow_merge)
846  return this_block;
847 
848  // Check for incoming control-flow edges targeting non-header
849  // blocks of the new proposed block range:
850  auto checkit=amap.find(*findstart);
851  CHECK_RETURN(checkit != amap.end());
852  ++checkit; // Skip the header, which can have incoming edges from outside.
853  for(;
854  checkit!=amap.end() && (checkit->first)<(findlim_block_start_address);
855  ++checkit)
856  {
857  for(auto p : checkit->second.predecessors)
858  {
859  if(p<(*findstart) || p>=findlim_block_start_address)
860  {
861  log.debug() << "Generating codet: "
862  << "warning: refusing to create lexical block spanning "
863  << (*findstart) << "-" << findlim_block_start_address
864  << " due to incoming edge " << p << " -> " << checkit->first
865  << messaget::eom;
866  return this_block;
867  }
868  }
869  }
870 
871  // All incoming edges are acceptable! Create a new block wrapping
872  // the relevant children. Borrow the header block's label, and redirect
873  // any block-internal edges to target the inner header block.
874 
875  const irep_idt child_label_name=child_label.get_label();
876  std::string new_label_str = id2string(child_label_name);
877  new_label_str+='$';
878  irep_idt new_label_irep(new_label_str);
879 
880  code_labelt newlabel(child_label_name, code_blockt());
881  code_blockt &newblock=to_code_block(newlabel.code());
882  auto nblocks=std::distance(findstart, findlim);
883  CHECK_RETURN(nblocks >= 2);
884  log.debug() << "Generating codet: combining "
885  << std::distance(findstart, findlim) << " blocks for addresses "
886  << (*findstart) << "-" << findlim_block_start_address
887  << messaget::eom;
888 
889  // Make a new block containing every child of interest:
890  auto &this_block_children = this_block.statements();
891  CHECK_RETURN(tree.branch.size() == this_block_children.size());
892  for(auto blockidx=child_offset, blocklim=child_offset+nblocks;
893  blockidx!=blocklim;
894  ++blockidx)
895  newblock.add(this_block_children[blockidx]);
896 
897  // Relabel the inner header:
898  to_code_label(newblock.statements()[0]).set_label(new_label_irep);
899  // Relabel internal gotos:
900  replace_goto_target(newblock, child_label_name, new_label_irep);
901 
902  // Remove the now-empty sibling blocks:
903  auto delfirst=this_block_children.begin();
904  std::advance(delfirst, child_offset+1);
905  auto dellim=delfirst;
906  std::advance(dellim, nblocks-1);
907  this_block_children.erase(delfirst, dellim);
908  this_block_children[child_offset].swap(newlabel);
909 
910  // Perform the same transformation on the index tree:
911  block_tree_nodet newnode;
912  auto branchstart=tree.branch.begin();
913  std::advance(branchstart, child_offset);
914  auto branchlim=branchstart;
915  std::advance(branchlim, nblocks);
916  for(auto branchiter=branchstart; branchiter!=branchlim; ++branchiter)
917  newnode.branch.push_back(std::move(*branchiter));
918  ++branchstart;
919  tree.branch.erase(branchstart, branchlim);
920 
921  CHECK_RETURN(tree.branch.size() == this_block_children.size());
922 
923  auto branchaddriter=tree.branch_addresses.begin();
924  std::advance(branchaddriter, child_offset);
925  auto branchaddrlim=branchaddriter;
926  std::advance(branchaddrlim, nblocks);
927  newnode.branch_addresses.insert(
928  newnode.branch_addresses.begin(),
929  branchaddriter,
930  branchaddrlim);
931 
932  ++branchaddriter;
933  tree.branch_addresses.erase(branchaddriter, branchaddrlim);
934 
935  tree.branch[child_offset]=std::move(newnode);
936 
937  CHECK_RETURN(tree.branch.size() == tree.branch_addresses.size());
938 
939  return
942  this_block_children[child_offset]).code());
943 }
944 
947  const exprt &e,
948  std::map<irep_idt, java_bytecode_convert_methodt::variablet> &result)
949 {
950  if(e.id()==ID_symbol)
951  {
952  const auto &symexpr=to_symbol_expr(e);
953  auto findit = result.emplace(
954  std::piecewise_construct,
955  std::forward_as_tuple(symexpr.get_identifier()),
956  std::forward_as_tuple(symexpr, pc, 1));
957  if(!findit.second)
958  {
959  auto &var = findit.first->second;
960 
961  if(pc<var.start_pc)
962  {
963  var.length+=(var.start_pc-pc);
964  var.start_pc=pc;
965  }
966  else
967  {
968  var.length=std::max(var.length, (pc-var.start_pc)+1);
969  }
970  }
971  }
972  else
973  {
974  for(const auto &op : e.operands())
975  gather_symbol_live_ranges(pc, op, result);
976  }
977 }
978 
985  const irep_idt &classname)
986 {
987  auto findit = symbol_table.symbols.find(clinit_wrapper_name(classname));
988  if(findit == symbol_table.symbols.end())
989  return code_skipt();
990  else
991  {
992  const code_function_callt ret(findit->second.symbol_expr());
994  needed_lazy_methods->add_needed_method(findit->second.name);
995  return ret;
996  }
997 }
998 
999 static std::size_t get_bytecode_type_width(const typet &ty)
1000 {
1001  if(ty.id()==ID_pointer)
1002  return 32;
1003  return to_bitvector_type(ty).get_width();
1004 }
1005 
1007  const methodt &method,
1008  const java_method_typet &method_type)
1009 {
1010  code_blockt code;
1011 
1012  // Consider parameter annotations
1013  const java_method_typet::parameterst &parameters(method_type.parameters());
1014  std::size_t param_index = method_type.has_this() ? 1 : 0;
1016  parameters.size() >= method.parameter_annotations.size() + param_index,
1017  "parameters and parameter annotations mismatch");
1018  for(const auto &param_annotations : method.parameter_annotations)
1019  {
1020  // NotNull annotations are not standardized. We support these:
1021  if(
1023  param_annotations, "java::javax.validation.constraints.NotNull") ||
1025  param_annotations, "java::org.jetbrains.annotations.NotNull") ||
1027  param_annotations, "org.eclipse.jdt.annotation.NonNull") ||
1029  param_annotations, "java::edu.umd.cs.findbugs.annotations.NonNull"))
1030  {
1031  const irep_idt &param_identifier =
1032  parameters[param_index].get_identifier();
1033  const symbolt &param_symbol = symbol_table.lookup_ref(param_identifier);
1034  const auto param_type =
1035  type_try_dynamic_cast<pointer_typet>(param_symbol.type);
1036  if(param_type)
1037  {
1038  code_assertt code_assert(notequal_exprt(
1039  param_symbol.symbol_expr(), null_pointer_exprt(*param_type)));
1040  source_locationt check_loc = method.source_location;
1041  check_loc.set_comment("Not null annotation check");
1042  check_loc.set_property_class("not-null-annotation-check");
1043  code_assert.add_source_location() = check_loc;
1044 
1045  code.add(std::move(code_assert));
1046  }
1047  }
1048  ++param_index;
1049  }
1050 
1051  return code;
1052 }
1053 
1056 {
1057  const instructionst &instructions=method.instructions;
1058 
1059  // Run a worklist algorithm, assuming that the bytecode has not
1060  // been tampered with. See "Leroy, X. (2003). Java bytecode
1061  // verification: algorithms and formalizations. Journal of Automated
1062  // Reasoning, 30(3-4), 235-269." for a more complete treatment.
1063 
1064  // first pass: get targets and map addresses to instructions
1065 
1066  address_mapt address_map;
1067  std::set<method_offsett> targets;
1068 
1069  std::vector<method_offsett> jsr_ret_targets;
1070  std::vector<instructionst::const_iterator> ret_instructions;
1071 
1072  for(auto i_it = instructions.begin(); i_it != instructions.end(); i_it++)
1073  {
1075  std::pair<address_mapt::iterator, bool> a_entry=
1076  address_map.insert(std::make_pair(i_it->address, ins));
1077  CHECK_RETURN(a_entry.second);
1078  // addresses are strictly increasing, hence we must have inserted
1079  // a new maximal key
1080  CHECK_RETURN(a_entry.first == --address_map.end());
1081 
1082  const auto bytecode = i_it->bytecode;
1083  const std::string statement = bytecode_info[i_it->bytecode].mnemonic;
1084 
1085  // clang-format off
1086  if(bytecode != BC_goto &&
1087  bytecode != BC_return &&
1088  bytecode != patternt("?return") &&
1089  bytecode != BC_athrow &&
1090  bytecode != BC_jsr &&
1091  bytecode != BC_jsr_w &&
1092  bytecode != BC_ret)
1093  {
1094  // clang-format on
1095  instructionst::const_iterator next=i_it;
1096  if(++next!=instructions.end())
1097  a_entry.first->second.successors.push_back(next->address);
1098  }
1099 
1100  // clang-format off
1101  if(bytecode == BC_athrow ||
1102  bytecode == BC_putfield ||
1103  bytecode == BC_getfield ||
1104  bytecode == BC_checkcast ||
1105  bytecode == BC_newarray ||
1106  bytecode == BC_anewarray ||
1107  bytecode == BC_idiv ||
1108  bytecode == BC_ldiv ||
1109  bytecode == BC_irem ||
1110  bytecode == BC_lrem ||
1111  bytecode == patternt("?astore") ||
1112  bytecode == patternt("?aload") ||
1113  bytecode == BC_invokestatic ||
1114  bytecode == BC_invokevirtual ||
1115  bytecode == BC_invokespecial ||
1116  bytecode == BC_invokeinterface ||
1117  (threading_support &&
1118  (bytecode == BC_monitorenter || bytecode == BC_monitorexit)))
1119  {
1120  // clang-format on
1121  const std::vector<method_offsett> handler =
1122  try_catch_handler(i_it->address, method.exception_table);
1123  std::list<method_offsett> &successors = a_entry.first->second.successors;
1124  successors.insert(successors.end(), handler.begin(), handler.end());
1125  targets.insert(handler.begin(), handler.end());
1126  }
1127 
1128  // clang-format off
1129  if(bytecode == BC_goto ||
1130  bytecode == patternt("if_?cmp??") ||
1131  bytecode == patternt("if??") ||
1132  bytecode == BC_ifnonnull ||
1133  bytecode == BC_ifnull ||
1134  bytecode == BC_jsr ||
1135  bytecode == BC_jsr_w)
1136  {
1137  // clang-format on
1138  PRECONDITION(!i_it->args.empty());
1139 
1140  auto target = numeric_cast_v<unsigned>(to_constant_expr(i_it->args[0]));
1141  targets.insert(target);
1142 
1143  a_entry.first->second.successors.push_back(target);
1144 
1145  if(bytecode == BC_jsr || bytecode == BC_jsr_w)
1146  {
1147  auto next = std::next(i_it);
1149  next != instructions.end(), "jsr should have valid return address");
1150  targets.insert(next->address);
1151  jsr_ret_targets.push_back(next->address);
1152  }
1153  }
1154  else if(bytecode == BC_tableswitch || bytecode == BC_lookupswitch)
1155  {
1156  bool is_label=true;
1157  for(const auto &arg : i_it->args)
1158  {
1159  if(is_label)
1160  {
1161  auto target = numeric_cast_v<unsigned>(to_constant_expr(arg));
1162  targets.insert(target);
1163  a_entry.first->second.successors.push_back(target);
1164  }
1165  is_label=!is_label;
1166  }
1167  }
1168  else if(bytecode == BC_ret)
1169  {
1170  // Finish these later, once we've seen all jsr instructions.
1171  ret_instructions.push_back(i_it);
1172  }
1173  }
1174  draw_edges_from_ret_to_jsr(address_map, jsr_ret_targets, ret_instructions);
1175 
1176  for(const auto &address : address_map)
1177  {
1178  for(auto s : address.second.successors)
1179  {
1180  const auto a_it = address_map.find(s);
1181  CHECK_RETURN(a_it != address_map.end());
1182  a_it->second.predecessors.insert(address.first);
1183  }
1184  }
1185 
1186  // Clean the list of temporary variables created by a call to `tmp_variable`.
1187  // These are local variables in the goto function used to represent temporary
1188  // values of the JVM operand stack, newly allocated objects before the
1189  // constructor is called, ...
1190  tmp_vars.clear();
1191 
1192  // Now that the control flow graph is built, set up our local variables
1193  // (these require the graph to determine live ranges)
1194  setup_local_variables(method, address_map);
1195 
1196  std::set<method_offsett> working_set;
1197 
1198  if(!instructions.empty())
1199  working_set.insert(instructions.front().address);
1200 
1201  while(!working_set.empty())
1202  {
1203  auto cur = working_set.begin();
1204  auto address_it = address_map.find(*cur);
1205  CHECK_RETURN(address_it != address_map.end());
1206  auto &instruction = address_it->second;
1207  const method_offsett cur_pc = *cur;
1208  working_set.erase(cur);
1209 
1210  if(instruction.done)
1211  continue;
1212  working_set.insert(
1213  instruction.successors.begin(), instruction.successors.end());
1214 
1215  instructionst::const_iterator i_it = instruction.source;
1216  stack.swap(instruction.stack);
1217  instruction.stack.clear();
1218  codet &c = instruction.code;
1219 
1220  INVARIANT(
1221  stack.empty() || instruction.predecessors.size() <= 1 ||
1222  has_prefix(stack.front().get_string(ID_C_base_name), "$stack"),
1223  "inconsistent stack");
1224 
1225  exprt arg0=i_it->args.size()>=1?i_it->args[0]:nil_exprt();
1226  exprt arg1=i_it->args.size()>=2?i_it->args[1]:nil_exprt();
1227 
1228  const auto bytecode = i_it->bytecode;
1229  const bytecode_infot &stmt_bytecode_info = bytecode_info[i_it->bytecode];
1230  const std::string statement = stmt_bytecode_info.mnemonic;
1231 
1232  // deal with _idx suffixes
1233  if(statement.size()>=2 &&
1234  statement[statement.size()-2]=='_' &&
1235  isdigit(statement[statement.size()-1]))
1236  {
1237  arg0=
1238  from_integer(
1240  std::string(id2string(statement), statement.size()-1, 1)),
1241  java_int_type());
1242  }
1243 
1244  typet catch_type;
1245 
1246  // Find catch blocks that begin here. For now we assume if more than
1247  // one catch targets the same bytecode then we must be indifferent to
1248  // its type and just call it a Throwable.
1249  auto it=method.exception_table.begin();
1250  for(; it!=method.exception_table.end(); ++it)
1251  {
1252  if(cur_pc==it->handler_pc)
1253  {
1254  if(
1255  catch_type != typet() ||
1256  it->catch_type == struct_tag_typet(irep_idt()))
1257  {
1258  catch_type = struct_tag_typet("java::java.lang.Throwable");
1259  break;
1260  }
1261  else
1262  catch_type=it->catch_type;
1263  }
1264  }
1265 
1266  std::optional<codet> catch_instruction;
1267 
1268  if(catch_type!=typet())
1269  {
1270  // at the beginning of a handler, clear the stack and
1271  // push the corresponding exceptional return variable
1272  // We also create a catch exception instruction that
1273  // precedes the catch block, and which remove_exceptionst
1274  // will transform into something like:
1275  // catch_var = GLOBAL_THROWN_EXCEPTION;
1276  // GLOBAL_THROWN_EXCEPTION = null;
1277  stack.clear();
1278  symbol_exprt catch_var=
1279  tmp_variable(
1280  "caught_exception",
1281  java_reference_type(catch_type));
1282  stack.push_back(catch_var);
1283  catch_instruction = code_landingpadt(catch_var);
1284  }
1285 
1286  exprt::operandst op = pop(stmt_bytecode_info.pop);
1287  exprt::operandst results;
1288  results.resize(stmt_bytecode_info.push, nil_exprt());
1289 
1290  if(bytecode == BC_aconst_null)
1291  {
1292  PRECONDITION(results.size() == 1);
1294  }
1295  else if(bytecode == BC_athrow)
1296  {
1297  PRECONDITION(op.size() == 1 && results.size() == 1);
1298  convert_athrow(i_it->source_location, op, c, results);
1299  }
1300  else if(bytecode == BC_checkcast)
1301  {
1302  // checkcast throws an exception in case a cast of object
1303  // on stack to given type fails.
1304  // The stack isn't modified.
1305  PRECONDITION(op.size() == 1 && results.size() == 1);
1306  convert_checkcast(arg0, op, c, results);
1307  }
1308  else if(bytecode == BC_invokedynamic)
1309  {
1310  if(
1311  const auto res =
1312  convert_invoke_dynamic(i_it->source_location, i_it->address, arg0, c))
1313  {
1314  results.resize(1);
1315  results[0] = *res;
1316  }
1317  }
1318  else if(
1319  bytecode == BC_invokestatic && id2string(arg0.get(ID_identifier)) ==
1320  "java::org.cprover.CProver.assume:(Z)V")
1321  {
1322  const java_method_typet &method_type = to_java_method_type(arg0.type());
1323  INVARIANT(
1324  method_type.parameters().size() == 1,
1325  "function expected to have exactly one parameter");
1326  c = replace_call_to_cprover_assume(i_it->source_location, c);
1327  }
1328  // replace calls to CProver.atomicBegin
1329  else if(
1330  bytecode == BC_invokestatic &&
1331  arg0.get(ID_identifier) == "java::org.cprover.CProver.atomicBegin:()V")
1332  {
1333  c = codet(ID_atomic_begin);
1334  }
1335  // replace calls to CProver.atomicEnd
1336  else if(
1337  bytecode == BC_invokestatic &&
1338  arg0.get(ID_identifier) == "java::org.cprover.CProver.atomicEnd:()V")
1339  {
1340  c = codet(ID_atomic_end);
1341  }
1342  else if(
1343  bytecode == BC_invokeinterface || bytecode == BC_invokespecial ||
1344  bytecode == BC_invokevirtual || bytecode == BC_invokestatic)
1345  {
1346  class_method_descriptor_exprt *class_method_descriptor =
1347  expr_try_dynamic_cast<class_method_descriptor_exprt>(arg0);
1348 
1349  INVARIANT(
1350  class_method_descriptor,
1351  "invokeinterface, invokespecial, invokevirtual and invokestatic should "
1352  "be called with a class method descriptor expression as arg0");
1353 
1355  i_it->source_location, statement, *class_method_descriptor, c, results);
1356  }
1357  else if(bytecode == BC_return)
1358  {
1359  PRECONDITION(op.empty() && results.empty());
1360  c = code_frontend_returnt();
1361  }
1362  else if(bytecode == patternt("?return"))
1363  {
1364  // Return types are promoted in java, so this might need
1365  // conversion.
1366  PRECONDITION(op.size() == 1 && results.empty());
1367  const exprt r =
1369  c = code_frontend_returnt(r);
1370  }
1371  else if(bytecode == patternt("?astore"))
1372  {
1373  PRECONDITION(results.empty());
1374  c = convert_astore(statement, op, i_it->source_location);
1375  }
1376  else if(bytecode == patternt("?store") || bytecode == patternt("?store_?"))
1377  {
1378  // store value into some local variable
1379  PRECONDITION(op.size() == 1 && results.empty());
1380  c = convert_store(
1381  statement, arg0, op, i_it->address, i_it->source_location);
1382  }
1383  else if(bytecode == patternt("?aload"))
1384  {
1385  PRECONDITION(results.size() == 1);
1386  results[0] = convert_aload(statement, op);
1387  }
1388  else if(bytecode == patternt("?load") || bytecode == patternt("?load_?"))
1389  {
1390  // load a value from a local variable
1391  results[0] = convert_load(arg0, statement[0], i_it->address);
1392  }
1393  else if(bytecode == BC_ldc || bytecode == BC_ldc_w || bytecode == BC_ldc2_w)
1394  {
1395  PRECONDITION(op.empty() && results.size() == 1);
1396 
1397  INVARIANT(
1398  !can_cast_expr<java_string_literal_exprt>(arg0) && arg0.id() != ID_type,
1399  "String and Class literals should have been lowered in "
1400  "generate_constant_global_variables");
1401 
1402  results[0] = arg0;
1403  }
1404  else if(bytecode == BC_goto || bytecode == BC_goto_w)
1405  {
1406  PRECONDITION(op.empty() && results.empty());
1407  const mp_integer number =
1408  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1409  code_gotot code_goto(label(integer2string(number)));
1410  c=code_goto;
1411  }
1412  else if(bytecode == BC_jsr || bytecode == BC_jsr_w)
1413  {
1414  // As 'goto', except we must also push the subroutine return address:
1415  PRECONDITION(op.empty() && results.size() == 1);
1416  const mp_integer number =
1417  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1418  code_gotot code_goto(label(integer2string(number)));
1419  c=code_goto;
1420  results[0]=
1421  from_integer(
1422  std::next(i_it)->address,
1423  unsignedbv_typet(64));
1424  results[0].type() = pointer_type(java_void_type());
1425  }
1426  else if(bytecode == BC_ret)
1427  {
1428  // Since we have a bounded target set, make life easier on our analyses
1429  // and write something like:
1430  // if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ...
1431  PRECONDITION(op.empty() && results.empty());
1432  PRECONDITION(!jsr_ret_targets.empty());
1433  c = convert_ret(
1434  jsr_ret_targets, arg0, i_it->source_location, i_it->address);
1435  }
1436  else if(bytecode == BC_iconst_m1)
1437  {
1438  CHECK_RETURN(results.size() == 1);
1439  results[0]=from_integer(-1, java_int_type());
1440  }
1441  else if(bytecode == patternt("?const_?"))
1442  {
1443  CHECK_RETURN(results.size() == 1);
1444  results = convert_const(statement, to_constant_expr(arg0), results);
1445  }
1446  else if(bytecode == patternt("?ipush"))
1447  {
1448  CHECK_RETURN(results.size() == 1);
1450  arg0.is_constant(), "ipush argument expected to be constant");
1451  results[0] = typecast_exprt::conditional_cast(arg0, java_int_type());
1452  }
1453  else if(bytecode == patternt("if_?cmp??"))
1454  {
1455  PRECONDITION(op.size() == 2 && results.empty());
1456  const mp_integer number =
1457  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1458  c = convert_if_cmp(
1459  address_map, bytecode, op, number, i_it->source_location);
1460  }
1461  else if(bytecode == patternt("if??"))
1462  {
1463  // clang-format off
1464  const irep_idt id=
1465  bytecode == BC_ifeq ? ID_equal :
1466  bytecode == BC_ifne ? ID_notequal :
1467  bytecode == BC_iflt ? ID_lt :
1468  bytecode == BC_ifge ? ID_ge :
1469  bytecode == BC_ifgt ? ID_gt :
1470  bytecode == BC_ifle ? ID_le :
1471  irep_idt();
1472  // clang-format on
1473 
1474  INVARIANT(!id.empty(), "unexpected bytecode-if");
1475  PRECONDITION(op.size() == 1 && results.empty());
1476  const mp_integer number =
1477  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1478  c = convert_if(address_map, op, id, number, i_it->source_location);
1479  }
1480  else if(bytecode == patternt("ifnonnull"))
1481  {
1482  PRECONDITION(op.size() == 1 && results.empty());
1483  const mp_integer number =
1484  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1485  c = convert_ifnonull(address_map, op, number, i_it->source_location);
1486  }
1487  else if(bytecode == patternt("ifnull"))
1488  {
1489  PRECONDITION(op.size() == 1 && results.empty());
1490  const mp_integer number =
1491  numeric_cast_v<mp_integer>(to_constant_expr(arg0));
1492  c = convert_ifnull(address_map, op, number, i_it->source_location);
1493  }
1494  else if(bytecode == BC_iinc)
1495  {
1496  c = convert_iinc(arg0, arg1, i_it->source_location, i_it->address);
1497  }
1498  else if(bytecode == patternt("?xor"))
1499  {
1500  PRECONDITION(op.size() == 2 && results.size() == 1);
1501  results[0]=bitxor_exprt(op[0], op[1]);
1502  }
1503  else if(bytecode == patternt("?or"))
1504  {
1505  PRECONDITION(op.size() == 2 && results.size() == 1);
1506  results[0]=bitor_exprt(op[0], op[1]);
1507  }
1508  else if(bytecode == patternt("?and"))
1509  {
1510  PRECONDITION(op.size() == 2 && results.size() == 1);
1511  results[0]=bitand_exprt(op[0], op[1]);
1512  }
1513  else if(bytecode == patternt("?shl"))
1514  {
1515  PRECONDITION(op.size() == 2 && results.size() == 1);
1516  results = convert_shl(statement, op, results);
1517  }
1518  else if(bytecode == patternt("?shr"))
1519  {
1520  PRECONDITION(op.size() == 2 && results.size() == 1);
1521  results[0]=ashr_exprt(op[0], op[1]);
1522  }
1523  else if(bytecode == patternt("?ushr"))
1524  {
1525  PRECONDITION(op.size() == 2 && results.size() == 1);
1526  results = convert_ushr(statement, op, results);
1527  }
1528  else if(bytecode == patternt("?add"))
1529  {
1530  PRECONDITION(op.size() == 2 && results.size() == 1);
1531  results[0]=plus_exprt(op[0], op[1]);
1532  }
1533  else if(bytecode == patternt("?sub"))
1534  {
1535  PRECONDITION(op.size() == 2 && results.size() == 1);
1536  results[0]=minus_exprt(op[0], op[1]);
1537  }
1538  else if(bytecode == patternt("?div"))
1539  {
1540  PRECONDITION(op.size() == 2 && results.size() == 1);
1541  results[0]=div_exprt(op[0], op[1]);
1542  }
1543  else if(bytecode == patternt("?mul"))
1544  {
1545  PRECONDITION(op.size() == 2 && results.size() == 1);
1546  results[0]=mult_exprt(op[0], op[1]);
1547  }
1548  else if(bytecode == patternt("?neg"))
1549  {
1550  PRECONDITION(op.size() == 1 && results.size() == 1);
1551  results[0]=unary_minus_exprt(op[0], op[0].type());
1552  }
1553  else if(bytecode == patternt("?rem"))
1554  {
1555  PRECONDITION(op.size() == 2 && results.size() == 1);
1556  // This is _not_ the remainder operation defined by IEEE 754,
1557  // but similar to the fmod C library function.
1558  if(bytecode == BC_frem || bytecode == BC_drem)
1559  results[0] = binary_exprt(op[0], ID_floatbv_mod, op[1]);
1560  else
1561  results[0]=mod_exprt(op[0], op[1]);
1562  }
1563  else if(bytecode == patternt("?cmp"))
1564  {
1565  PRECONDITION(op.size() == 2 && results.size() == 1);
1566  results = convert_cmp(op, results);
1567  }
1568  else if(bytecode == patternt("?cmp?"))
1569  {
1570  PRECONDITION(op.size() == 2 && results.size() == 1);
1571  results = convert_cmp2(statement, op, results);
1572  }
1573  else if(bytecode == patternt("?cmpl"))
1574  {
1575  PRECONDITION(op.size() == 2 && results.size() == 1);
1576  results[0]=binary_relation_exprt(op[0], ID_lt, op[1]);
1577  }
1578  else if(bytecode == BC_dup)
1579  {
1580  PRECONDITION(op.size() == 1 && results.size() == 2);
1581  results[0]=results[1]=op[0];
1582  }
1583  else if(bytecode == BC_dup_x1)
1584  {
1585  PRECONDITION(op.size() == 2 && results.size() == 3);
1586  results[0]=op[1];
1587  results[1]=op[0];
1588  results[2]=op[1];
1589  }
1590  else if(bytecode == BC_dup_x2)
1591  {
1592  PRECONDITION(op.size() == 3 && results.size() == 4);
1593  results[0]=op[2];
1594  results[1]=op[0];
1595  results[2]=op[1];
1596  results[3]=op[2];
1597  }
1598  // dup2* behaviour depends on the size of the operands on the
1599  // stack
1600  else if(bytecode == BC_dup2)
1601  {
1602  PRECONDITION(!stack.empty() && results.empty());
1603  convert_dup2(op, results);
1604  }
1605  else if(bytecode == BC_dup2_x1)
1606  {
1607  PRECONDITION(!stack.empty() && results.empty());
1608  convert_dup2_x1(op, results);
1609  }
1610  else if(bytecode == BC_dup2_x2)
1611  {
1612  PRECONDITION(!stack.empty() && results.empty());
1613  convert_dup2_x2(op, results);
1614  }
1615  else if(bytecode == BC_getfield)
1616  {
1617  PRECONDITION(op.size() == 1 && results.size() == 1);
1618  results[0] = java_bytecode_promotion(
1619  to_member(op[0], expr_dynamic_cast<fieldref_exprt>(arg0), ns));
1620  }
1621  else if(bytecode == BC_getstatic)
1622  {
1623  PRECONDITION(op.empty() && results.size() == 1);
1624  const auto &field_name=arg0.get_string(ID_component_name);
1625  const bool is_assertions_disabled_field=
1626  field_name.find("$assertionsDisabled")!=std::string::npos;
1627 
1628  const irep_idt field_id(
1629  get_static_field(arg0.get_string(ID_class), field_name));
1630 
1631  // Symbol should have been populated by java_bytecode_convert_class:
1632  const symbol_exprt symbol_expr(
1633  symbol_table.lookup_ref(field_id).symbol_expr());
1634 
1636  i_it->source_location,
1637  arg0,
1638  symbol_expr,
1639  is_assertions_disabled_field,
1640  c,
1641  results);
1642  }
1643  else if(bytecode == BC_putfield)
1644  {
1645  PRECONDITION(op.size() == 2 && results.empty());
1646  c = convert_putfield(expr_dynamic_cast<fieldref_exprt>(arg0), op);
1647  }
1648  else if(bytecode == BC_putstatic)
1649  {
1650  PRECONDITION(op.size() == 1 && results.empty());
1651  const auto &field_name=arg0.get_string(ID_component_name);
1652 
1653  const irep_idt field_id(
1654  get_static_field(arg0.get_string(ID_class), field_name));
1655 
1656  // Symbol should have been populated by java_bytecode_convert_class:
1657  const symbol_exprt symbol_expr(
1658  symbol_table.lookup_ref(field_id).symbol_expr());
1659 
1660  c = convert_putstatic(i_it->source_location, arg0, op, symbol_expr);
1661  }
1662  else if(
1663  bytecode == BC_f2i || bytecode == BC_f2l || bytecode == BC_d2i ||
1664  bytecode == BC_d2l)
1665  {
1666  PRECONDITION(op.size() == 1 && results.size() == 1);
1667  typet src_type = java_type_from_char(statement[0]);
1668  typet dest_type = java_type_from_char(statement[2]);
1669 
1670  // See JLS 5.1.3. Narrowing Primitive Conversion
1671  // +-NaN is converted to 0
1672  // +-Inf resp. values beyond the int/long range
1673  // are mapped to max/min of int/long.
1674  // Other values are rounded towards zero
1675 
1676  // for int: 2147483647, for long: 9223372036854775807L
1677  exprt largest_as_dest =
1679 
1680  // 2147483647 is not exactly representable in float;
1681  // it will be rounded up to 2147483648, which is fine.
1682  // 9223372036854775807L is not exactly representable
1683  // neither in float nor in double; it is rounded up to
1684  // 9223372036854775808.0, which is fine.
1685  exprt largest_as_src =
1686  from_integer(to_integer_bitvector_type(dest_type).largest(), src_type);
1687 
1688  // for int: -2147483648, for long: -9223372036854775808L
1689  exprt smallest_as_dest =
1691 
1692  // -2147483648 and -9223372036854775808L are exactly
1693  // representable in float and double.
1694  exprt smallest_as_src =
1695  from_integer(to_integer_bitvector_type(dest_type).smallest(), src_type);
1696 
1697  results[0] = if_exprt(
1698  binary_relation_exprt(op[0], ID_le, smallest_as_src),
1699  smallest_as_dest,
1700  if_exprt(
1701  binary_relation_exprt(op[0], ID_ge, largest_as_src),
1702  largest_as_dest,
1703  typecast_exprt::conditional_cast(op[0], dest_type)));
1704  }
1705  else if(bytecode == patternt("?2?")) // i2c etc.
1706  {
1707  PRECONDITION(op.size() == 1 && results.size() == 1);
1708  typet type=java_type_from_char(statement[2]);
1709  results[0] = typecast_exprt::conditional_cast(op[0], type);
1710 
1711  // These types get converted/truncated then immediately turned back into
1712  // ints again, so we just double-cast here.
1713  if(
1714  type == java_char_type() || type == java_byte_type() ||
1715  type == java_short_type())
1716  {
1717  results[0] = typecast_exprt(results[0], java_int_type());
1718  }
1719  }
1720  else if(bytecode == BC_new)
1721  {
1722  // use temporary since the stack symbol might get duplicated
1723  PRECONDITION(op.empty() && results.size() == 1);
1724  convert_new(i_it->source_location, arg0, c, results);
1725  }
1726  else if(bytecode == BC_newarray || bytecode == BC_anewarray)
1727  {
1728  // the op is the array size
1729  PRECONDITION(op.size() == 1 && results.size() == 1);
1730  c = convert_newarray(i_it->source_location, statement, arg0, op, results);
1731  }
1732  else if(bytecode == BC_multianewarray)
1733  {
1734  // The first argument is the type, the second argument is the number of
1735  // dimensions. The size of each dimension is on the stack.
1736  const std::size_t dimension =
1737  numeric_cast_v<std::size_t>(to_constant_expr(arg1));
1738 
1739  op=pop(dimension);
1740  CHECK_RETURN(results.size() == 1);
1741  c = convert_multianewarray(i_it->source_location, arg0, op, results);
1742  }
1743  else if(bytecode == BC_arraylength)
1744  {
1745  PRECONDITION(op.size() == 1 && results.size() == 1);
1746 
1747  // any array type is fine here, so we go for a reference array
1748  dereference_exprt array{typecast_exprt{op[0], java_array_type('a')}};
1749  PRECONDITION(array.type().id() == ID_struct_tag);
1750  array.set(ID_java_member_access, true);
1751 
1752  results[0] = member_exprt{std::move(array), "length", java_int_type()};
1753  }
1754  else if(bytecode == BC_tableswitch || bytecode == BC_lookupswitch)
1755  {
1756  PRECONDITION(op.size() == 1 && results.empty());
1757  c = convert_switch(op, i_it->args, i_it->source_location);
1758  }
1759  else if(bytecode == BC_pop || bytecode == BC_pop2)
1760  {
1761  c = convert_pop(statement, op);
1762  }
1763  else if(bytecode == BC_instanceof)
1764  {
1765  PRECONDITION(op.size() == 1 && results.size() == 1);
1766 
1767  results[0] =
1769  }
1770  else if(bytecode == BC_monitorenter || bytecode == BC_monitorexit)
1771  {
1772  c = convert_monitorenterexit(statement, op, i_it->source_location);
1773  }
1774  else if(bytecode == BC_swap)
1775  {
1776  PRECONDITION(op.size() == 2 && results.size() == 2);
1777  results[1]=op[0];
1778  results[0]=op[1];
1779  }
1780  else if(bytecode == BC_nop)
1781  {
1782  c=code_skipt();
1783  }
1784  else
1785  {
1786  c=codet(statement);
1787  c.operands()=op;
1788  }
1789 
1790  c = do_exception_handling(method, working_set, cur_pc, c);
1791 
1792  // Finally if this is the beginning of a catch block (already determined
1793  // before the big bytecode switch), insert the exception 'landing pad'
1794  // instruction before the actual instruction:
1795  if(catch_instruction.has_value())
1796  {
1797  if(c.get_statement() != ID_block)
1798  c = code_blockt{{c}};
1799  c.operands().insert(c.operands().begin(), *catch_instruction);
1800  }
1801 
1802  if(!i_it->source_location.get_line().empty())
1804 
1805  push(results);
1806 
1807  instruction.done = true;
1808  for(const auto address : instruction.successors)
1809  {
1810  address_mapt::iterator a_it2=address_map.find(address);
1811  CHECK_RETURN(a_it2 != address_map.end());
1812 
1813  // clear the stack if this is an exception handler
1814  for(const auto &exception_row : method.exception_table)
1815  {
1816  if(address==exception_row.handler_pc)
1817  {
1818  stack.clear();
1819  break;
1820  }
1821  }
1822 
1823  if(!stack.empty() && a_it2->second.predecessors.size()>1)
1824  {
1825  // copy into temporaries
1826  code_blockt more_code;
1827 
1828  // introduce temporaries when successor is seen for the first
1829  // time
1830  if(a_it2->second.stack.empty())
1831  {
1832  for(stackt::iterator s_it=stack.begin();
1833  s_it!=stack.end();
1834  ++s_it)
1835  {
1836  symbol_exprt lhs=tmp_variable("$stack", s_it->type());
1837  code_assignt a(lhs, *s_it);
1838  more_code.add(a);
1839 
1840  s_it->swap(lhs);
1841  }
1842  }
1843  else
1844  {
1845  INVARIANT(
1846  a_it2->second.stack.size() == stack.size(),
1847  "Stack sizes should be the same.");
1848  stackt::const_iterator os_it=a_it2->second.stack.begin();
1849  for(auto &expr : stack)
1850  {
1851  INVARIANT(
1852  has_prefix(os_it->get_string(ID_C_base_name), "$stack"),
1853  "invalid base name");
1854  symbol_exprt lhs=to_symbol_expr(*os_it);
1855  code_assignt a(lhs, expr);
1856  more_code.add(a);
1857 
1858  expr.swap(lhs);
1859  ++os_it;
1860  }
1861  }
1862 
1863  if(results.empty())
1864  {
1865  more_code.add(c);
1866  c.swap(more_code);
1867  }
1868  else
1869  {
1870  if(c.get_statement() != ID_block)
1871  c = code_blockt{{c}};
1872 
1873  auto &last_statement=to_code_block(c).find_last_statement();
1874  if(last_statement.get_statement()==ID_goto)
1875  {
1876  // Insert stack twiddling before branch:
1877  if(last_statement.get_statement() != ID_block)
1878  last_statement = code_blockt{{last_statement}};
1879 
1880  last_statement.operands().insert(
1881  last_statement.operands().begin(),
1882  more_code.statements().begin(),
1883  more_code.statements().end());
1884  }
1885  else
1886  to_code_block(c).append(more_code);
1887  }
1888  }
1889  a_it2->second.stack=stack;
1890  }
1891  }
1892 
1893  code_blockt code;
1894 
1895  // Add anonymous locals to the symtab:
1896  for(const auto &var : used_local_names)
1897  {
1898  symbolt new_symbol{var.get_identifier(), var.type(), ID_java};
1899  new_symbol.base_name=var.get(ID_C_base_name);
1900  new_symbol.pretty_name=strip_java_namespace_prefix(var.get_identifier());
1901  new_symbol.is_file_local=true;
1902  new_symbol.is_thread_local=true;
1903  new_symbol.is_lvalue=true;
1904  symbol_table.add(new_symbol);
1905  }
1906 
1907  // Try to recover block structure as indicated in the local variable table:
1908 
1909  // The block tree node mirrors the block structure of root_block,
1910  // indexing the Java PCs where each subblock starts and ends.
1911  block_tree_nodet root;
1912  code_blockt root_block;
1913 
1914  // First create a simple flat list of basic blocks. We'll add lexical nesting
1915  // constructs as variable live-ranges require next.
1916  bool start_new_block=true;
1917  bool has_seen_previous_address=false;
1918  method_offsett previous_address = 0;
1919  for(const auto &address_pair : address_map)
1920  {
1921  const method_offsett address = address_pair.first;
1922  CHECK_RETURN(address_pair.first == address_pair.second.source->address);
1923  const codet &c=address_pair.second.code;
1924 
1925  // Start a new lexical block if this is a branch target:
1926  if(!start_new_block)
1927  start_new_block=targets.find(address)!=targets.end();
1928  // Start a new lexical block if this is a control flow join
1929  // (e.g. due to exceptional control flow)
1930  if(!start_new_block)
1931  start_new_block=address_pair.second.predecessors.size()>1;
1932  // Start a new lexical block if we've just entered a block in which
1933  // exceptions are handled. This is usually the start of a try block, but a
1934  // single try can be represented as multiple non-contiguous blocks in the
1935  // exception table.
1936  if(!start_new_block && has_seen_previous_address)
1937  {
1938  for(const auto &exception_row : method.exception_table)
1939  if(exception_row.start_pc==previous_address)
1940  {
1941  start_new_block=true;
1942  break;
1943  }
1944  }
1945 
1946  if(start_new_block)
1947  {
1948  root_block.add(
1950  root.branch.push_back(block_tree_nodet::get_leaf());
1951  INVARIANT(
1952  (root.branch_addresses.empty() ||
1953  root.branch_addresses.back() < address),
1954  "Block addresses should be unique and increasing");
1955  root.branch_addresses.push_back(address);
1956  }
1957 
1958  if(c.get_statement()!=ID_skip)
1959  {
1960  auto &lastlabel = to_code_label(root_block.statements().back());
1961  auto &add_to_block=to_code_block(lastlabel.code());
1962  add_to_block.add(c);
1963  }
1964  start_new_block=address_pair.second.successors.size()>1;
1965 
1966  previous_address=address;
1967  has_seen_previous_address=true;
1968  }
1969 
1970  // Find out where temporaries are used:
1971  std::map<irep_idt, variablet> temporary_variable_live_ranges;
1972  for(const auto &aentry : address_map)
1974  aentry.first,
1975  aentry.second.code,
1976  temporary_variable_live_ranges);
1977 
1978  std::vector<const variablet*> vars_to_process;
1979  for(const auto &vlist : variables)
1980  for(const auto &v : vlist)
1981  vars_to_process.push_back(&v);
1982 
1983  for(const auto &v : tmp_vars)
1984  vars_to_process.push_back(
1985  &temporary_variable_live_ranges.at(v.get_identifier()));
1986 
1987  for(const auto &v : used_local_names)
1988  vars_to_process.push_back(
1989  &temporary_variable_live_ranges.at(v.get_identifier()));
1990 
1991  for(const auto vp : vars_to_process)
1992  {
1993  const auto &v=*vp;
1994  if(v.is_parameter)
1995  continue;
1996  // Merge lexical scopes as far as possible to allow us to
1997  // declare these variable scopes faithfully.
1998  // Don't insert yet, as for the time being the blocks' only
1999  // operands must be other blocks.
2000  // The declarations will be inserted in the next pass instead.
2002  root,
2003  root_block,
2004  v.start_pc,
2005  v.start_pc + v.length,
2006  std::numeric_limits<method_offsett>::max(),
2007  address_map);
2008  }
2009  for(const auto vp : vars_to_process)
2010  {
2011  const auto &v=*vp;
2012  if(v.is_parameter)
2013  continue;
2014  // Skip anonymous variables:
2015  if(v.symbol_expr.get_identifier().empty())
2016  continue;
2017  auto &block = get_block_for_pcrange(
2018  root,
2019  root_block,
2020  v.start_pc,
2021  v.start_pc + v.length,
2022  std::numeric_limits<method_offsett>::max());
2023  code_declt d(v.symbol_expr);
2024  block.statements().insert(block.statements().begin(), d);
2025  }
2026 
2027  for(auto &block : root_block.statements())
2028  code.add(block);
2029 
2030  return code;
2031 }
2032 
2034  const irep_idt &statement,
2035  const exprt::operandst &op)
2036 {
2037  // these are skips
2038  codet c = code_skipt();
2039 
2040  // pop2 removes two single-word items from the stack (e.g. two
2041  // integers, or an integer and an object reference) or one
2042  // two-word item (i.e. a double or a long).
2043  // http://cs.au.dk/~mis/dOvs/jvmspec/ref-pop2.html
2044  if(statement == "pop2" && get_bytecode_type_width(op[0].type()) == 32)
2045  pop(1);
2046  return c;
2047 }
2048 
2050  const exprt::operandst &op,
2052  const source_locationt &location)
2053 {
2054  // we turn into switch-case
2055  code_blockt code_block;
2056  code_block.add_source_location() = location;
2057 
2058  bool is_label = true;
2059  for(auto a_it = args.begin(); a_it != args.end();
2060  a_it++, is_label = !is_label)
2061  {
2062  if(is_label)
2063  {
2064  const mp_integer number =
2065  numeric_cast_v<mp_integer>(to_constant_expr(*a_it));
2066  // The switch case does not contain any code, it just branches via a GOTO
2067  // to the jump target of the tableswitch/lookupswitch case at
2068  // hand. Therefore we consider this code to belong to the source bytecode
2069  // instruction and not the target instruction.
2070  const method_offsett label_number =
2071  numeric_cast_v<method_offsett>(number);
2072  code_gotot code(label(std::to_string(label_number)));
2073  code.add_source_location() = location;
2074 
2075  if(a_it == args.begin())
2076  {
2077  code_switch_caset code_case(nil_exprt(), std::move(code));
2078  code_case.set_default();
2079 
2080  code_block.add(std::move(code_case), location);
2081  }
2082  else
2083  {
2084  exprt case_op =
2085  typecast_exprt::conditional_cast(*std::prev(a_it), op[0].type());
2086  case_op.add_source_location() = location;
2087 
2088  code_switch_caset code_case(std::move(case_op), std::move(code));
2089  code_block.add(std::move(code_case), location);
2090  }
2091  }
2092  }
2093 
2094  code_switcht code_switch(op[0], std::move(code_block));
2095  code_switch.add_source_location() = location;
2096  return code_switch;
2097 }
2098 
2100  const irep_idt &statement,
2101  const exprt::operandst &op,
2102  const source_locationt &source_location)
2103 {
2104  const irep_idt descriptor = (statement == "monitorenter") ?
2105  "java::java.lang.Object.monitorenter:(Ljava/lang/Object;)V" :
2106  "java::java.lang.Object.monitorexit:(Ljava/lang/Object;)V";
2107 
2108  if(!threading_support || !symbol_table.has_symbol(descriptor))
2109  return code_skipt();
2110 
2111  // becomes a function call
2112  java_method_typet type(
2114  java_void_type());
2115  code_function_callt call(symbol_exprt(descriptor, type), {op[0]});
2116  call.add_source_location() = source_location;
2117  if(needed_lazy_methods && symbol_table.has_symbol(descriptor))
2118  needed_lazy_methods->add_needed_method(descriptor);
2119  return std::move(call);
2120 }
2121 
2123  exprt::operandst &op,
2124  exprt::operandst &results)
2125 {
2126  if(get_bytecode_type_width(stack.back().type()) == 32)
2127  op = pop(2);
2128  else
2129  op = pop(1);
2130 
2131  results.insert(results.end(), op.begin(), op.end());
2132  results.insert(results.end(), op.begin(), op.end());
2133 }
2134 
2136  exprt::operandst &op,
2137  exprt::operandst &results)
2138 {
2139  if(get_bytecode_type_width(stack.back().type()) == 32)
2140  op = pop(3);
2141  else
2142  op = pop(2);
2143 
2144  results.insert(results.end(), op.begin() + 1, op.end());
2145  results.insert(results.end(), op.begin(), op.end());
2146 }
2147 
2149  exprt::operandst &op,
2150  exprt::operandst &results)
2151 {
2152  if(get_bytecode_type_width(stack.back().type()) == 32)
2153  op = pop(2);
2154  else
2155  op = pop(1);
2156 
2157  exprt::operandst op2;
2158 
2159  if(get_bytecode_type_width(stack.back().type()) == 32)
2160  op2 = pop(2);
2161  else
2162  op2 = pop(1);
2163 
2164  results.insert(results.end(), op.begin(), op.end());
2165  results.insert(results.end(), op2.begin(), op2.end());
2166  results.insert(results.end(), op.begin(), op.end());
2167 }
2168 
2170  const irep_idt &statement,
2171  const constant_exprt &arg0,
2172  exprt::operandst &results) const
2173 {
2174  const char type_char = statement[0];
2175  const bool is_double('d' == type_char);
2176  const bool is_float('f' == type_char);
2177 
2178  if(is_double || is_float)
2179  {
2180  const ieee_float_spect spec(
2183 
2184  ieee_floatt value(spec);
2185  if(arg0.type().id() != ID_floatbv)
2186  {
2187  const mp_integer number = numeric_cast_v<mp_integer>(arg0);
2188  value.from_integer(number);
2189  }
2190  else
2191  value.from_expr(arg0);
2192 
2193  results[0] = value.to_expr();
2194  }
2195  else
2196  {
2197  const mp_integer value = numeric_cast_v<mp_integer>(arg0);
2198  const typet type = java_type_from_char(statement[0]);
2199  results[0] = from_integer(value, type);
2200  }
2201  return results;
2202 }
2203 
2205  const java_method_typet::parameterst &parameters,
2207 {
2208  // do some type adjustment for the arguments,
2209  // as Java promotes arguments
2210  // Also cast pointers since intermediate locals
2211  // can be void*.
2212  INVARIANT(
2213  parameters.size() == arguments.size(),
2214  "for each parameter there must be exactly one argument");
2215  for(std::size_t i = 0; i < parameters.size(); i++)
2216  {
2217  const typet &type = parameters[i].type();
2218  if(
2219  type == java_boolean_type() || type == java_char_type() ||
2220  type == java_byte_type() || type == java_short_type() ||
2221  type.id() == ID_pointer)
2222  {
2223  arguments[i] = typecast_exprt::conditional_cast(arguments[i], type);
2224  }
2225  }
2226 }
2227 
2229  source_locationt location,
2230  const irep_idt &statement,
2231  class_method_descriptor_exprt &class_method_descriptor,
2232  codet &c,
2233  exprt::operandst &results)
2234 {
2235  const bool use_this(statement != "invokestatic");
2236  const bool is_virtual(
2237  statement == "invokevirtual" || statement == "invokeinterface");
2238 
2239  const irep_idt &invoked_method_id = class_method_descriptor.get_identifier();
2240  INVARIANT(
2241  !invoked_method_id.empty(),
2242  "invoke statement arg0 must have an identifier");
2243 
2244  auto method_symbol = symbol_table.symbols.find(invoked_method_id);
2245 
2246  // Use the most precise type available: the actual symbol has generic info,
2247  // whereas the type given by the invoke instruction doesn't and is therefore
2248  // less accurate.
2249  if(method_symbol != symbol_table.symbols.end())
2250  {
2251  // Note the number of parameters might change here due to constructors using
2252  // invokespecial will have zero arguments (the `this` is added below)
2253  // but the symbol for the <init> will have the this parameter.
2254  INVARIANT(
2255  to_java_method_type(class_method_descriptor.type()).return_type().id() ==
2256  to_code_type(method_symbol->second.type).return_type().id(),
2257  "Function return type must not change in kind");
2258  class_method_descriptor.type() = method_symbol->second.type;
2259  }
2260 
2261  // Note arg0 and arg0.type() are subject to many side-effects in this method,
2262  // then finally used to populate the call instruction.
2263  java_method_typet &method_type =
2264  to_java_method_type(class_method_descriptor.type());
2265 
2266  java_method_typet::parameterst &parameters(method_type.parameters());
2267 
2268  if(use_this)
2269  {
2270  const irep_idt class_id = class_method_descriptor.class_id();
2271 
2272  if(parameters.empty() || !parameters[0].get_this())
2273  {
2274  typet thistype = struct_tag_typet(class_id);
2275  reference_typet object_ref_type = java_reference_type(thistype);
2276  java_method_typet::parametert this_p(object_ref_type);
2277  this_p.set_this();
2278  this_p.set_base_name(ID_this);
2279  parameters.insert(parameters.begin(), this_p);
2280  }
2281 
2282  // Note invokespecial is used for super-method calls as well as
2283  // constructors.
2284  if(statement == "invokespecial")
2285  {
2286  if(is_constructor(invoked_method_id))
2287  {
2289  needed_lazy_methods->add_needed_class(class_id);
2290  method_type.set_is_constructor();
2291  }
2292  else
2293  method_type.set(ID_java_super_method_call, true);
2294  }
2295  }
2296 
2297  location.set_function(method_id);
2298 
2299  code_function_callt::argumentst arguments = pop(parameters.size());
2300 
2301  // double-check a bit
2302  INVARIANT(
2303  !use_this || arguments.front().type().id() == ID_pointer,
2304  "first argument must be a pointer");
2305 
2306  adjust_invoke_argument_types(parameters, arguments);
2307 
2308  // do some type adjustment for return values
2309  exprt lhs = nil_exprt();
2310  const typet &return_type = method_type.return_type();
2311 
2312  if(return_type.id() != ID_empty)
2313  {
2314  // return types are promoted in Java
2315  lhs = tmp_variable("return", return_type);
2316  exprt promoted = java_bytecode_promotion(lhs);
2317  results.resize(1);
2318  results[0] = promoted;
2319  }
2320 
2321  // If we don't have a definition for the called symbol, and we won't
2322  // inherit a definition from a super-class, we create a new symbol and
2323  // insert it in the symbol table. The name and type of the method are
2324  // derived from the information we have in the call.
2325  // We fix the access attribute to ID_private, because of the following
2326  // reasons:
2327  // - We don't know the original access attribute and since the .class file is
2328  // unavailable, we have no way to know.
2329  // - The translated method could be an inherited protected method, hence
2330  // accessible from the original caller, but not from the generated test.
2331  // Therefore we must assume that the method is not accessible.
2332  // We set opaque methods as final to avoid assuming they can be overridden.
2333  if(
2334  method_symbol == symbol_table.symbols.end() &&
2335  !(is_virtual && is_method_inherited(
2336  class_method_descriptor.class_id(),
2337  class_method_descriptor.mangled_method_name())))
2338  {
2340  invoked_method_id,
2341  class_method_descriptor.base_method_name(),
2342  id2string(class_method_descriptor.class_id()).substr(6) + "." +
2343  id2string(class_method_descriptor.base_method_name()) + "()",
2344  method_type,
2345  class_method_descriptor.class_id(),
2346  symbol_table,
2348  }
2349 
2350  exprt function;
2351  if(is_virtual)
2352  {
2353  // dynamic binding
2354  PRECONDITION(use_this);
2355  PRECONDITION(!arguments.empty());
2356  function = class_method_descriptor;
2357  // Populate needed methods later,
2358  // once we know what object types can exist.
2359  }
2360  else
2361  {
2362  // static binding
2363  function = symbol_exprt(invoked_method_id, method_type);
2365  {
2366  needed_lazy_methods->add_needed_method(invoked_method_id);
2367  // Calling a static method causes static initialization:
2368  needed_lazy_methods->add_needed_class(class_method_descriptor.class_id());
2369  }
2370  }
2371 
2372  code_function_callt call(
2373  std::move(lhs), std::move(function), std::move(arguments));
2374  call.add_source_location() = location;
2375  call.function().add_source_location() = location;
2376 
2377  // Replacing call if it is a function of the Character library,
2378  // returning the same call otherwise
2380 
2381  if(!use_this)
2382  {
2383  codet clinit_call = get_clinit_call(class_method_descriptor.class_id());
2384  if(clinit_call.get_statement() != ID_skip)
2385  c = code_blockt({clinit_call, c});
2386  }
2387 }
2388 
2390  source_locationt location,
2391  codet &c)
2392 {
2393  exprt operand = pop(1)[0];
2394 
2395  // we may need to adjust the type of the argument
2396  operand = typecast_exprt::conditional_cast(operand, bool_typet());
2397 
2398  c = code_assumet(operand);
2399  location.set_function(method_id);
2400  c.add_source_location() = location;
2401  return c;
2402 }
2403 
2405  const exprt &arg0,
2406  const exprt::operandst &op,
2407  codet &c,
2408  exprt::operandst &results) const
2409 {
2410  java_instanceof_exprt check(op[0], to_struct_tag_type(arg0.type()));
2411  code_assertt assert_class(check);
2412  assert_class.add_source_location().set_comment("Dynamic cast check");
2413  assert_class.add_source_location().set_property_class("bad-dynamic-cast");
2414  // we add this assert such that we can recognise it
2415  // during the instrumentation phase
2416  c = std::move(assert_class);
2417  results[0] = op[0];
2418 }
2419 
2421  const source_locationt &location,
2422  const exprt::operandst &op,
2423  codet &c,
2424  exprt::operandst &results) const
2425 {
2426  if(
2429  to_reference_type(op[0].type())) == "java::java.lang.AssertionError") &&
2431  {
2432  // we translate athrow into
2433  // ASSERT false;
2434  // ASSUME false:
2435  code_assertt assert_code(false_exprt{});
2436  source_locationt assert_location = location; // copy
2437  assert_location.set_comment("assertion at " + location.as_string());
2438  assert_location.set("user-provided", true);
2439  assert_location.set_property_class(ID_assertion);
2440  assert_code.add_source_location() = assert_location;
2441 
2442  code_assumet assume_code(false_exprt{});
2443  source_locationt assume_location = location; // copy
2444  assume_location.set("user-provided", true);
2445  assume_code.add_source_location() = assume_location;
2446 
2447  c = code_blockt({assert_code, assume_code});
2448  }
2449  else
2450  {
2451  side_effect_expr_throwt throw_expr(irept(), typet(), location);
2452  throw_expr.copy_to_operands(op[0]);
2453  c = code_expressiont(throw_expr);
2454  }
2455  results[0] = op[0];
2456 }
2457 
2460  const std::set<method_offsett> &working_set,
2461  method_offsett cur_pc,
2462  codet &code)
2463 {
2464  // For each exception handler range that starts here add a CATCH-PUSH marker
2465  // Each CATCH-PUSH records a list of all the exception id and handler label
2466  // pairs handled for its exact block
2467 
2468  // Gather all try-catch blocks that have cur_pc as the starting pc
2469  typedef std::vector<std::reference_wrapper<
2471  std::map<std::size_t, exceptionst> exceptions_by_end;
2473  : method.exception_table)
2474  {
2475  if(exception.start_pc == cur_pc)
2476  exceptions_by_end[exception.end_pc].push_back(exception);
2477  }
2478  for(const auto &exceptions : exceptions_by_end)
2479  {
2480  // For each block with a distinct end position create one CATCH-PUSH
2481  code_push_catcht catch_push;
2482  // Fill in its exception_list
2483  code_push_catcht::exception_listt &exception_list =
2484  catch_push.exception_list();
2486  : exceptions.second)
2487  {
2488  exception_list.emplace_back(
2489  exception.catch_type.get_identifier(),
2490  // Record the exception handler in the CATCH-PUSH instruction by
2491  // generating a label corresponding to the handler's pc
2492  label(std::to_string(exception.handler_pc)));
2493  }
2494  // Prepend the CATCH-PUSH instruction
2495  code = code_blockt({ std::move(catch_push), code });
2496  }
2497 
2498  // Next add the CATCH-POP instructions
2499  // exception_row.end_pc is exclusive so append a CATCH-POP instruction if
2500  // this is the instruction before it.
2501  // To do this, attempt to find all exception handlers that end at the
2502  // earliest known instruction after this one.
2503 
2504  // Dangerously, we assume that the next opcode in the bytecode after the end
2505  // of the exception handler block (whose address matches the exclusive end
2506  // position of the block) will be a successor of some code investigated
2507  // before the instruction at the end of that handler and therefore in the
2508  // working set.
2509  // As an example of where this may fail, for non-obfuscated bytecode
2510  // generated by most compilers the next opcode after the block ending at the
2511  // end of the try block is the lexically next bit of code after the try
2512  // block, i.e. the catch block. When there aren't any throwing statements in
2513  // the try block this block will not be the successor of any instruction.
2514 
2515  auto next_opcode_it = std::find_if(
2516  working_set.begin(),
2517  working_set.end(),
2518  [cur_pc](method_offsett offset) { return offset > cur_pc; });
2519  if(next_opcode_it != working_set.end())
2520  {
2521  // Count the distinct start positions of handlers that end at this location
2522  std::set<std::size_t> start_positions; // Use a set to deduplicate
2523  for(const auto &exception_row : method.exception_table)
2524  {
2525  // Check if the next instruction found is the (exclusive) end of a block
2526  if(*next_opcode_it == exception_row.end_pc)
2527  start_positions.insert(exception_row.start_pc);
2528  }
2529  for(std::size_t handler = 0; handler < start_positions.size(); ++handler)
2530  {
2531  // Append a CATCH-POP instruction before the end of the block
2532  code = code_blockt({ code, code_pop_catcht() });
2533  }
2534  }
2535 
2536  return code;
2537 }
2538 
2540  const source_locationt &location,
2541  const exprt &arg0,
2542  const exprt::operandst &op,
2543  exprt::operandst &results)
2544 {
2545  const reference_typet ref_type = java_reference_type(arg0.type());
2546  side_effect_exprt java_new_array(ID_java_new_array, ref_type, location);
2547  java_new_array.operands() = op;
2548 
2549  code_blockt create;
2550 
2551  if(max_array_length != 0)
2552  {
2554  binary_relation_exprt le_max_size(op[0], ID_le, size_limit);
2555  code_assumet assume_le_max_size(le_max_size);
2556  create.add(assume_le_max_size);
2557  }
2558 
2559  const exprt tmp = tmp_variable("newarray", ref_type);
2560  create.add(code_assignt(tmp, java_new_array));
2561  results[0] = tmp;
2562 
2563  return create;
2564 }
2565 
2567  const source_locationt &location,
2568  const irep_idt &statement,
2569  const exprt &arg0,
2570  const exprt::operandst &op,
2571  exprt::operandst &results)
2572 {
2573  java_reference_typet ref_type = [&]() {
2574  if(statement == "newarray")
2575  {
2576  irep_idt id = arg0.type().id();
2577 
2578  char element_type;
2579  if(id == ID_bool)
2580  element_type = 'z';
2581  else if(id == ID_char)
2582  element_type = 'c';
2583  else if(id == ID_float)
2584  element_type = 'f';
2585  else if(id == ID_double)
2586  element_type = 'd';
2587  else if(id == ID_byte)
2588  element_type = 'b';
2589  else if(id == ID_short)
2590  element_type = 's';
2591  else if(id == ID_int)
2592  element_type = 'i';
2593  else if(id == ID_long)
2594  element_type = 'j';
2595  else
2596  element_type = '?';
2597  return java_array_type(element_type);
2598  }
2599  else
2600  {
2602  }
2603  }();
2604 
2605  side_effect_exprt java_new_array(ID_java_new_array, ref_type, location);
2606  java_new_array.copy_to_operands(op[0]);
2607 
2608  code_blockt block;
2609 
2610  if(max_array_length != 0)
2611  {
2613  binary_relation_exprt le_max_size(op[0], ID_le, size_limit);
2614  code_assumet assume_le_max_size(le_max_size);
2615  block.add(std::move(assume_le_max_size));
2616  }
2617  const exprt tmp = tmp_variable("newarray", ref_type);
2618  block.add(code_assignt(tmp, java_new_array));
2619  results[0] = tmp;
2620 
2621  return block;
2622 }
2623 
2625  const source_locationt &location,
2626  const exprt &arg0,
2627  codet &c,
2628  exprt::operandst &results)
2629 {
2630  const reference_typet ref_type = java_reference_type(arg0.type());
2631  side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
2632 
2633  if(!location.get_line().empty())
2634  java_new_expr.add_source_location() = location;
2635 
2636  const exprt tmp = tmp_variable("new", ref_type);
2637  c = code_assignt(tmp, java_new_expr);
2638  c.add_source_location() = location;
2639  codet clinit_call =
2641  if(clinit_call.get_statement() != ID_skip)
2642  {
2643  c = code_blockt({clinit_call, c});
2644  }
2645  results[0] = tmp;
2646 }
2647 
2649  const source_locationt &location,
2650  const exprt &arg0,
2651  const exprt::operandst &op,
2652  const symbol_exprt &symbol_expr)
2653 {
2654  if(needed_lazy_methods && arg0.type().id() == ID_struct_tag)
2655  {
2656  needed_lazy_methods->add_needed_class(
2658  }
2659 
2660  code_blockt block;
2661  block.add_source_location() = location;
2662 
2663  // Note this initializer call deliberately inits the class used to make
2664  // the reference, which may be a child of the class that actually defines
2665  // the field.
2666  codet clinit_call = get_clinit_call(arg0.get_string(ID_class));
2667  if(clinit_call.get_statement() != ID_skip)
2668  block.add(clinit_call);
2669 
2671  "stack_static_field",
2672  block,
2674  symbol_expr.get_identifier());
2675  block.add(code_assignt(symbol_expr, op[0]));
2676  return block;
2677 }
2678 
2680  const fieldref_exprt &arg0,
2681  const exprt::operandst &op)
2682 {
2683  code_blockt block;
2685  "stack_field", block, bytecode_write_typet::FIELD, arg0.component_name());
2686  block.add(code_assignt(to_member(op[0], arg0, ns), op[1]));
2687  return block;
2688 }
2689 
2691  const source_locationt &source_location,
2692  const exprt &arg0,
2693  const symbol_exprt &symbol_expr,
2694  const bool is_assertions_disabled_field,
2695  codet &c,
2696  exprt::operandst &results)
2697 {
2699  {
2700  if(arg0.type().id() == ID_struct_tag)
2701  {
2702  needed_lazy_methods->add_needed_class(
2704  }
2705  else if(arg0.type().id() == ID_pointer)
2706  {
2707  const auto &pointer_type = to_pointer_type(arg0.type());
2708  if(pointer_type.base_type().id() == ID_struct_tag)
2709  {
2710  needed_lazy_methods->add_needed_class(
2712  }
2713  }
2714  else if(is_assertions_disabled_field)
2715  {
2716  needed_lazy_methods->add_needed_class(arg0.get_string(ID_class));
2717  }
2718  }
2719  symbol_exprt symbol_with_location = symbol_expr;
2720  symbol_with_location.add_source_location() = source_location;
2721  results[0] = java_bytecode_promotion(symbol_with_location);
2722 
2723  // Note this initializer call deliberately inits the class used to make
2724  // the reference, which may be a child of the class that actually defines
2725  // the field.
2726  codet clinit_call = get_clinit_call(arg0.get_string(ID_class));
2727  if(clinit_call.get_statement() != ID_skip)
2728  c = clinit_call;
2729  else if(is_assertions_disabled_field)
2730  {
2731  // set $assertionDisabled to false
2732  c = code_assignt(symbol_expr, false_exprt());
2733  }
2734 }
2735 
2737  const irep_idt &statement,
2738  const exprt::operandst &op,
2739  exprt::operandst &results) const
2740 {
2741  const int nan_value(statement[4] == 'l' ? -1 : 1);
2742  const typet result_type(java_int_type());
2743  const exprt nan_result(from_integer(nan_value, result_type));
2744 
2745  // (value1 == NaN || value2 == NaN) ?
2746  // nan_value : value1 < value2 ? -1 : value2 < value1 1 ? 1 : 0;
2747  // (value1 == NaN || value2 == NaN) ?
2748  // nan_value : value1 == value2 ? 0 : value1 < value2 -1 ? 1 : 0;
2749 
2750  isnan_exprt nan_op0(op[0]);
2751  isnan_exprt nan_op1(op[1]);
2752  exprt one = from_integer(1, result_type);
2753  exprt minus_one = from_integer(-1, result_type);
2754  results[0] = if_exprt(
2755  or_exprt(nan_op0, nan_op1),
2756  nan_result,
2757  if_exprt(
2758  ieee_float_equal_exprt(op[0], op[1]),
2759  from_integer(0, result_type),
2760  if_exprt(binary_relation_exprt(op[0], ID_lt, op[1]), minus_one, one)));
2761  return results;
2762 }
2763 
2765  const exprt::operandst &op,
2766  exprt::operandst &results) const
2767 { // The integer result on the stack is:
2768  // 0 if op[0] equals op[1]
2769  // -1 if op[0] is less than op[1]
2770  // 1 if op[0] is greater than op[1]
2771 
2772  const typet t = java_int_type();
2773  exprt one = from_integer(1, t);
2774  exprt minus_one = from_integer(-1, t);
2775 
2776  if_exprt greater =
2777  if_exprt(binary_relation_exprt(op[0], ID_gt, op[1]), one, minus_one);
2778 
2779  results[0] = if_exprt(
2780  binary_relation_exprt(op[0], ID_equal, op[1]), from_integer(0, t), greater);
2781  return results;
2782 }
2783 
2785  const irep_idt &statement,
2786  const exprt::operandst &op,
2787  exprt::operandst &results) const
2788 {
2789  const typet type = java_type_from_char(statement[0]);
2790 
2791  const std::size_t width = get_bytecode_type_width(type);
2792 
2793  // According to JLS 15.19 Shift Operators
2794  // a mask 0b11111 is applied for int and 0b111111 for long.
2795  exprt mask = from_integer(width - 1, op[1].type());
2796 
2797  results[0] = shl_exprt(op[0], bitand_exprt(op[1], mask));
2798  return results;
2799 }
2800 
2802  const irep_idt &statement,
2803  const exprt::operandst &op,
2804  exprt::operandst &results) const
2805 {
2806  const typet type = java_type_from_char(statement[0]);
2807 
2808  const std::size_t width = get_bytecode_type_width(type);
2809  typet target = unsignedbv_typet(width);
2810 
2811  exprt lhs = typecast_exprt::conditional_cast(op[0], target);
2812  exprt rhs = typecast_exprt::conditional_cast(op[1], target);
2813 
2814  results[0] =
2815  typecast_exprt::conditional_cast(lshr_exprt(lhs, rhs), op[0].type());
2816 
2817  return results;
2818 }
2819 
2821  const exprt &arg0,
2822  const exprt &arg1,
2823  const source_locationt &location,
2824  const method_offsett address)
2825 {
2826  code_blockt block;
2827  block.add_source_location() = location;
2828  // search variable on stack
2829  const exprt &locvar = variable(arg0, 'i', address);
2831  "stack_iinc",
2832  block,
2834  to_symbol_expr(locvar).get_identifier());
2835 
2836  const exprt arg1_int_type =
2838  code_assignt code_assign(
2839  variable(arg0, 'i', address),
2840  plus_exprt(
2842  variable(arg0, 'i', address), java_int_type()),
2843  arg1_int_type));
2844  block.add(std::move(code_assign));
2845 
2846  return block;
2847 }
2848 
2851  const exprt::operandst &op,
2852  const mp_integer &number,
2853  const source_locationt &location) const
2854 {
2855  const typecast_exprt lhs(op[0], java_reference_type(java_void_type()));
2856  const exprt rhs(null_pointer_exprt(to_pointer_type(lhs.type())));
2857 
2858  const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2859 
2860  code_ifthenelset code_branch(
2861  binary_relation_exprt(lhs, ID_equal, rhs),
2862  code_gotot(label(std::to_string(label_number))));
2863 
2864  code_branch.then_case().add_source_location() =
2865  address_map.at(label_number).source->source_location;
2866  code_branch.add_source_location() = location;
2867 
2868  return code_branch;
2869 }
2870 
2873  const exprt::operandst &op,
2874  const mp_integer &number,
2875  const source_locationt &location) const
2876 {
2877  const typecast_exprt lhs(op[0], java_reference_type(java_void_type()));
2878  const exprt rhs(null_pointer_exprt(to_pointer_type(lhs.type())));
2879 
2880  const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2881 
2882  code_ifthenelset code_branch(
2883  binary_relation_exprt(lhs, ID_notequal, rhs),
2884  code_gotot(label(std::to_string(label_number))));
2885 
2886  code_branch.then_case().add_source_location() =
2887  address_map.at(label_number).source->source_location;
2888  code_branch.add_source_location() = location;
2889 
2890  return code_branch;
2891 }
2892 
2895  const exprt::operandst &op,
2896  const irep_idt &id,
2897  const mp_integer &number,
2898  const source_locationt &location) const
2899 {
2900  const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2901 
2902  code_ifthenelset code_branch(
2903  binary_relation_exprt(op[0], id, from_integer(0, op[0].type())),
2904  code_gotot(label(std::to_string(label_number))));
2905 
2906  code_branch.cond().add_source_location() = location;
2908  code_branch.then_case().add_source_location() =
2909  address_map.at(label_number).source->source_location;
2911  code_branch.add_source_location() = location;
2913 
2914  return code_branch;
2915 }
2916 
2919  const u1 bytecode,
2920  const exprt::operandst &op,
2921  const mp_integer &number,
2922  const source_locationt &location) const
2923 {
2924  const irep_idt cmp_op = get_if_cmp_operator(bytecode);
2925  binary_relation_exprt condition(
2926  op[0], cmp_op, typecast_exprt::conditional_cast(op[1], op[0].type()));
2927  condition.add_source_location() = location;
2928 
2929  const method_offsett label_number = numeric_cast_v<method_offsett>(number);
2930 
2931  code_ifthenelset code_branch(
2932  std::move(condition), code_gotot(label(std::to_string(label_number))));
2933 
2934  code_branch.then_case().add_source_location() =
2935  address_map.at(label_number).source->source_location;
2936  code_branch.add_source_location() = location;
2937 
2938  return code_branch;
2939 }
2940 
2942  const std::vector<method_offsett> &jsr_ret_targets,
2943  const exprt &arg0,
2944  const source_locationt &location,
2945  const method_offsett address)
2946 {
2947  code_blockt c;
2948  auto retvar = variable(arg0, 'a', address);
2949  for(size_t idx = 0, idxlim = jsr_ret_targets.size(); idx != idxlim; ++idx)
2950  {
2951  irep_idt number = std::to_string(jsr_ret_targets[idx]);
2952  code_gotot g(label(number));
2953  g.add_source_location() = location;
2954  if(idx == idxlim - 1)
2955  c.add(g);
2956  else
2957  {
2958  auto address_ptr =
2959  from_integer(jsr_ret_targets[idx], unsignedbv_typet(64));
2960  address_ptr.type() = pointer_type(java_void_type());
2961 
2962  code_ifthenelset branch(equal_exprt(retvar, address_ptr), std::move(g));
2963 
2964  branch.cond().add_source_location() = location;
2965  branch.add_source_location() = location;
2966 
2967  c.add(std::move(branch));
2968  }
2969  }
2970  return c;
2971 }
2972 
2978 static exprt conditional_array_cast(const exprt &expr, char type_char)
2979 {
2980  const auto ref_type =
2981  type_try_dynamic_cast<java_reference_typet>(expr.type());
2982  const bool typecast_not_needed =
2983  ref_type && ((type_char == 'b' && ref_type->subtype().get_identifier() ==
2984  "java::array[boolean]") ||
2985  *ref_type == java_array_type(type_char));
2986  return typecast_not_needed ? expr
2987  : typecast_exprt(expr, java_array_type(type_char));
2988 }
2989 
2991  const irep_idt &statement,
2992  const exprt::operandst &op)
2993 {
2994  PRECONDITION(op.size() == 2);
2995  const char type_char = statement[0];
2996  const exprt op_with_right_type = conditional_array_cast(op[0], type_char);
2997  dereference_exprt deref{op_with_right_type};
2998  deref.set(ID_java_member_access, true);
2999 
3000  auto java_array_type = type_try_dynamic_cast<struct_tag_typet>(deref.type());
3001  INVARIANT(java_array_type, "Java array type should be a struct_tag_typet");
3002  member_exprt data_ptr{
3004  plus_exprt data_plus_offset{std::move(data_ptr), op[1]};
3005  // tag it so it's easy to identify during instrumentation
3006  data_plus_offset.set(ID_java_array_access, true);
3007  return java_bytecode_promotion(dereference_exprt{data_plus_offset});
3008 }
3009 
3011  const exprt &index,
3012  char type_char,
3013  size_t address)
3014 {
3015  const exprt var = variable(index, type_char, address);
3016  if(type_char == 'i')
3017  {
3018  INVARIANT(
3020  type_try_dynamic_cast<bitvector_typet>(var.type())->get_width() <= 32,
3021  "iload can be used for boolean, byte, short, int and char");
3023  }
3024  INVARIANT(
3025  (type_char == 'a' && can_cast_type<reference_typet>(var.type())) ||
3026  var.type() == java_type_from_char(type_char),
3027  "Variable type must match [adflv]load return type");
3028  return var;
3029 }
3030 
3032  const irep_idt &statement,
3033  const exprt &arg0,
3034  const exprt::operandst &op,
3035  const method_offsett address,
3036  const source_locationt &location)
3037 {
3038  const exprt var = variable(arg0, statement[0], address);
3039  const irep_idt &var_name = to_symbol_expr(var).get_identifier();
3040 
3041  code_blockt block;
3042  block.add_source_location() = location;
3043 
3045  "stack_store",
3046  block,
3048  var_name);
3049 
3050  block.add(
3052  location);
3053  return block;
3054 }
3055 
3057  const irep_idt &statement,
3058  const exprt::operandst &op,
3059  const source_locationt &location)
3060 {
3061  PRECONDITION(op.size() == 3);
3062  const char type_char = statement[0];
3063  const exprt op_with_right_type = conditional_array_cast(op[0], type_char);
3064  dereference_exprt deref{op_with_right_type};
3065  deref.set(ID_java_member_access, true);
3066 
3067  auto java_array_type = type_try_dynamic_cast<struct_tag_typet>(deref.type());
3068  INVARIANT(java_array_type, "Java array type should be a struct_tag_typet");
3069  member_exprt data_ptr{
3071  plus_exprt data_plus_offset{std::move(data_ptr), op[1]};
3072  // tag it so it's easy to identify during instrumentation
3073  data_plus_offset.set(ID_java_array_access, true);
3074 
3075  code_blockt block;
3076  block.add_source_location() = location;
3077 
3079  "stack_astore", block, bytecode_write_typet::ARRAY_REF, "");
3080 
3081  code_assignt array_put{dereference_exprt{data_plus_offset}, op[2]};
3082  block.add(std::move(array_put), location);
3083  return block;
3084 }
3085 
3087  const source_locationt &location,
3088  std::size_t instruction_address,
3089  const exprt &arg0,
3090  codet &result_code)
3091 {
3092  const java_method_typet &method_type = to_java_method_type(arg0.type());
3093  const java_method_typet::parameterst &parameters(method_type.parameters());
3094  const typet &return_type = method_type.return_type();
3095 
3096  // Note these must be popped regardless of whether we understand the lambda
3097  // method or not
3098  code_function_callt::argumentst arguments = pop(parameters.size());
3099 
3100  irep_idt synthetic_class_name =
3101  lambda_synthetic_class_name(method_id, instruction_address);
3102 
3103  if(!symbol_table.has_symbol(synthetic_class_name))
3104  {
3105  // We failed to parse the invokedynamic handle as a Java 8+ lambda;
3106  // give up and return null.
3107  const auto value = zero_initializer(return_type, location, ns);
3108  if(!value.has_value())
3109  {
3110  log.error().source_location = location;
3111  log.error() << "failed to zero-initialize return type" << messaget::eom;
3112  throw 0;
3113  }
3114  return value;
3115  }
3116 
3117  // Construct an instance of the synthetic class created for this invokedynamic
3118  // site:
3119 
3120  irep_idt constructor_name = id2string(synthetic_class_name) + ".<init>";
3121 
3122  const symbolt &constructor_symbol = ns.lookup(constructor_name);
3123 
3124  code_blockt result;
3125 
3126  // SyntheticType lambda_new = new SyntheticType;
3127  const reference_typet ref_type =
3128  java_reference_type(struct_tag_typet(synthetic_class_name));
3129  side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
3130  const exprt new_instance = tmp_variable("lambda_new", ref_type);
3131  result.add(code_assignt(new_instance, java_new_expr, location));
3132 
3133  // lambda_new.<init>(capture_1, capture_2, ...);
3134  // Add the implicit 'this' parameter:
3135  arguments.insert(arguments.begin(), new_instance);
3138 
3139  code_function_callt constructor_call(
3140  constructor_symbol.symbol_expr(), arguments);
3141  constructor_call.add_source_location() = location;
3142  result.add(constructor_call);
3144  {
3145  needed_lazy_methods->add_needed_method(constructor_symbol.name);
3146  needed_lazy_methods->add_needed_class(synthetic_class_name);
3147  }
3148 
3149  result_code = std::move(result);
3150 
3151  if(return_type.id() == ID_empty)
3152  return {};
3153  else
3154  return new_instance;
3155 }
3156 
3159  const std::vector<method_offsett> &jsr_ret_targets,
3160  const std::vector<
3161  std::vector<java_bytecode_parse_treet::instructiont>::const_iterator>
3162  &ret_instructions) const
3163 { // Draw edges from every `ret` to every `jsr` successor. Could do better with
3164  // flow analysis to distinguish multiple subroutines within the same function.
3165  for(const auto &retinst : ret_instructions)
3166  {
3167  auto &a_entry = address_map.at(retinst->address);
3168  a_entry.successors.insert(
3169  a_entry.successors.end(), jsr_ret_targets.begin(), jsr_ret_targets.end());
3170  }
3171 }
3172 
3173 std::vector<java_bytecode_convert_methodt::method_offsett>
3175  const method_offsett address,
3177  const
3178 {
3179  std::vector<method_offsett> result;
3180  for(const auto &exception_row : exception_table)
3181  {
3182  if(address >= exception_row.start_pc && address < exception_row.end_pc)
3183  result.push_back(exception_row.handler_pc);
3184  }
3185  return result;
3186 }
3187 
3201  symbolt &method_symbol,
3203  &local_variable_table,
3204  symbol_table_baset &symbol_table)
3205 {
3206  // Obtain a std::vector of java_method_typet::parametert objects from the
3207  // (function) type of the symbol
3208  java_method_typet &method_type = to_java_method_type(method_symbol.type);
3209  java_method_typet::parameterst &parameters = method_type.parameters();
3210 
3211  // Find number of parameters
3212  unsigned slots_for_parameters = java_method_parameter_slots(method_type);
3213 
3214  // Find parameter names in the local variable table:
3215  typedef std::pair<irep_idt, irep_idt> base_name_and_identifiert;
3216  std::map<std::size_t, base_name_and_identifiert> param_names;
3217  for(const auto &v : local_variable_table)
3218  {
3219  if(v.index < slots_for_parameters)
3220  param_names.emplace(
3221  v.index,
3222  make_pair(
3223  v.name, id2string(method_symbol.name) + "::" + id2string(v.name)));
3224  }
3225 
3226  // Assign names to parameters
3227  std::size_t param_index = 0;
3228  for(auto &param : parameters)
3229  {
3230  irep_idt base_name, identifier;
3231 
3232  // Construct a sensible base name (base_name) and a fully qualified name
3233  // (identifier) for every parameter of the method under translation,
3234  // regardless of whether we have an LVT or not; and assign it to the
3235  // parameter object (which is stored in the type of the symbol, not in the
3236  // symbol table)
3237  if(param_index == 0 && param.get_this())
3238  {
3239  // my.package.ClassName.myMethodName:(II)I::this
3240  base_name = ID_this;
3241  identifier = id2string(method_symbol.name) + "::" + id2string(base_name);
3242  }
3243  else
3244  {
3245  auto param_name = param_names.find(param_index);
3246  if(param_name != param_names.end())
3247  {
3248  base_name = param_name->second.first;
3249  identifier = param_name->second.second;
3250  }
3251  else
3252  {
3253  // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local
3254  // variable slot where the parameter is stored and T is a character
3255  // indicating the type
3256  const typet &type = param.type();
3257  char suffix = java_char_from_type(type);
3258  base_name = "arg" + std::to_string(param_index) + suffix;
3259  identifier =
3260  id2string(method_symbol.name) + "::" + id2string(base_name);
3261  }
3262  }
3263  param.set_base_name(base_name);
3264  param.set_identifier(identifier);
3265 
3266  // Build a new symbol for the parameter and add it to the symbol table
3267  parameter_symbolt parameter_symbol;
3268  parameter_symbol.base_name = base_name;
3269  parameter_symbol.mode = ID_java;
3270  parameter_symbol.name = identifier;
3271  parameter_symbol.type = param.type();
3272  symbol_table.insert(parameter_symbol);
3273 
3274  param_index += java_local_variable_slots(param.type());
3275  }
3276 }
3277 
3279  const symbolt &class_symbol,
3280  const java_bytecode_parse_treet::methodt &method,
3281  symbol_table_baset &symbol_table,
3282  message_handlert &message_handler,
3283  size_t max_array_length,
3284  bool throw_assertion_error,
3285  std::optional<ci_lazy_methods_neededt> needed_lazy_methods,
3286  java_string_library_preprocesst &string_preprocess,
3287  const class_hierarchyt &class_hierarchy,
3288  bool threading_support,
3289  const std::optional<prefix_filtert> &method_context,
3290  bool assert_no_exceptions_thrown)
3291 
3292 {
3294  symbol_table,
3295  message_handler,
3296  max_array_length,
3297  throw_assertion_error,
3298  needed_lazy_methods,
3299  string_preprocess,
3300  class_hierarchy,
3301  threading_support,
3302  assert_no_exceptions_thrown);
3303 
3304  java_bytecode_convert_method(class_symbol, method, method_context);
3305 }
3306 
3313  const irep_idt &classname,
3314  const irep_idt &mangled_method_name) const
3315 {
3316  const auto inherited_method = get_inherited_method_implementation(
3317  mangled_method_name, classname, symbol_table);
3318 
3319  return inherited_method.has_value();
3320 }
3321 
3328  const irep_idt &class_identifier,
3329  const irep_idt &component_name) const
3330 {
3331  const auto inherited_method = get_inherited_component(
3332  class_identifier, component_name, symbol_table, true);
3333 
3334  INVARIANT(
3335  inherited_method.has_value(), "static field should be in symbol table");
3336 
3337  return inherited_method->get_full_component_identifier();
3338 }
3339 
3347  const std::string &tmp_var_prefix,
3348  code_blockt &block,
3349  const bytecode_write_typet write_type,
3350  const irep_idt &identifier)
3351 {
3352  const std::function<bool(
3353  const std::function<tvt(const exprt &expr)>, const exprt &expr)>
3354  entry_matches = [&entry_matches](
3355  const std::function<tvt(const exprt &expr)> predicate,
3356  const exprt &expr) {
3357  const tvt &tvres = predicate(expr);
3358  if(tvres.is_unknown())
3359  {
3360  return std::any_of(
3361  expr.operands().begin(),
3362  expr.operands().end(),
3363  [&predicate, &entry_matches](const exprt &expr) {
3364  return entry_matches(predicate, expr);
3365  });
3366  }
3367  else
3368  {
3369  return tvres.is_true();
3370  }
3371  };
3372 
3373  // Function that checks whether the expression accesses a member with the
3374  // given identifier name. These accesses are created in the case of `iinc`, or
3375  // non-array `?store` instructions.
3376  const std::function<tvt(const exprt &expr)> has_member_entry = [&identifier](
3377  const exprt &expr) {
3378  const auto member_expr = expr_try_dynamic_cast<member_exprt>(expr);
3379  return !member_expr ? tvt::unknown()
3380  : tvt(member_expr->get_component_name() == identifier);
3381  };
3382 
3383  // Function that checks whether the expression is a symbol with the given
3384  // identifier name. These accesses are created in the case of `putstatic` or
3385  // `putfield` instructions.
3386  const std::function<tvt(const exprt &expr)> is_symbol_entry =
3387  [&identifier](const exprt &expr) {
3388  const auto symbol_expr = expr_try_dynamic_cast<symbol_exprt>(expr);
3389  return !symbol_expr ? tvt::unknown()
3390  : tvt(symbol_expr->get_identifier() == identifier);
3391  };
3392 
3393  // Function that checks whether the expression is a dereference
3394  // expression. These accesses are created in `?astore` array write
3395  // instructions.
3396  const std::function<tvt(const exprt &expr)> is_dereference_entry =
3397  [](const exprt &expr) {
3398  const auto dereference_expr =
3399  expr_try_dynamic_cast<dereference_exprt>(expr);
3400  return !dereference_expr ? tvt::unknown() : tvt(true);
3401  };
3402 
3403  for(auto &stack_entry : stack)
3404  {
3405  bool replace = false;
3406  switch(write_type)
3407  {
3410  replace = entry_matches(is_symbol_entry, stack_entry);
3411  break;
3413  replace = entry_matches(is_dereference_entry, stack_entry);
3414  break;
3416  replace = entry_matches(has_member_entry, stack_entry);
3417  break;
3418  }
3419  if(replace)
3420  {
3422  tmp_var_prefix, stack_entry.type(), block, stack_entry);
3423  }
3424  }
3425 }
3426 
3429  const std::string &tmp_var_prefix,
3430  const typet &tmp_var_type,
3431  code_blockt &block,
3432  exprt &stack_entry)
3433 {
3434  const exprt tmp_var=
3435  tmp_variable(tmp_var_prefix, tmp_var_type);
3436  block.add(code_assignt(tmp_var, stack_entry));
3437  stack_entry=tmp_var;
3438 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
API to expression classes for bitvectors.
const bitvector_typet & to_bitvector_type(const typet &type)
Cast a typet to a bitvector_typet.
const integer_bitvector_typet & to_integer_bitvector_type(const typet &type)
Cast a typet to an integer_bitvector_typet.
void branch(goto_modelt &goto_model, const irep_idt &id)
Definition: branch.cpp:21
struct bytecode_infot const bytecode_info[]
#define BC_ifnull
#define BC_jsr
#define BC_invokestatic
#define BC_iinc
#define BC_tableswitch
#define BC_drem
#define BC_d2l
#define BC_dup_x1
#define BC_invokespecial
#define BC_getfield
#define BC_nop
Definition: bytecode_info.h:65
#define BC_pop2
#define BC_instanceof
#define BC_goto
#define BC_arraylength
#define BC_dup_x2
#define BC_ldc
Definition: bytecode_info.h:83
#define BC_ldc2_w
Definition: bytecode_info.h:85
#define BC_ldiv
#define BC_iconst_m1
Definition: bytecode_info.h:67
#define BC_monitorexit
#define BC_new
#define BC_lookupswitch
#define BC_ifge
#define BC_newarray
#define BC_getstatic
#define BC_jsr_w
#define BC_anewarray
#define BC_lrem
#define BC_dup2
#define BC_aconst_null
Definition: bytecode_info.h:66
#define BC_pop
#define BC_return
#define BC_d2i
#define BC_dup
#define BC_athrow
#define BC_ifgt
#define BC_putfield
#define BC_f2i
#define BC_idiv
#define BC_ifeq
#define BC_monitorenter
#define BC_ret
#define BC_goto_w
#define BC_swap
#define BC_iflt
#define BC_checkcast
#define BC_putstatic
#define BC_ldc_w
Definition: bytecode_info.h:84
#define BC_multianewarray
#define BC_dup2_x2
uint8_t u1
Definition: bytecode_info.h:55
#define BC_dup2_x1
#define BC_invokedynamic
#define BC_irem
#define BC_invokeinterface
#define BC_frem
#define BC_ifnonnull
#define BC_invokevirtual
#define BC_ifle
#define BC_f2l
#define BC_ifne
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
Arithmetic right shift.
Internally generated symbol table entryThis is a symbol generated as part of translation to or modifi...
Definition: symbol.h:153
A base class for binary expressions.
Definition: std_expr.h:638
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:762
Bit-wise AND.
Bit-wise OR.
std::size_t get_width() const
Definition: std_types.h:925
Bit-wise XOR.
The Boolean type.
Definition: std_types.h:36
Non-graph-based representation of the class hierarchy.
An expression describing a method on a class.
Definition: std_expr.h:3503
const irep_idt & get_identifier() const
A unique identifier of the combination of class and method overload to which this expression refers.
Definition: std_expr.h:3563
const irep_idt & class_id() const
Unique identifier in the symbol table, of the compile time type of the class which this expression is...
Definition: std_expr.h:3548
const irep_idt & base_method_name() const
The name of the method to which this expression is applied as would be seen in the source code.
Definition: std_expr.h:3555
const irep_idt & mangled_method_name() const
The method name after mangling it by combining it with the descriptor.
Definition: std_expr.h:3540
A non-fatal assertion, which checks a condition then permits execution to continue.
Definition: std_code.h:270
A goto_instruction_codet representing an assignment in the program.
An assumption, which must hold in subsequent code.
Definition: std_code.h:217
A codet representing sequential composition of program statements.
Definition: std_code.h:130
void append(const code_blockt &extra_block)
Add all the codets from extra_block to the current code_blockt.
Definition: std_code.cpp:86
void add(const codet &code)
Definition: std_code.h:168
code_operandst & statements()
Definition: std_code.h:138
codet & find_last_statement()
Definition: std_code.cpp:96
A goto_instruction_codet representing the declaration of a local variable.
codet representation of an expression statement.
Definition: std_code.h:1394
codet representation of a "return from a function" statement.
Definition: std_code.h:893
goto_instruction_codet representation of a function call statement.
exprt::operandst argumentst
codet representation of a goto statement.
Definition: std_code.h:841
codet representation of an if-then-else statement.
Definition: std_code.h:460
const codet & then_case() const
Definition: std_code.h:488
const exprt & cond() const
Definition: std_code.h:478
codet representation of a label for branch targets.
Definition: std_code.h:959
codet & code()
Definition: std_code.h:977
void set_label(const irep_idt &label)
Definition: std_code.h:972
A statement that catches an exception, assigning the exception in flight to an expression (e....
Definition: std_code.h:1936
Pops an exception handler from the stack of active handlers (i.e.
Definition: std_code.h:1899
Pushes an exception handler, of the form: exception_tag1 -> label1 exception_tag2 -> label2 ....
Definition: std_code.h:1805
std::vector< exception_list_entryt > exception_listt
Definition: std_code.h:1849
exception_listt & exception_list()
Definition: std_code.h:1860
A codet representing a skip statement.
Definition: std_code.h:322
codet representation of a switch-case, i.e. a case statement within a switch.
Definition: std_code.h:1023
void set_default()
Definition: std_code.h:1035
codet representing a switch statement.
Definition: std_code.h:548
void set_base_name(const irep_idt &name)
Definition: std_types.h:629
void set_is_constructor()
Definition: std_types.h:734
const typet & return_type() const
Definition: std_types.h:689
const irep_idt & get_access() const
Definition: std_types.h:719
bool has_this() const
Definition: std_types.h:660
void set_access(const irep_idt &access)
Definition: std_types.h:724
bool get_is_constructor() const
Definition: std_types.h:729
const parameterst & parameters() const
Definition: std_types.h:699
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
const irep_idt & get_statement() const
Definition: std_code_base.h:65
A constant literal expression.
Definition: std_expr.h:2990
Operator to dereference a pointer.
Definition: pointer_expr.h:834
Division.
Definition: std_expr.h:1152
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
bool empty() const
Definition: dstring.h:89
Equality.
Definition: std_expr.h:1361
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition: expr.h:163
source_locationt & add_source_location()
Definition: expr.h:236
const source_locationt & source_location() const
Definition: expr.h:231
typet & type()
Return the type of the expression.
Definition: expr.h:84
bool is_constant() const
Return whether the expression is a constant.
Definition: expr.h:212
operandst & operands()
Definition: expr.h:94
The Boolean constant false.
Definition: std_expr.h:3072
Represents the argument of an instruction that uses a CONSTANT_Fieldref This is used for example as a...
irep_idt class_name() const
irep_idt component_name() const
IEEE-floating-point equality.
Definition: floatbv_expr.h:264
static ieee_float_spect single_precision()
Definition: ieee_float.h:70
static ieee_float_spect double_precision()
Definition: ieee_float.h:76
constant_exprt to_expr() const
Definition: ieee_float.cpp:703
void from_expr(const constant_exprt &expr)
void from_integer(const mp_integer &i)
Definition: ieee_float.cpp:516
The trinary if-then-else operator.
Definition: std_expr.h:2370
constant_exprt largest_expr() const
Return an expression representing the largest value of this type.
constant_exprt smallest_expr() const
Return an expression representing the smallest value of this type.
There are a large number of kinds of tree structured or tree-like data in CPROVER.
Definition: irep.h:364
const irept & find(const irep_idt &name) const
Definition: irep.cpp:93
const irep_idt & get(const irep_idt &name) const
Definition: irep.cpp:44
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:412
const irep_idt & id() const
Definition: irep.h:388
void swap(irept &irep)
Definition: irep.h:434
const std::string & get_string(const irep_idt &name) const
Definition: irep.h:401
Evaluates to true if the operand is NaN.
Definition: floatbv_expr.h:88
void convert_dup2_x2(exprt::operandst &op, exprt::operandst &results)
method_offsett slots_for_parameters
Number of local variable slots used by the JVM to pass parameters upon invocation of the method under...
irep_idt get_static_field(const irep_idt &class_identifier, const irep_idt &component_name) const
Get static field identifier referred to by class_identifier.component_name Note this may be inherited...
void draw_edges_from_ret_to_jsr(address_mapt &address_map, const std::vector< method_offsett > &jsr_ret_targets, const std::vector< std::vector< java_bytecode_parse_treet::instructiont >::const_iterator > &ret_instructions) const
code_ifthenelset convert_if_cmp(const java_bytecode_convert_methodt::address_mapt &address_map, const u1 bytecode, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
void create_stack_tmp_var(const std::string &, const typet &, code_blockt &, exprt &)
actually create a temporary variable to hold the value of a stack entry
std::optional< ci_lazy_methods_neededt > needed_lazy_methods
codet & do_exception_handling(const methodt &method, const std::set< method_offsett > &working_set, method_offsett cur_pc, codet &c)
exprt::operandst & convert_ushr(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
exprt convert_load(const exprt &index, char type_char, size_t address)
Load reference from local variable.
void setup_local_variables(const methodt &m, const address_mapt &amap)
See find_initializers_for_slot above for more detail.
const variablet & find_variable_for_slot(size_t address, variablest &var_list)
See above.
void push(const exprt::operandst &o)
code_blockt convert_store(const irep_idt &statement, const exprt &arg0, const exprt::operandst &op, const method_offsett address, const source_locationt &location)
code_blockt convert_astore(const irep_idt &statement, const exprt::operandst &op, const source_locationt &location)
std::optional< exprt > convert_invoke_dynamic(const source_locationt &location, std::size_t instruction_address, const exprt &arg0, codet &result_code)
static irep_idt label(const irep_idt &address)
std::vector< method_offsett > try_catch_handler(method_offsett address, const java_bytecode_parse_treet::methodt::exception_tablet &exception_table) const
code_blockt convert_instructions(const methodt &)
codet & replace_call_to_cprover_assume(source_locationt location, codet &c)
java_string_library_preprocesst & string_preprocess
void convert(const symbolt &class_symbol, const methodt &, const std::optional< prefix_filtert > &method_context)
code_blockt convert_ret(const std::vector< method_offsett > &jsr_ret_targets, const exprt &arg0, const source_locationt &location, const method_offsett address)
codet get_clinit_call(const irep_idt &classname)
Each static access to classname should be prefixed with a check for necessary static init; this retur...
code_blockt convert_putstatic(const source_locationt &location, const exprt &arg0, const exprt::operandst &op, const symbol_exprt &symbol_expr)
code_ifthenelset convert_if(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const irep_idt &id, const mp_integer &number, const source_locationt &location) const
code_ifthenelset convert_ifnull(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
code_blockt & get_block_for_pcrange(block_tree_nodet &tree, code_blockt &this_block, method_offsett address_start, method_offsett address_limit, method_offsett next_block_start_address)
'tree' describes a tree of code_blockt objects; this_block is the corresponding block (thus they are ...
codet convert_pop(const irep_idt &statement, const exprt::operandst &op)
exprt::operandst pop(std::size_t n)
exprt::operandst & convert_const(const irep_idt &statement, const constant_exprt &arg0, exprt::operandst &results) const
void convert_checkcast(const exprt &arg0, const exprt::operandst &op, codet &c, exprt::operandst &results) const
void convert_dup2_x1(exprt::operandst &op, exprt::operandst &results)
code_blockt convert_putfield(const fieldref_exprt &arg0, const exprt::operandst &op)
code_blockt convert_multianewarray(const source_locationt &location, const exprt &arg0, const exprt::operandst &op, exprt::operandst &results)
code_ifthenelset convert_ifnonull(const java_bytecode_convert_methodt::address_mapt &address_map, const exprt::operandst &op, const mp_integer &number, const source_locationt &location) const
code_blockt convert_parameter_annotations(const methodt &method, const java_method_typet &method_type)
code_switcht convert_switch(const exprt::operandst &op, const java_bytecode_parse_treet::instructiont::argst &args, const source_locationt &location)
void save_stack_entries(const std::string &, code_blockt &, const bytecode_write_typet, const irep_idt &)
Create temporary variables if a write instruction can have undesired side- effects.
void convert_new(const source_locationt &location, const exprt &arg0, codet &c, exprt::operandst &results)
exprt variable(const exprt &arg, char type_char, size_t address)
Returns an expression indicating a local variable suitable to load/store from a bytecode at address a...
std::map< method_offsett, converted_instructiont > address_mapt
static void replace_goto_target(codet &repl, const irep_idt &old_label, const irep_idt &new_label)
Find all goto statements in 'repl' that target 'old_label' and redirect them to 'new_label'.
code_blockt convert_iinc(const exprt &arg0, const exprt &arg1, const source_locationt &location, method_offsett address)
exprt::operandst & convert_cmp2(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
void convert_dup2(exprt::operandst &op, exprt::operandst &results)
bool is_method_inherited(const irep_idt &classname, const irep_idt &mangled_method_name) const
Returns true iff method methodid from class classname is a method inherited from a class or interface...
void convert_athrow(const source_locationt &location, const exprt::operandst &op, codet &c, exprt::operandst &results) const
typet method_return_type
Return type of the method under conversion.
void convert_getstatic(const source_locationt &source_location, const exprt &arg0, const symbol_exprt &symbol_expr, bool is_assertions_disabled_field, codet &c, exprt::operandst &results)
irep_idt method_id
Fully qualified name of the method under translation.
code_blockt convert_newarray(const source_locationt &location, const irep_idt &statement, const exprt &arg0, const exprt::operandst &op, exprt::operandst &results)
exprt::operandst & convert_shl(const irep_idt &statement, const exprt::operandst &op, exprt::operandst &results) const
exprt::operandst & convert_cmp(const exprt::operandst &op, exprt::operandst &results) const
void pop_residue(std::size_t n)
removes minimum(n, stack.size()) elements from the stack
void convert_invoke(source_locationt location, const irep_idt &statement, class_method_descriptor_exprt &class_method_descriptor, codet &c, exprt::operandst &results)
code_blockt & get_or_create_block_for_pcrange(block_tree_nodet &tree, code_blockt &this_block, method_offsett address_start, method_offsett address_limit, method_offsett next_block_start_address, const address_mapt &amap, bool allow_merge=true)
As above, but this version can additionally create a new branch in the block_tree-node and code_block...
codet convert_monitorenterexit(const irep_idt &statement, const exprt::operandst &op, const source_locationt &source_location)
irep_idt current_method
A copy of method_id :/.
static exprt convert_aload(const irep_idt &statement, const exprt::operandst &op)
symbol_exprt tmp_variable(const std::string &prefix, const typet &type)
const methodst & methods() const
Definition: java_types.h:299
void add_throws_exception(irep_idt exception)
Definition: java_types.h:131
std::vector< parametert > parameterst
Definition: std_types.h:585
void set_is_varargs(bool is_varargs)
Definition: java_types.h:161
void set_is_synthetic(bool is_synthetic)
Definition: java_types.h:171
void set_is_final(bool is_final)
Definition: java_types.h:141
void set_native(bool is_native)
Definition: java_types.h:151
This is a specialization of reference_typet.
Definition: java_types.h:602
codet replace_character_call(code_function_callt call)
Logical right shift.
Extract member of struct or union.
Definition: std_expr.h:2844
source_locationt source_location
Definition: message.h:239
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:154
mstreamt & error() const
Definition: message.h:391
mstreamt & debug() const
Definition: message.h:421
message_handlert & get_message_handler()
Definition: message.h:183
static eomt eom
Definition: message.h:289
Binary minus.
Definition: std_expr.h:1061
Modulo defined as lhs-(rhs * truncate(lhs/rhs)).
Definition: std_expr.h:1223
Binary multiplication Associativity is not specified.
Definition: std_expr.h:1107
const union_typet & follow_tag(const union_tag_typet &) const
Follow type tag of union type.
Definition: namespace.cpp:63
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:148
The NIL expression.
Definition: std_expr.h:3081
Disequality.
Definition: std_expr.h:1420
The null pointer constant.
Definition: pointer_expr.h:909
Boolean OR.
Definition: std_expr.h:2228
Symbol table entry of function parameterThis is a symbol generated as part of type checking.
Definition: symbol.h:179
Given a string of the format '?blah?', will return true when compared against a string that matches a...
Definition: pattern.h:21
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
const typet & base_type() const
The type of the data what we point to.
Definition: pointer_expr.h:35
The reference type.
Definition: pointer_expr.h:121
Left shift.
A side_effect_exprt representation of a side effect that throws an exception.
Definition: std_code.h:1757
An expression containing a side effect.
Definition: std_code.h:1450
void set_comment(const irep_idt &comment)
void set_property_class(const irep_idt &property_class)
const irep_idt & get_line() const
std::string as_string() const
void set_function(const irep_idt &function)
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:493
const componentst & components() const
Definition: std_types.h:147
Expression to hold a symbol (variable)
Definition: std_expr.h:131
static symbol_exprt typeless(const irep_idt &id)
Generate a symbol_exprt without a proper type.
Definition: std_expr.h:150
const irep_idt & get_identifier() const
Definition: std_expr.h:160
The symbol table base class interface.
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
virtual std::pair< symbolt &, bool > insert(symbolt symbol)=0
Move or copy a new symbol to the symbol table.
Symbol table entry.
Definition: symbol.h:28
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
irep_idt module
Name of module the symbol belongs to.
Definition: symbol.h:43
bool is_static_lifetime
Definition: symbol.h:70
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
exprt value
Initial value of symbol.
Definition: symbol.h:34
irep_idt mode
Language mode.
Definition: symbol.h:49
const irep_idt & get_identifier() const
Definition: std_types.h:410
Definition: threeval.h:20
bool is_unknown() const
Definition: threeval.h:27
bool is_true() const
Definition: threeval.h:25
static tvt unknown()
Definition: threeval.h:33
Semantic type conversion.
Definition: std_expr.h:2068
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2076
The type of an expression, extends irept.
Definition: type.h:29
The unary minus expression.
Definition: std_expr.h:484
static irep_idt get_exception_type(const pointer_typet &)
Returns the compile type of an exception.
Fixed-width bit-vector with unsigned binary interpretation.
An exception that is raised for unsupported class signature.
Definition: java_types.h:1131
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
int isdigit(int c)
Definition: ctype.c:24
std::optional< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
API to expression classes for floating-point arithmetic.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
static int8_t r
Definition: irep_hash.h:60
void convert_annotations(const java_bytecode_parse_treet::annotationst &parsed_annotations, std::vector< java_annotationt > &java_annotations)
Convert parsed annotations into the symbol table.
static void gather_symbol_live_ranges(java_bytecode_convert_methodt::method_offsett pc, const exprt &e, std::map< irep_idt, java_bytecode_convert_methodt::variablet > &result)
static bool is_constructor(const irep_idt &method_name)
static irep_idt get_method_identifier(const irep_idt &class_identifier, const java_bytecode_parse_treet::methodt &method)
void java_bytecode_initialize_parameter_names(symbolt &method_symbol, const java_bytecode_parse_treet::methodt::local_variable_tablet &local_variable_table, symbol_table_baset &symbol_table)
This uses a cut-down version of the logic in java_bytecode_convert_methodt::convert to initialize sym...
static member_exprt to_member(const exprt &pointer, const fieldref_exprt &field_reference, const namespacet &ns)
Build a member exprt for accessing a specific field that may come from a base class.
static exprt conditional_array_cast(const exprt &expr, char type_char)
Add typecast if necessary to expr to make it compatible with array type corresponding to type_char (s...
void create_parameter_symbols(const java_method_typet::parameterst &parameters, expanding_vectort< std::vector< java_bytecode_convert_methodt::variablet >> &variables, symbol_table_baset &symbol_table)
void java_bytecode_convert_method(const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, bool throw_assertion_error, std::optional< ci_lazy_methods_neededt > needed_lazy_methods, java_string_library_preprocesst &string_preprocess, const class_hierarchyt &class_hierarchy, bool threading_support, const std::optional< prefix_filtert > &method_context, bool assert_no_exceptions_thrown)
void create_parameter_names(const java_bytecode_parse_treet::methodt &m, const irep_idt &method_identifier, java_method_typet::parameterst &parameters, const java_bytecode_convert_methodt::method_offsett &slots_for_parameters)
Extracts the names of parameters from the local variable table in the method, and uses it to construc...
void create_method_stub_symbol(const irep_idt &identifier, const irep_idt &base_name, const irep_idt &pretty_name, const typet &type, const irep_idt &declaring_class, symbol_table_baset &symbol_table, message_handlert &message_handler)
java_method_typet member_type_lazy(const std::string &descriptor, const std::optional< std::string > &signature, const std::string &class_name, const std::string &method_name, message_handlert &message_handler)
Returns the member type for a method, based on signature or descriptor.
static std::size_t get_bytecode_type_width(const typet &ty)
static void adjust_invoke_argument_types(const java_method_typet::parameterst &parameters, code_function_callt::argumentst &arguments)
static void assign_parameter_names(java_method_typet &ftype, const irep_idt &name_prefix, symbol_table_baset &symbol_table)
Iterates through the parameters of the function type ftype, finds a new new name for each parameter a...
static irep_idt get_if_cmp_operator(const u1 bytecode)
void java_bytecode_convert_method_lazy(symbolt &class_symbol, const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &m, symbol_table_baset &symbol_table, message_handlert &message_handler)
This creates a method symbol in the symtab, but doesn't actually perform method conversion just yet.
JAVA Bytecode Language Conversion.
JAVA Bytecode Language Conversion.
Java-specific exprt subclasses.
irep_idt clinit_wrapper_name(const irep_idt &class_name)
Get the Java static initializer wrapper name for a given class (the wrapper checks if static initiali...
Produce code for simple implementation of String Java libraries.
Representation of a constant Java string.
bool can_cast_expr< java_string_literal_exprt >(const exprt &base)
signedbv_typet java_int_type()
Definition: java_types.cpp:31
typet java_type_from_char(char t)
Constructs a type indicated by the given character:
Definition: java_types.cpp:248
java_reference_typet java_reference_array_type(const struct_tag_typet &subtype)
Definition: java_types.cpp:541
empty_typet java_void_type()
Definition: java_types.cpp:37
char java_char_from_type(const typet &type)
Definition: java_types.cpp:708
const typet & java_array_element_type(const struct_tag_typet &array_symbol)
Return a const reference to the element type of a given java array type.
Definition: java_types.cpp:144
signedbv_typet java_byte_type()
Definition: java_types.cpp:55
std::string pretty_signature(const java_method_typet &method_type)
typet java_bytecode_promotion(const typet &type)
Java does not support byte/short return types. These are always promoted.
Definition: java_types.cpp:269
java_reference_typet java_array_type(const char subtype)
Construct an array pointer type.
Definition: java_types.cpp:109
signedbv_typet java_short_type()
Definition: java_types.cpp:49
std::optional< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
Definition: java_types.cpp:561
reference_typet java_reference_type(const typet &subtype)
Definition: java_types.cpp:88
c_bool_typet java_boolean_type()
Definition: java_types.cpp:79
unsignedbv_typet java_char_type()
Definition: java_types.cpp:61
const java_class_typet & to_java_class_type(const typet &type)
Definition: java_types.h:581
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:183
std::optional< irep_idt > declaring_class(const symbolt &symbol)
Gets the identifier of the class which declared a given symbol.
Definition: java_utils.cpp:568
void set_declaring_class(symbolt &symbol, const irep_idt &declaring_class)
Sets the identifier of the class which declared a given symbol to declaring_class.
Definition: java_utils.cpp:574
unsigned java_method_parameter_slots(const java_method_typet &t)
Returns the the number of JVM local variables (slots) used by the JVM to pass, upon call,...
Definition: java_utils.cpp:147
irep_idt strip_java_namespace_prefix(const irep_idt &to_strip)
Strip java:: prefix from given identifier.
Definition: java_utils.cpp:407
dereference_exprt checked_dereference(const exprt &expr)
Dereference an expression and flag it for a null-pointer check.
Definition: java_utils.cpp:281
unsigned java_local_variable_slots(const typet &t)
Returns the number of JVM local variables (slots) taken by a local variable that, when translated to ...
Definition: java_utils.cpp:128
std::optional< resolve_inherited_componentt::inherited_componentt > get_inherited_component(const irep_idt &component_class_id, const irep_idt &component_name, const symbol_table_baset &symbol_table, bool include_interfaces)
Finds an inherited component (method or field), taking component visibility into account.
Definition: java_utils.cpp:448
void merge_source_location_rec(exprt &expr, const source_locationt &source_location)
Attaches a source location to an expression and all of its subexpressions.
Definition: java_utils.cpp:198
irep_idt lambda_synthetic_class_name(const irep_idt &method_identifier, std::size_t instruction_address)
static symbolt constructor_symbol(synthetic_methods_mapt &synthetic_methods, const irep_idt &synthetic_class_name, java_method_typet constructor_type)
Java lambda code synthesis.
double log(double x)
Definition: math.c:2776
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
const std::string integer2string(const mp_integer &n, unsigned base)
Definition: mp_arith.cpp:103
Pattern matching for bytecode instructions.
bool can_cast_type< reference_typet >(const typet &type)
Check whether a reference to a typet is a reference_typet.
Definition: pointer_expr.h:149
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
const reference_typet & to_reference_type(const typet &type)
Cast a typet to a reference_typet.
Definition: pointer_expr.h:162
Prefix Filtering.
std::optional< resolve_inherited_componentt::inherited_componentt > get_inherited_method_implementation(const irep_idt &call_basename, const irep_idt &classname, const symbol_table_baset &symbol_table)
Given a class and a component, identify the concrete method it is resolved to.
Given a class and a component (either field or method), find the closest parent that defines that com...
BigInt mp_integer
Definition: smt_terms.h:17
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
const code_gotot & to_code_goto(const codet &code)
Definition: std_code.h:875
const code_labelt & to_code_label(const codet &code)
Definition: std_code.h:1004
const code_blockt & to_code_block(const codet &code)
Definition: std_code.h:203
const codet & to_code(const exprt &expr)
API to expression classes.
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition: std_expr.h:3045
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:788
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:518
bool can_cast_type< bitvector_typet >(const typet &type)
Check whether a reference to a typet is a bitvector_typet.
Definition: std_types.h:952
void replace(const union_find_replacet &replace_map, string_not_contains_constraintt &constraint)
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
const char * mnemonic
Definition: bytecode_info.h:46
std::optional< std::string > signature
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.
std::vector< local_variablet > local_variable_tablet
static std::optional< annotationt > find_annotation(const annotationst &annotations, const irep_idt &annotation_type_name)
Find an annotation given its name.
Author: Diffblue Ltd.
Over-approximative uncaught exceptions analysis.
dstringt irep_idt