CBMC
java_bytecode_parser.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "java_bytecode_parser.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <map>
14 #include <string>
15 
16 #include <util/arith_tools.h>
17 #include <util/ieee_float.h>
18 #include <util/parser.h>
19 #include <util/std_expr.h>
20 #include <util/string_constant.h>
21 
22 #include "bytecode_info.h"
25 #include "java_types.h"
26 
27 class java_bytecode_parsert final : public parsert
28 {
29 public:
31  bool skip_instructions,
32  message_handlert &message_handler)
33  : parsert(message_handler), skip_instructions(skip_instructions)
34  {
35  }
36 
37  bool parse() override;
38 
39  struct pool_entryt
40  {
41  u1 tag = 0;
42  u2 ref1 = 0;
43  u2 ref2 = 0;
45  u8 number = 0;
47  };
48 
50 
51 private:
60 
61  using constant_poolt = std::vector<pool_entryt>;
62 
64 
65  const bool skip_instructions = false;
66 
68  {
69  if(index==0 || index>=constant_pool.size())
70  {
71  log.error() << "invalid constant pool index (" << index << ")"
72  << messaget::eom;
73  log.error() << "constant pool size: " << constant_pool.size()
74  << messaget::eom;
75  throw 0;
76  }
77 
78  return constant_pool[index];
79  }
80 
81  exprt &constant(u2 index)
82  {
83  return pool_entry(index).expr;
84  }
85 
86  const typet type_entry(u2 index)
87  {
88  return *java_type_from_string(id2string(pool_entry(index).s));
89  }
90 
91  void rClassFile();
92  void rconstant_pool();
93  void rinterfaces();
94  void rfields();
95  void rmethods();
96  void rmethod();
97  void rinner_classes_attribute(const u4 &attribute_length);
98  std::vector<irep_idt> rexceptions_attribute();
99  void rclass_attribute();
100  void rRuntimeAnnotation_attribute(std::vector<annotationt> &);
104  void rmethod_attribute(methodt &method);
105  void rfield_attribute(fieldt &);
106  void rcode_attribute(methodt &method);
107  void read_verification_type_info(methodt::verification_type_infot &);
108  void rbytecode(std::vector<instructiont> &);
109  void get_class_refs();
110  void get_class_refs_rec(const typet &);
111  void get_annotation_class_refs(const std::vector<annotationt> &annotations);
112  void get_annotation_value_class_refs(const exprt &value);
114  std::optional<lambda_method_handlet>
115  parse_method_handle(const class method_handle_infot &entry);
117 
118  void skip_bytes(std::size_t bytes)
119  {
120  for(std::size_t i=0; i<bytes; i++)
121  {
122  if(!*in)
123  {
124  log.error() << "unexpected end of bytecode file" << messaget::eom;
125  throw 0;
126  }
127  in->get();
128  }
129  }
130 
131  template <typename T>
132  T read()
133  {
134  static_assert(
135  std::is_unsigned<T>::value, "T should be an unsigned integer");
136  const constexpr size_t bytes = sizeof(T);
137  u8 result = 0;
138  for(size_t i = 0; i < bytes; i++)
139  {
140  if(!*in)
141  {
142  log.error() << "unexpected end of bytecode file" << messaget::eom;
143  throw 0;
144  }
145  result <<= 8u;
146  result |= static_cast<u1>(in->get());
147  }
148  return narrow_cast<T>(result);
149  }
150 
151  void store_unknown_method_handle(size_t bootstrap_method_index);
152 };
153 
154 #define CONSTANT_Class 7
155 #define CONSTANT_Fieldref 9
156 #define CONSTANT_Methodref 10
157 #define CONSTANT_InterfaceMethodref 11
158 #define CONSTANT_String 8
159 #define CONSTANT_Integer 3
160 #define CONSTANT_Float 4
161 #define CONSTANT_Long 5
162 #define CONSTANT_Double 6
163 #define CONSTANT_NameAndType 12
164 #define CONSTANT_Utf8 1
165 #define CONSTANT_MethodHandle 15
166 #define CONSTANT_MethodType 16
167 #define CONSTANT_InvokeDynamic 18
168 
169 #define VTYPE_INFO_TOP 0
170 #define VTYPE_INFO_INTEGER 1
171 #define VTYPE_INFO_FLOAT 2
172 #define VTYPE_INFO_LONG 3
173 #define VTYPE_INFO_DOUBLE 4
174 #define VTYPE_INFO_ITEM_NULL 5
175 #define VTYPE_INFO_UNINIT_THIS 6
176 #define VTYPE_INFO_OBJECT 7
177 #define VTYPE_INFO_UNINIT 8
178 
180 {
181 public:
183  using pool_entry_lookupt = std::function<pool_entryt &(u2)>;
184 
185  explicit structured_pool_entryt(const pool_entryt &entry) : tag(entry.tag)
186  {
187  }
188 
189  u1 get_tag() const
190  {
191  return tag;
192  }
193 
194 protected:
195  static std::string read_utf8_constant(const pool_entryt &entry)
196  {
197  INVARIANT(
198  entry.tag == CONSTANT_Utf8, "Name entry must be a constant UTF-8");
199  return id2string(entry.s);
200  }
201 
202 private:
204 };
205 
209 {
210 public:
211  explicit class_infot(const pool_entryt &entry) : structured_pool_entryt(entry)
212  {
213  PRECONDITION(entry.tag == CONSTANT_Class);
214  name_index = entry.ref1;
215  }
216 
217  std::string get_name(const pool_entry_lookupt &pool_entry) const
218  {
219  const pool_entryt &name_entry = pool_entry(name_index);
220  return read_utf8_constant(name_entry);
221  }
222 
223 private:
225 };
226 
230 {
231 public:
232  explicit name_and_type_infot(const pool_entryt &entry)
233  : structured_pool_entryt(entry)
234  {
236  name_index = entry.ref1;
237  descriptor_index = entry.ref2;
238  }
239 
240  std::string get_name(const pool_entry_lookupt &pool_entry) const
241  {
242  const pool_entryt &name_entry = pool_entry(name_index);
243  return read_utf8_constant(name_entry);
244  }
245 
246  std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
247  {
248  const pool_entryt &descriptor_entry = pool_entry(descriptor_index);
249  return read_utf8_constant(descriptor_entry);
250  }
251 
252 private:
255 };
256 
258 {
259 public:
260  explicit base_ref_infot(const pool_entryt &entry)
261  : structured_pool_entryt(entry)
262  {
263  PRECONDITION(
264  entry.tag == CONSTANT_Fieldref || entry.tag == CONSTANT_Methodref ||
266  class_index = entry.ref1;
267  name_and_type_index = entry.ref2;
268  }
269 
271  {
272  return class_index;
273  }
275  {
276  return name_and_type_index;
277  }
278 
280  get_name_and_type(const pool_entry_lookupt &pool_entry) const
281  {
282  const pool_entryt &name_and_type_entry = pool_entry(name_and_type_index);
283 
284  INVARIANT(
285  name_and_type_entry.tag == CONSTANT_NameAndType,
286  "name_and_typeindex did not correspond to a name_and_type in the "
287  "constant pool");
288 
289  return name_and_type_infot{name_and_type_entry};
290  }
291 
292  class_infot get_class(const pool_entry_lookupt &pool_entry) const
293  {
294  const pool_entryt &class_entry = pool_entry(class_index);
295 
296  return class_infot{class_entry};
297  }
298 
299 private:
302 };
303 
305 {
306 public:
311  {
312  REF_getField = 1,
313  REF_getStatic = 2,
314  REF_putField = 3,
315  REF_putStatic = 4,
316  REF_invokeVirtual = 5,
317  REF_invokeStatic = 6,
318  REF_invokeSpecial = 7,
321  };
322 
323  explicit method_handle_infot(const pool_entryt &entry)
324  : structured_pool_entryt(entry)
325  {
327  PRECONDITION(entry.ref1 > 0 && entry.ref1 < 10); // Java 8 spec 4.4.8
328  handle_kind = static_cast<method_handle_kindt>(entry.ref1);
329  reference_index = entry.ref2;
330  }
331 
333  {
334  return handle_kind;
335  }
336 
338  {
339  const base_ref_infot ref_entry{pool_entry(reference_index)};
340 
341  // validate the correctness of the constant pool entry
342  switch(handle_kind)
343  {
348  {
349  INVARIANT(ref_entry.get_tag() == CONSTANT_Fieldref, "4.4.2");
350  break;
351  }
354  {
355  INVARIANT(ref_entry.get_tag() == CONSTANT_Methodref, "4.4.2");
356  break;
357  }
360  {
361  INVARIANT(
362  ref_entry.get_tag() == CONSTANT_Methodref ||
363  ref_entry.get_tag() == CONSTANT_InterfaceMethodref,
364  "4.4.2");
365  break;
366  }
368  {
369  INVARIANT(ref_entry.get_tag() == CONSTANT_InterfaceMethodref, "4.4.8");
370  break;
371  }
372  }
373 
374  return ref_entry;
375  }
376 
377 private:
380 };
381 
383 {
384  try
385  {
386  rClassFile();
387  }
388 
389  catch(const char *message)
390  {
391  log.error() << message << messaget::eom;
392  return true;
393  }
394 
395  catch(const std::string &message)
396  {
397  log.error() << message << messaget::eom;
398  return true;
399  }
400 
401  catch(...)
402  {
403  log.error() << "parsing error" << messaget::eom;
404  return true;
405  }
406 
407  return false;
408 }
409 
410 #define ACC_PUBLIC 0x0001u
411 #define ACC_PRIVATE 0x0002u
412 #define ACC_PROTECTED 0x0004u
413 #define ACC_STATIC 0x0008u
414 #define ACC_FINAL 0x0010u
415 #define ACC_SYNCHRONIZED 0x0020u
416 #define ACC_BRIDGE 0x0040u
417 #define ACC_NATIVE 0x0100u
418 #define ACC_INTERFACE 0x0200u
419 #define ACC_ABSTRACT 0x0400u
420 #define ACC_STRICT 0x0800u
421 #define ACC_SYNTHETIC 0x1000u
422 #define ACC_ANNOTATION 0x2000u
423 #define ACC_ENUM 0x4000u
424 
425 #define UNUSED_u2(x) \
426  { \
427  const u2 x = read<u2>(); \
428  (void)x; \
429  } \
430  (void)0
431 
433 {
435 
436  const u4 magic = read<u4>();
437  UNUSED_u2(minor_version);
438  const u2 major_version = read<u2>();
439 
440  if(magic!=0xCAFEBABE)
441  {
442  log.error() << "wrong magic" << messaget::eom;
443  throw 0;
444  }
445 
446  if(major_version<44)
447  {
448  log.error() << "unexpected major version" << messaget::eom;
449  throw 0;
450  }
451 
452  rconstant_pool();
453 
454  classt &parsed_class=parse_tree.parsed_class;
455 
456  const u2 access_flags = read<u2>();
457  const u2 this_class = read<u2>();
458  const u2 super_class = read<u2>();
459 
460  parsed_class.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
461  parsed_class.is_enum=(access_flags&ACC_ENUM)!=0;
462  parsed_class.is_public=(access_flags&ACC_PUBLIC)!=0;
463  parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
464  parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
465  parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
466  parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
467  parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
468  parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
469  parsed_class.name=
470  constant(this_class).type().get(ID_C_base_name);
471 
472  if(super_class!=0)
473  parsed_class.super_class = constant(super_class).type().get(ID_C_base_name);
474 
475  rinterfaces();
476  rfields();
477  rmethods();
478 
479  // count elements of enum
480  if(parsed_class.is_enum)
481  for(fieldt &field : parse_tree.parsed_class.fields)
482  if(field.is_enum)
484 
485  const u2 attributes_count = read<u2>();
486 
487  for(std::size_t j=0; j<attributes_count; j++)
489 
490  get_class_refs();
491 
493 }
494 
497 {
498  for(const auto &c : constant_pool)
499  {
500  switch(c.tag)
501  {
502  case CONSTANT_Class:
503  get_class_refs_rec(c.expr.type());
504  break;
505 
509  break;
510 
511  default: {}
512  }
513  }
514 
516 
517  for(const auto &field : parse_tree.parsed_class.fields)
518  {
519  get_annotation_class_refs(field.annotations);
520 
521  if(field.signature.has_value())
522  {
524  field.descriptor,
525  field.signature,
526  "java::" + id2string(parse_tree.parsed_class.name));
527 
528  // add generic type args to class refs as dependencies, same below for
529  // method types and entries from the local variable type table
531  field_type, parse_tree.class_refs);
532  get_class_refs_rec(field_type);
533  }
534  else
535  {
536  get_class_refs_rec(*java_type_from_string(field.descriptor));
537  }
538  }
539 
540  for(const auto &method : parse_tree.parsed_class.methods)
541  {
542  get_annotation_class_refs(method.annotations);
543  for(const auto &parameter_annotations : method.parameter_annotations)
544  get_annotation_class_refs(parameter_annotations);
545 
546  if(method.signature.has_value())
547  {
549  method.descriptor,
550  method.signature,
551  "java::" + id2string(parse_tree.parsed_class.name));
553  method_type, parse_tree.class_refs);
554  get_class_refs_rec(method_type);
555  }
556  else
557  {
558  get_class_refs_rec(*java_type_from_string(method.descriptor));
559  }
560 
561  for(const auto &var : method.local_variable_table)
562  {
563  if(var.signature.has_value())
564  {
566  var.descriptor,
567  var.signature,
568  "java::" + id2string(parse_tree.parsed_class.name));
570  var_type, parse_tree.class_refs);
571  get_class_refs_rec(var_type);
572  }
573  else
574  {
575  get_class_refs_rec(*java_type_from_string(var.descriptor));
576  }
577  }
578  }
579 }
580 
582 {
583  if(src.id()==ID_code)
584  {
585  const java_method_typet &ct = to_java_method_type(src);
586  const typet &rt=ct.return_type();
587  get_class_refs_rec(rt);
588  for(const auto &p : ct.parameters())
589  get_class_refs_rec(p.type());
590  }
591  else if(src.id() == ID_struct_tag)
592  {
593  const struct_tag_typet &struct_tag_type = to_struct_tag_type(src);
594  if(is_java_array_tag(struct_tag_type.get_identifier()))
596  else
597  parse_tree.class_refs.insert(src.get(ID_C_base_name));
598  }
599  else if(src.id()==ID_struct)
600  {
601  const struct_typet &struct_type=to_struct_type(src);
602  for(const auto &c : struct_type.components())
603  get_class_refs_rec(c.type());
604  }
605  else if(src.id()==ID_pointer)
606  get_class_refs_rec(to_pointer_type(src).base_type());
607 }
608 
612  const std::vector<annotationt> &annotations)
613 {
614  for(const auto &annotation : annotations)
615  {
616  get_class_refs_rec(annotation.type);
617  for(const auto &element_value_pair : annotation.element_value_pairs)
618  get_annotation_value_class_refs(element_value_pair.value);
619  }
620 }
621 
626 {
627  if(const auto &symbol_expr = expr_try_dynamic_cast<symbol_exprt>(value))
628  {
629  const irep_idt &value_id = symbol_expr->get_identifier();
631  }
632  else if(const auto &array_expr = expr_try_dynamic_cast<array_exprt>(value))
633  {
634  for(const exprt &operand : array_expr->operands())
636  }
637  // TODO: enum and nested annotation cases (once these are correctly parsed by
638  // get_relement_value).
639  // Note that in the cases where expr is a string or primitive type, no
640  // additional class references are needed.
641 }
642 
644 {
645  const u2 constant_pool_count = read<u2>();
646  if(constant_pool_count==0)
647  {
648  log.error() << "invalid constant_pool_count" << messaget::eom;
649  throw 0;
650  }
651 
652  constant_pool.resize(constant_pool_count);
653 
654  for(auto it = std::next(constant_pool.begin()); it != constant_pool.end();
655  it++)
656  {
657  it->tag = read<u1>();
658 
659  switch(it->tag)
660  {
661  case CONSTANT_Class:
662  it->ref1 = read<u2>();
663  break;
664 
665  case CONSTANT_Fieldref:
666  case CONSTANT_Methodref:
670  it->ref1 = read<u2>();
671  it->ref2 = read<u2>();
672  break;
673 
674  case CONSTANT_String:
675  case CONSTANT_MethodType:
676  it->ref1 = read<u2>();
677  break;
678 
679  case CONSTANT_Integer:
680  case CONSTANT_Float:
681  it->number = read<u4>();
682  break;
683 
684  case CONSTANT_Long:
685  case CONSTANT_Double:
686  it->number = read<u8>();
687  // Eight-byte constants take up two entries in the constant_pool table.
688  if(it==constant_pool.end())
689  {
690  log.error() << "invalid double entry" << messaget::eom;
691  throw 0;
692  }
693  it++;
694  it->tag=0;
695  break;
696 
697  case CONSTANT_Utf8:
698  {
699  const u2 bytes = read<u2>();
700  std::string s;
701  s.resize(bytes);
702  for(auto &ch : s)
703  ch = read<u1>();
704  it->s = s; // Add to string table
705  }
706  break;
707 
709  it->ref1 = read<u1>();
710  it->ref2 = read<u2>();
711  break;
712 
713  default:
714  log.error() << "unknown constant pool entry (" << it->tag << ")"
715  << messaget::eom;
716  throw 0;
717  }
718  }
719 
720  // we do a bit of post-processing after we have them all
721  std::for_each(
722  std::next(constant_pool.begin()),
723  constant_pool.end(),
724  [&](constant_poolt::value_type &entry) {
725  switch(entry.tag)
726  {
727  case CONSTANT_Class:
728  {
729  const std::string &s = id2string(pool_entry(entry.ref1).s);
730  entry.expr = type_exprt(java_classname(s));
731  }
732  break;
733 
734  case CONSTANT_Fieldref:
735  {
736  const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
737  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
738  const pool_entryt &class_entry = pool_entry(entry.ref1);
739  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
740  typet type=type_entry(nameandtype_entry.ref2);
741 
742  auto class_tag = java_classname(id2string(class_name_entry.s));
743 
744  fieldref_exprt fieldref(type, name_entry.s, class_tag.get_identifier());
745 
746  entry.expr = fieldref;
747  }
748  break;
749 
750  case CONSTANT_Methodref:
751  case CONSTANT_InterfaceMethodref:
752  {
753  const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
754  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
755  const pool_entryt &class_entry = pool_entry(entry.ref1);
756  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
757  typet type=type_entry(nameandtype_entry.ref2);
758 
759  auto class_tag = java_classname(id2string(class_name_entry.s));
760 
761  irep_idt mangled_method_name =
762  id2string(name_entry.s) + ":" +
763  id2string(pool_entry(nameandtype_entry.ref2).s);
764 
765  irep_idt class_id = class_tag.get_identifier();
766 
767  entry.expr = class_method_descriptor_exprt{
768  type, mangled_method_name, class_id, name_entry.s};
769  }
770  break;
771 
772  case CONSTANT_String:
773  {
774  // ldc turns these into references to java.lang.String
775  entry.expr = java_string_literal_exprt{pool_entry(entry.ref1).s};
776  }
777  break;
778 
779  case CONSTANT_Integer:
780  entry.expr = from_integer(entry.number, java_int_type());
781  break;
782 
783  case CONSTANT_Float:
784  {
785  ieee_floatt value(ieee_float_spect::single_precision());
786  value.unpack(entry.number);
787  entry.expr = value.to_expr();
788  }
789  break;
790 
791  case CONSTANT_Long:
792  entry.expr = from_integer(entry.number, java_long_type());
793  break;
794 
795  case CONSTANT_Double:
796  {
797  ieee_floatt value(ieee_float_spect::double_precision());
798  value.unpack(entry.number);
799  entry.expr = value.to_expr();
800  }
801  break;
802 
803  case CONSTANT_NameAndType:
804  {
805  entry.expr.id("nameandtype");
806  }
807  break;
808 
809  case CONSTANT_MethodHandle:
810  {
811  entry.expr.id("methodhandle");
812  }
813  break;
814 
815  case CONSTANT_MethodType:
816  {
817  entry.expr.id("methodtype");
818  }
819  break;
820 
821  case CONSTANT_InvokeDynamic:
822  {
823  entry.expr.id("invokedynamic");
824  const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
825  typet type=type_entry(nameandtype_entry.ref2);
826  type.set(ID_java_lambda_method_handle_index, entry.ref1);
827  entry.expr.type() = type;
828  }
829  break;
830  }
831  });
832 }
833 
835 {
836  const u2 interfaces_count = read<u2>();
837 
838  for(std::size_t i=0; i<interfaces_count; i++)
840  constant(read<u2>()).type().get(ID_C_base_name));
841 }
842 
844 {
845  const u2 fields_count = read<u2>();
846 
847  for(std::size_t i=0; i<fields_count; i++)
848  {
850 
851  const u2 access_flags = read<u2>();
852  const u2 name_index = read<u2>();
853  const u2 descriptor_index = read<u2>();
854  const u2 attributes_count = read<u2>();
855 
856  field.name=pool_entry(name_index).s;
857  field.is_static=(access_flags&ACC_STATIC)!=0;
858  field.is_final=(access_flags&ACC_FINAL)!=0;
859  field.is_enum=(access_flags&ACC_ENUM)!=0;
860 
861  field.descriptor=id2string(pool_entry(descriptor_index).s);
862  field.is_public=(access_flags&ACC_PUBLIC)!=0;
863  field.is_protected=(access_flags&ACC_PROTECTED)!=0;
864  field.is_private=(access_flags&ACC_PRIVATE)!=0;
865  const auto flags = (field.is_public ? 1 : 0) +
866  (field.is_protected?1:0)+
867  (field.is_private?1:0);
868  DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
869 
870  for(std::size_t j=0; j<attributes_count; j++)
871  rfield_attribute(field);
872  }
873 }
874 
875 #define T_BOOLEAN 4
876 #define T_CHAR 5
877 #define T_FLOAT 6
878 #define T_DOUBLE 7
879 #define T_BYTE 8
880 #define T_SHORT 9
881 #define T_INT 10
882 #define T_LONG 11
883 
884 void java_bytecode_parsert::rbytecode(std::vector<instructiont> &instructions)
885 {
886  const u4 code_length = read<u4>();
887 
888  u4 address;
889  size_t bytecode_index=0; // index of bytecode instruction
890 
891  for(address=0; address<code_length; address++)
892  {
893  bool wide_instruction=false;
894  u4 start_of_instruction=address;
895 
896  u1 bytecode = read<u1>();
897 
898  if(bytecode == BC_wide)
899  {
900  wide_instruction=true;
901  address++;
902  bytecode = read<u1>();
903  // The only valid instructions following a wide byte are
904  // [ifald]load, [ifald]store, ret and iinc
905  // All of these have either format of v, or V
906  INVARIANT(
907  bytecode_info[bytecode].format == 'v' ||
908  bytecode_info[bytecode].format == 'V',
909  std::string("Unexpected wide instruction: ") +
910  bytecode_info[bytecode].mnemonic);
911  }
912 
913  instructions.emplace_back();
914  instructiont &instruction=instructions.back();
915  instruction.bytecode = bytecode;
916  instruction.address=start_of_instruction;
917  instruction.source_location
918  .set_java_bytecode_index(std::to_string(bytecode_index));
919 
920  switch(bytecode_info[bytecode].format)
921  {
922  case ' ': // no further bytes
923  break;
924 
925  case 'c': // a constant_pool index (one byte)
926  if(wide_instruction)
927  {
928  instruction.args.push_back(constant(read<u2>()));
929  address+=2;
930  }
931  else
932  {
933  instruction.args.push_back(constant(read<u1>()));
934  address+=1;
935  }
936  break;
937 
938  case 'C': // a constant_pool index (two bytes)
939  instruction.args.push_back(constant(read<u2>()));
940  address+=2;
941  break;
942 
943  case 'b': // a signed byte
944  {
945  const s1 c = read<u1>();
946  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
947  }
948  address+=1;
949  break;
950 
951  case 'o': // two byte branch offset, signed
952  {
953  const s2 offset = read<u2>();
954  // By converting the signed offset into an absolute address (by adding
955  // the current address) the number represented becomes unsigned.
956  instruction.args.push_back(
957  from_integer(address+offset, unsignedbv_typet(16)));
958  }
959  address+=2;
960  break;
961 
962  case 'O': // four byte branch offset, signed
963  {
964  const s4 offset = read<u4>();
965  // By converting the signed offset into an absolute address (by adding
966  // the current address) the number represented becomes unsigned.
967  instruction.args.push_back(
968  from_integer(address+offset, unsignedbv_typet(32)));
969  }
970  address+=4;
971  break;
972 
973  case 'v': // local variable index (one byte)
974  {
975  if(wide_instruction)
976  {
977  const u2 v = read<u2>();
978  instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
979  address += 2;
980  }
981  else
982  {
983  const u1 v = read<u1>();
984  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
985  address += 1;
986  }
987  }
988 
989  break;
990 
991  case 'V':
992  // local variable index (two bytes) plus two signed bytes
993  if(wide_instruction)
994  {
995  const u2 v = read<u2>();
996  instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
997  const s2 c = read<u2>();
998  instruction.args.push_back(from_integer(c, signedbv_typet(16)));
999  address+=4;
1000  }
1001  else // local variable index (one byte) plus one signed byte
1002  {
1003  const u1 v = read<u1>();
1004  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1005  const s1 c = read<u1>();
1006  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
1007  address+=2;
1008  }
1009  break;
1010 
1011  case 'I': // two byte constant_pool index plus two bytes
1012  {
1013  const u2 c = read<u2>();
1014  instruction.args.push_back(constant(c));
1015  const u1 b1 = read<u1>();
1016  instruction.args.push_back(from_integer(b1, unsignedbv_typet(8)));
1017  const u1 b2 = read<u1>();
1018  instruction.args.push_back(from_integer(b2, unsignedbv_typet(8)));
1019  }
1020  address+=4;
1021  break;
1022 
1023  case 'L': // lookupswitch
1024  {
1025  u4 base_offset=address;
1026 
1027  // first a pad to 32-bit align
1028  while(((address + 1u) & 3u) != 0)
1029  {
1030  read<u1>();
1031  address++;
1032  }
1033 
1034  // now default value
1035  const s4 default_value = read<u4>();
1036  // By converting the signed offset into an absolute address (by adding
1037  // the current address) the number represented becomes unsigned.
1038  instruction.args.push_back(
1039  from_integer(base_offset+default_value, unsignedbv_typet(32)));
1040  address+=4;
1041 
1042  // number of pairs
1043  const u4 npairs = read<u4>();
1044  address+=4;
1045 
1046  for(std::size_t i=0; i<npairs; i++)
1047  {
1048  const s4 match = read<u4>();
1049  const s4 offset = read<u4>();
1050  instruction.args.push_back(
1051  from_integer(match, signedbv_typet(32)));
1052  // By converting the signed offset into an absolute address (by adding
1053  // the current address) the number represented becomes unsigned.
1054  instruction.args.push_back(
1055  from_integer(base_offset+offset, unsignedbv_typet(32)));
1056  address+=8;
1057  }
1058  }
1059  break;
1060 
1061  case 'T': // tableswitch
1062  {
1063  size_t base_offset=address;
1064 
1065  // first a pad to 32-bit align
1066  while(((address + 1u) & 3u) != 0)
1067  {
1068  read<u1>();
1069  address++;
1070  }
1071 
1072  // now default value
1073  const s4 default_value = read<u4>();
1074  instruction.args.push_back(
1075  from_integer(base_offset+default_value, signedbv_typet(32)));
1076  address+=4;
1077 
1078  // now low value
1079  const s4 low_value = read<u4>();
1080  address+=4;
1081 
1082  // now high value
1083  const s4 high_value = read<u4>();
1084  address+=4;
1085 
1086  // there are high-low+1 offsets, and they are signed
1087  for(s4 i=low_value; i<=high_value; i++)
1088  {
1089  s4 offset = read<u4>();
1090  instruction.args.push_back(from_integer(i, signedbv_typet(32)));
1091  // By converting the signed offset into an absolute address (by adding
1092  // the current address) the number represented becomes unsigned.
1093  instruction.args.push_back(
1094  from_integer(base_offset+offset, unsignedbv_typet(32)));
1095  address+=4;
1096  }
1097  }
1098  break;
1099 
1100  case 'm': // multianewarray: constant-pool index plus one unsigned byte
1101  {
1102  const u2 c = read<u2>(); // constant-pool index
1103  instruction.args.push_back(constant(c));
1104  const u1 dimensions = read<u1>(); // number of dimensions
1105  instruction.args.push_back(
1106  from_integer(dimensions, unsignedbv_typet(8)));
1107  address+=3;
1108  }
1109  break;
1110 
1111  case 't': // array subtype, one byte
1112  {
1113  typet t;
1114  switch(read<u1>())
1115  {
1116  case T_BOOLEAN: t.id(ID_bool); break;
1117  case T_CHAR: t.id(ID_char); break;
1118  case T_FLOAT: t.id(ID_float); break;
1119  case T_DOUBLE: t.id(ID_double); break;
1120  case T_BYTE: t.id(ID_byte); break;
1121  case T_SHORT: t.id(ID_short); break;
1122  case T_INT: t.id(ID_int); break;
1123  case T_LONG: t.id(ID_long); break;
1124  default:{};
1125  }
1126  instruction.args.push_back(type_exprt(t));
1127  }
1128  address+=1;
1129  break;
1130 
1131  case 's': // a signed short
1132  {
1133  const s2 s = read<u2>();
1134  instruction.args.push_back(from_integer(s, signedbv_typet(16)));
1135  }
1136  address+=2;
1137  break;
1138 
1139  default:
1140  throw "unknown JVM bytecode instruction";
1141  }
1142  bytecode_index++;
1143  }
1144 
1145  if(address!=code_length)
1146  {
1147  log.error() << "bytecode length mismatch" << messaget::eom;
1148  throw 0;
1149  }
1150 }
1151 
1153 {
1154  const u2 attribute_name_index = read<u2>();
1155  const u4 attribute_length = read<u4>();
1156 
1157  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1158 
1159  if(attribute_name == "Code")
1160  {
1161  UNUSED_u2(max_stack);
1162  UNUSED_u2(max_locals);
1163 
1164  if(skip_instructions)
1165  skip_bytes(read<u4>());
1166  else
1167  rbytecode(method.instructions);
1168 
1169  const u2 exception_table_length = read<u2>();
1170  if(skip_instructions)
1171  skip_bytes(exception_table_length * 8u);
1172  else
1173  {
1174  method.exception_table.resize(exception_table_length);
1175 
1176  for(std::size_t e = 0; e < exception_table_length; e++)
1177  {
1178  const u2 start_pc = read<u2>();
1179  const u2 end_pc = read<u2>();
1180 
1181  // From the class file format spec ("4.7.3. The Code Attribute" for
1182  // Java8)
1183  INVARIANT(
1184  start_pc < end_pc,
1185  "The start_pc must be less than the end_pc as this is the range the "
1186  "exception is active");
1187 
1188  const u2 handler_pc = read<u2>();
1189  const u2 catch_type = read<u2>();
1190  method.exception_table[e].start_pc = start_pc;
1191  method.exception_table[e].end_pc = end_pc;
1192  method.exception_table[e].handler_pc = handler_pc;
1193  if(catch_type != 0)
1194  method.exception_table[e].catch_type =
1195  to_struct_tag_type(pool_entry(catch_type).expr.type());
1196  }
1197  }
1198 
1199  u2 attributes_count = read<u2>();
1200 
1201  for(std::size_t j=0; j<attributes_count; j++)
1202  rcode_attribute(method);
1203 
1204  // method name
1206  "java::" + id2string(parse_tree.parsed_class.name) + "." +
1207  id2string(method.name) + ":" + method.descriptor);
1208 
1209  irep_idt line_number;
1210 
1211  // add missing line numbers
1212  for(auto &instruction : method.instructions)
1213  {
1214  if(!instruction.source_location.get_line().empty())
1215  line_number = instruction.source_location.get_line();
1216  else if(!line_number.empty())
1217  instruction.source_location.set_line(line_number);
1218  instruction.source_location.set_function(
1219  method.source_location.get_function());
1220  }
1221 
1222  // line number of method (the first line number available)
1223  const auto it = std::find_if(
1224  method.instructions.begin(),
1225  method.instructions.end(),
1226  [&](const instructiont &instruction) {
1227  return !instruction.source_location.get_line().empty();
1228  });
1229  if(it != method.instructions.end())
1230  method.source_location.set_line(it->source_location.get_line());
1231  }
1232  else if(attribute_name=="Signature")
1233  {
1234  const u2 signature_index = read<u2>();
1235  method.signature=id2string(pool_entry(signature_index).s);
1236  }
1237  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1238  attribute_name=="RuntimeVisibleAnnotations")
1239  {
1241  }
1242  else if(
1243  attribute_name == "RuntimeInvisibleParameterAnnotations" ||
1244  attribute_name == "RuntimeVisibleParameterAnnotations")
1245  {
1246  const u1 parameter_count = read<u1>();
1247  // There may be attributes for both runtime-visible and runtime-invisible
1248  // annotations, the length of either array may be longer than the other as
1249  // trailing parameters without annotations are omitted.
1250  // Extend our parameter_annotations if this one is longer than the one
1251  // previously recorded (if any).
1252  if(method.parameter_annotations.size() < parameter_count)
1253  method.parameter_annotations.resize(parameter_count);
1254  for(u2 param_no = 0; param_no < parameter_count; ++param_no)
1256  }
1257  else if(attribute_name == "Exceptions")
1258  {
1260  }
1261  else
1262  skip_bytes(attribute_length);
1263 }
1264 
1266 {
1267  const u2 attribute_name_index = read<u2>();
1268  const u4 attribute_length = read<u4>();
1269 
1270  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1271 
1272  if(attribute_name=="Signature")
1273  {
1274  const u2 signature_index = read<u2>();
1275  field.signature=id2string(pool_entry(signature_index).s);
1276  }
1277  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1278  attribute_name=="RuntimeVisibleAnnotations")
1279  {
1281  }
1282  else
1283  skip_bytes(attribute_length);
1284 }
1285 
1287 {
1288  const u2 attribute_name_index = read<u2>();
1289  const u4 attribute_length = read<u4>();
1290 
1291  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1292 
1293  if(attribute_name=="LineNumberTable")
1294  {
1295  std::map<unsigned, std::reference_wrapper<instructiont>> instruction_map;
1296  for(auto &instruction : method.instructions)
1297  instruction_map.emplace(instruction.address, instruction);
1298 
1299  const u2 line_number_table_length = read<u2>();
1300 
1301  for(std::size_t i=0; i<line_number_table_length; i++)
1302  {
1303  const u2 start_pc = read<u2>();
1304  const u2 line_number = read<u2>();
1305 
1306  // annotate the bytecode program
1307  auto it = instruction_map.find(start_pc);
1308 
1309  if(it!=instruction_map.end())
1310  it->second.get().source_location.set_line(line_number);
1311  }
1312  }
1313  else if(attribute_name=="LocalVariableTable")
1314  {
1315  const u2 local_variable_table_length = read<u2>();
1316 
1317  method.local_variable_table.resize(local_variable_table_length);
1318 
1319  for(std::size_t i=0; i<local_variable_table_length; i++)
1320  {
1321  const u2 start_pc = read<u2>();
1322  const u2 length = read<u2>();
1323  const u2 name_index = read<u2>();
1324  const u2 descriptor_index = read<u2>();
1325  const u2 index = read<u2>();
1326 
1327  method.local_variable_table[i].index=index;
1328  method.local_variable_table[i].name=pool_entry(name_index).s;
1329  method.local_variable_table[i].descriptor=
1330  id2string(pool_entry(descriptor_index).s);
1331  method.local_variable_table[i].start_pc=start_pc;
1332  method.local_variable_table[i].length=length;
1333  }
1334  }
1335  else if(attribute_name=="LocalVariableTypeTable")
1336  {
1338  }
1339  else if(attribute_name=="StackMapTable")
1340  {
1341  const u2 stack_map_entries = read<u2>();
1342 
1343  method.stack_map_table.resize(stack_map_entries);
1344 
1345  for(size_t i=0; i<stack_map_entries; i++)
1346  {
1347  const u1 frame_type = read<u1>();
1348  if(frame_type<=63)
1349  {
1351  method.stack_map_table[i].locals.resize(0);
1352  method.stack_map_table[i].stack.resize(0);
1353  }
1354  else if(64<=frame_type && frame_type<=127)
1355  {
1356  method.stack_map_table[i].type=
1358  method.stack_map_table[i].locals.resize(0);
1359  method.stack_map_table[i].stack.resize(1);
1360  methodt::verification_type_infot verification_type_info;
1361  read_verification_type_info(verification_type_info);
1362  method.stack_map_table[i].stack[0]=verification_type_info;
1363  }
1364  else if(frame_type==247)
1365  {
1366  method.stack_map_table[i].type=
1368  method.stack_map_table[i].locals.resize(0);
1369  method.stack_map_table[i].stack.resize(1);
1370  methodt::verification_type_infot verification_type_info;
1371  const u2 offset_delta = read<u2>();
1372  read_verification_type_info(verification_type_info);
1373  method.stack_map_table[i].stack[0]=verification_type_info;
1374  method.stack_map_table[i].offset_delta=offset_delta;
1375  }
1376  else if(248<=frame_type && frame_type<=250)
1377  {
1379  method.stack_map_table[i].locals.resize(0);
1380  method.stack_map_table[i].stack.resize(0);
1381  const u2 offset_delta = read<u2>();
1382  method.stack_map_table[i].offset_delta=offset_delta;
1383  }
1384  else if(frame_type==251)
1385  {
1386  method.stack_map_table[i].type
1388  method.stack_map_table[i].locals.resize(0);
1389  method.stack_map_table[i].stack.resize(0);
1390  const u2 offset_delta = read<u2>();
1391  method.stack_map_table[i].offset_delta=offset_delta;
1392  }
1393  else if(252<=frame_type && frame_type<=254)
1394  {
1395  size_t new_locals = frame_type - 251;
1397  method.stack_map_table[i].locals.resize(new_locals);
1398  method.stack_map_table[i].stack.resize(0);
1399  const u2 offset_delta = read<u2>();
1400  method.stack_map_table[i].offset_delta=offset_delta;
1401  for(size_t k=0; k<new_locals; k++)
1402  {
1403  method.stack_map_table[i].locals
1404  .push_back(methodt::verification_type_infot());
1406  method.stack_map_table[i].locals.back();
1408  }
1409  }
1410  else if(frame_type==255)
1411  {
1413  const u2 offset_delta = read<u2>();
1414  method.stack_map_table[i].offset_delta=offset_delta;
1415  const u2 number_locals = read<u2>();
1416  method.stack_map_table[i].locals.resize(number_locals);
1417  for(size_t k=0; k<(size_t) number_locals; k++)
1418  {
1419  method.stack_map_table[i].locals
1420  .push_back(methodt::verification_type_infot());
1422  method.stack_map_table[i].locals.back();
1424  }
1425  const u2 number_stack_items = read<u2>();
1426  method.stack_map_table[i].stack.resize(number_stack_items);
1427  for(size_t k=0; k<(size_t) number_stack_items; k++)
1428  {
1429  method.stack_map_table[i].stack
1430  .push_back(methodt::verification_type_infot());
1432  method.stack_map_table[i].stack.back();
1434  }
1435  }
1436  else
1437  throw "error: unknown stack frame type encountered";
1438  }
1439  }
1440  else
1441  skip_bytes(attribute_length);
1442 }
1443 
1446 {
1447  const u1 tag = read<u1>();
1448  switch(tag)
1449  {
1450  case VTYPE_INFO_TOP:
1452  break;
1453  case VTYPE_INFO_INTEGER:
1455  break;
1456  case VTYPE_INFO_FLOAT:
1458  break;
1459  case VTYPE_INFO_LONG:
1461  break;
1462  case VTYPE_INFO_DOUBLE:
1464  break;
1465  case VTYPE_INFO_ITEM_NULL:
1467  break;
1470  break;
1471  case VTYPE_INFO_OBJECT:
1473  v.cpool_index = read<u2>();
1474  break;
1475  case VTYPE_INFO_UNINIT:
1477  v.offset = read<u2>();
1478  break;
1479  default:
1480  throw "error: unknown verification type info encountered";
1481  }
1482 }
1483 
1485  std::vector<annotationt> &annotations)
1486 {
1487  const u2 num_annotations = read<u2>();
1488 
1489  for(u2 number=0; number<num_annotations; number++)
1490  {
1491  annotationt annotation;
1492  rRuntimeAnnotation(annotation);
1493  annotations.push_back(annotation);
1494  }
1495 }
1496 
1498  annotationt &annotation)
1499 {
1500  const u2 type_index = read<u2>();
1501  annotation.type=type_entry(type_index);
1503 }
1504 
1506  annotationt::element_value_pairst &element_value_pairs)
1507 {
1508  const u2 num_element_value_pairs = read<u2>();
1509  element_value_pairs.resize(num_element_value_pairs);
1510 
1511  for(auto &element_value_pair : element_value_pairs)
1512  {
1513  const u2 element_name_index = read<u2>();
1514  element_value_pair.element_name=pool_entry(element_name_index).s;
1515  element_value_pair.value = get_relement_value();
1516  }
1517 }
1518 
1526 {
1527  const u1 tag = read<u1>();
1528 
1529  switch(tag)
1530  {
1531  case 'e':
1532  {
1533  UNUSED_u2(type_name_index);
1534  UNUSED_u2(const_name_index);
1535  // todo: enum
1536  return exprt();
1537  }
1538 
1539  case 'c':
1540  {
1541  const u2 class_info_index = read<u2>();
1542  return symbol_exprt::typeless(pool_entry(class_info_index).s);
1543  }
1544 
1545  case '@':
1546  {
1547  // TODO: return this wrapped in an exprt
1548  // another annotation, recursively
1549  annotationt annotation;
1550  rRuntimeAnnotation(annotation);
1551  return exprt();
1552  }
1553 
1554  case '[':
1555  {
1556  const u2 num_values = read<u2>();
1557  exprt::operandst values;
1558  values.reserve(num_values);
1559  for(std::size_t i=0; i<num_values; i++)
1560  {
1561  values.push_back(get_relement_value());
1562  }
1563  return array_exprt(std::move(values), array_typet(typet(), exprt()));
1564  }
1565 
1566  case 's':
1567  {
1568  const u2 const_value_index = read<u2>();
1569  return string_constantt(pool_entry(const_value_index).s);
1570  }
1571 
1572  default:
1573  {
1574  const u2 const_value_index = read<u2>();
1575  return constant(const_value_index);
1576  }
1577  }
1578 }
1579 
1592  const u4 &attribute_length)
1593 {
1594  classt &parsed_class = parse_tree.parsed_class;
1595  std::string name = parsed_class.name.c_str();
1596  const u2 number_of_classes = read<u2>();
1597  const u4 number_of_bytes_to_be_read = number_of_classes * 8 + 2;
1598  INVARIANT(
1599  number_of_bytes_to_be_read == attribute_length,
1600  "The number of bytes to be read for the InnerClasses attribute does not "
1601  "match the attribute length.");
1602 
1603  const auto pool_entry_lambda = [this](u2 index) -> pool_entryt & {
1604  return pool_entry(index);
1605  };
1606  const auto remove_separator_char = [](std::string str, char ch) {
1607  str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1608  return str;
1609  };
1610 
1611  for(int i = 0; i < number_of_classes; i++)
1612  {
1613  const u2 inner_class_info_index = read<u2>();
1614  const u2 outer_class_info_index = read<u2>();
1615  const u2 inner_name_index = read<u2>();
1616  const u2 inner_class_access_flags = read<u2>();
1617 
1618  std::string inner_class_info_name =
1619  class_infot(pool_entry(inner_class_info_index))
1620  .get_name(pool_entry_lambda);
1621  bool is_private = (inner_class_access_flags & ACC_PRIVATE) != 0;
1622  bool is_public = (inner_class_access_flags & ACC_PUBLIC) != 0;
1623  bool is_protected = (inner_class_access_flags & ACC_PROTECTED) != 0;
1624  bool is_static = (inner_class_access_flags & ACC_STATIC) != 0;
1625 
1626  // If the original parsed class name matches the inner class name,
1627  // the parsed class is an inner class, so overwrite the parsed class'
1628  // access information and mark it as an inner class.
1629  bool is_inner_class = remove_separator_char(id2string(parsed_class.name), '.') ==
1630  remove_separator_char(inner_class_info_name, '/');
1631  if(!is_inner_class)
1632  continue;
1633  parsed_class.is_inner_class = is_inner_class;
1634  parsed_class.is_static_class = is_static;
1635  // This is a marker that a class is anonymous.
1636  if(inner_name_index == 0)
1637  parsed_class.is_anonymous_class = true;
1638  else
1639  parsed_class.inner_name = pool_entry_lambda(inner_name_index).s;
1640  // Note that if outer_class_info_index == 0, the inner class is an anonymous
1641  // or local class, and is treated as private.
1642  if(outer_class_info_index == 0)
1643  {
1644  parsed_class.is_private = true;
1645  parsed_class.is_protected = false;
1646  parsed_class.is_public = false;
1647  }
1648  else
1649  {
1650  std::string outer_class_info_name =
1651  class_infot(pool_entry(outer_class_info_index))
1652  .get_name(pool_entry_lambda);
1653  parsed_class.outer_class =
1654  constant(outer_class_info_index).type().get(ID_C_base_name);
1655  parsed_class.is_private = is_private;
1656  parsed_class.is_protected = is_protected;
1657  parsed_class.is_public = is_public;
1658  }
1659  }
1660 }
1661 
1668 {
1669  const u2 number_of_exceptions = read<u2>();
1670 
1671  std::vector<irep_idt> exceptions;
1672  for(size_t i = 0; i < number_of_exceptions; i++)
1673  {
1674  const u2 exception_index_table = read<u2>();
1675  const irep_idt exception_name =
1676  constant(exception_index_table).type().get(ID_C_base_name);
1677  exceptions.push_back(exception_name);
1678  }
1679  return exceptions;
1680 }
1681 
1683 {
1684  classt &parsed_class = parse_tree.parsed_class;
1685 
1686  const u2 attribute_name_index = read<u2>();
1687  const u4 attribute_length = read<u4>();
1688 
1689  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1690 
1691  if(attribute_name=="SourceFile")
1692  {
1693  const u2 sourcefile_index = read<u2>();
1694  irep_idt sourcefile_name;
1695 
1696  const std::string &fqn(id2string(parsed_class.name));
1697  size_t last_index = fqn.find_last_of('.');
1698  if(last_index==std::string::npos)
1699  sourcefile_name=pool_entry(sourcefile_index).s;
1700  else
1701  {
1702  std::string package_name=fqn.substr(0, last_index+1);
1703  std::replace(package_name.begin(), package_name.end(), '.', '/');
1704  const std::string &full_file_name=
1705  package_name+id2string(pool_entry(sourcefile_index).s);
1706  sourcefile_name=full_file_name;
1707  }
1708 
1709  for(auto &method : parsed_class.methods)
1710  {
1711  method.source_location.set_file(sourcefile_name);
1712  for(auto &instruction : method.instructions)
1713  {
1714  if(!instruction.source_location.get_line().empty())
1715  instruction.source_location.set_file(sourcefile_name);
1716  }
1717  }
1718  }
1719  else if(attribute_name=="Signature")
1720  {
1721  const u2 signature_index = read<u2>();
1722  parsed_class.signature=id2string(pool_entry(signature_index).s);
1724  parsed_class.signature.value(),
1726  }
1727  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1728  attribute_name=="RuntimeVisibleAnnotations")
1729  {
1731  }
1732  else if(attribute_name == "BootstrapMethods")
1733  {
1734  // for this attribute
1735  // cf. https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.23
1736  INVARIANT(
1737  !parsed_class.attribute_bootstrapmethods_read,
1738  "only one BootstrapMethods argument is allowed in a class file");
1739 
1740  // mark as read in parsed class
1741  parsed_class.attribute_bootstrapmethods_read = true;
1743  }
1744  else if(attribute_name == "InnerClasses")
1745  {
1747  }
1748  else
1749  skip_bytes(attribute_length);
1750 }
1751 
1753 {
1754  const u2 methods_count = read<u2>();
1755 
1756  for(std::size_t j=0; j<methods_count; j++)
1757  rmethod();
1758 }
1759 
1760 #define ACC_PUBLIC 0x0001u
1761 #define ACC_PRIVATE 0x0002u
1762 #define ACC_PROTECTED 0x0004u
1763 #define ACC_STATIC 0x0008u
1764 #define ACC_FINAL 0x0010u
1765 #define ACC_VARARGS 0x0080u
1766 #define ACC_SUPER 0x0020u
1767 #define ACC_VOLATILE 0x0040u
1768 #define ACC_TRANSIENT 0x0080u
1769 #define ACC_INTERFACE 0x0200u
1770 #define ACC_ABSTRACT 0x0400u
1771 #define ACC_SYNTHETIC 0x1000u
1772 #define ACC_ANNOTATION 0x2000u
1773 #define ACC_ENUM 0x4000u
1774 
1776 {
1778 
1779  const u2 access_flags = read<u2>();
1780  const u2 name_index = read<u2>();
1781  const u2 descriptor_index = read<u2>();
1782 
1783  method.is_final=(access_flags&ACC_FINAL)!=0;
1784  method.is_static=(access_flags&ACC_STATIC)!=0;
1785  method.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
1786  method.is_public=(access_flags&ACC_PUBLIC)!=0;
1787  method.is_protected=(access_flags&ACC_PROTECTED)!=0;
1788  method.is_private=(access_flags&ACC_PRIVATE)!=0;
1789  method.is_synchronized=(access_flags&ACC_SYNCHRONIZED)!=0;
1790  method.is_native=(access_flags&ACC_NATIVE)!=0;
1791  method.is_bridge = (access_flags & ACC_BRIDGE) != 0;
1792  method.is_varargs = (access_flags & ACC_VARARGS) != 0;
1793  method.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
1794  method.name=pool_entry(name_index).s;
1795  method.base_name=pool_entry(name_index).s;
1796  method.descriptor=id2string(pool_entry(descriptor_index).s);
1797 
1798  const auto flags = (method.is_public ? 1 : 0) +
1799  (method.is_protected?1:0)+
1800  (method.is_private?1:0);
1801  DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
1802  const u2 attributes_count = read<u2>();
1803 
1804  for(std::size_t j=0; j<attributes_count; j++)
1805  rmethod_attribute(method);
1806 }
1807 
1808 std::optional<java_bytecode_parse_treet> java_bytecode_parse(
1809  std::istream &istream,
1810  const irep_idt &class_name,
1811  message_handlert &message_handler,
1812  bool skip_instructions)
1813 {
1814  java_bytecode_parsert java_bytecode_parser(
1815  skip_instructions, message_handler);
1816  java_bytecode_parser.in=&istream;
1817 
1818  bool parser_result=java_bytecode_parser.parse();
1819 
1820  if(
1821  parser_result ||
1822  java_bytecode_parser.parse_tree.parsed_class.name != class_name)
1823  {
1824  return {};
1825  }
1826 
1827  return std::move(java_bytecode_parser.parse_tree);
1828 }
1829 
1830 std::optional<java_bytecode_parse_treet> java_bytecode_parse(
1831  const std::string &file,
1832  const irep_idt &class_name,
1833  message_handlert &message_handler,
1834  bool skip_instructions)
1835 {
1836  std::ifstream in(file, std::ios::binary);
1837 
1838  if(!in)
1839  {
1840  return {};
1841  }
1842 
1843  return java_bytecode_parse(
1844  in, class_name, message_handler, skip_instructions);
1845 }
1846 
1851 {
1852  const u2 local_variable_type_table_length = read<u2>();
1853 
1854  INVARIANT(
1855  local_variable_type_table_length<=method.local_variable_table.size(),
1856  "Local variable type table cannot have more elements "
1857  "than the local variable table.");
1858  for(std::size_t i=0; i<local_variable_type_table_length; i++)
1859  {
1860  const u2 start_pc = read<u2>();
1861  const u2 length = read<u2>();
1862  const u2 name_index = read<u2>();
1863  const u2 signature_index = read<u2>();
1864  const u2 index = read<u2>();
1865 
1866  bool found=false;
1867  for(auto &lvar : method.local_variable_table)
1868  {
1869  // compare to entry in LVT
1870  if(lvar.index==index &&
1871  lvar.name==pool_entry(name_index).s &&
1872  lvar.start_pc==start_pc &&
1873  lvar.length==length)
1874  {
1875  found=true;
1876  lvar.signature=id2string(pool_entry(signature_index).s);
1877  break;
1878  }
1879  }
1880  INVARIANT(
1881  found,
1882  "Entry in LocalVariableTypeTable must be present in LVT");
1883  }
1884 }
1885 
1894 {
1895  switch(java_handle_kind)
1896  {
1909  default:
1911  }
1912 }
1913 
1919 std::optional<java_bytecode_parsert::lambda_method_handlet>
1921 {
1922  const std::function<pool_entryt &(u2)> pool_entry_lambda =
1923  [this](u2 index) -> pool_entryt & { return pool_entry(index); };
1924 
1925  const base_ref_infot &ref_entry = entry.get_reference(pool_entry_lambda);
1926 
1927  const class_infot &class_entry = ref_entry.get_class(pool_entry_lambda);
1928  const name_and_type_infot &name_and_type =
1929  ref_entry.get_name_and_type(pool_entry_lambda);
1930 
1931  // The following lambda kinds specified in the JVMS (see for example
1932  // https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-5.html#jvms-5.4.3.5
1933  // but which aren't actually created by javac. Java has no syntax for a field-
1934  // refernce like this, and even writing a stereotypical lambda like
1935  // Producer<FieldType> = instance -> instance.field does not generate this
1936  // kind of handle, but rather a synthetic method implementing the getfield
1937  // operation.
1938  // We don't implement a handle kind that can't be produced yet, but should
1939  // they ever turn up this is where to fix them.
1940 
1941  if(
1942  entry.get_handle_kind() ==
1944  entry.get_handle_kind() ==
1946  entry.get_handle_kind() ==
1948  entry.get_handle_kind() ==
1950  {
1951  return {};
1952  }
1953 
1954  irep_idt class_name =
1955  java_classname(class_entry.get_name(pool_entry_lambda)).get_identifier();
1956 
1957  irep_idt method_name = name_and_type.get_name(pool_entry_lambda);
1958  std::string descriptor = name_and_type.get_descriptor(pool_entry_lambda);
1959  irep_idt mangled_method_name = id2string(method_name) + ":" + descriptor;
1960  typet method_type = *java_type_from_string(descriptor);
1961 
1962  method_handle_typet handle_type =
1964 
1965  class_method_descriptor_exprt method_descriptor{
1966  method_type, mangled_method_name, class_name, method_name};
1967  lambda_method_handlet lambda_method_handle{method_descriptor, handle_type};
1968 
1969  return lambda_method_handle;
1970 }
1971 
1974 {
1975  const u2 num_bootstrap_methods = read<u2>();
1976  for(size_t bootstrap_method_index = 0;
1977  bootstrap_method_index < num_bootstrap_methods;
1978  ++bootstrap_method_index)
1979  {
1980  const u2 bootstrap_methodhandle_ref = read<u2>();
1981  const pool_entryt &entry = pool_entry(bootstrap_methodhandle_ref);
1982 
1983  method_handle_infot method_handle{entry};
1984 
1985  const u2 num_bootstrap_arguments = read<u2>();
1986  log.debug() << "INFO: parse BootstrapMethod handle "
1987  << num_bootstrap_arguments << " #args" << messaget::eom;
1988 
1989  // read u2 values of entry into vector
1990  std::vector<u2> u2_values(num_bootstrap_arguments);
1991  for(size_t i = 0; i < num_bootstrap_arguments; i++)
1992  u2_values[i] = read<u2>();
1993 
1994  // try parsing bootstrap method handle
1995  // each entry contains a MethodHandle structure
1996  // u2 tag
1997  // u2 reference kind which must be in the range from 1 to 9
1998  // u2 reference index into the constant pool
1999  //
2000  // reference kinds use the following
2001  // 1 to 4 must point to a CONSTANT_Fieldref structure
2002  // 5 or 8 must point to a CONSTANT_Methodref structure
2003  // 6 or 7 must point to a CONSTANT_Methodref or
2004  // CONSTANT_InterfaceMethodref structure, if the class file version
2005  // number is 52.0 or above, to a CONSTANT_Methodref only in the case
2006  // of less than 52.0
2007  // 9 must point to a CONSTANT_InterfaceMethodref structure
2008 
2009  // the index must point to a CONSTANT_String
2010  // CONSTANT_Class
2011  // CONSTANT_Integer
2012  // CONSTANT_Long
2013  // CONSTANT_Float
2014  // CONSTANT_Double
2015  // CONSTANT_MethodHandle
2016  // CONSTANT_MethodType
2017 
2018  // We read the three arguments here to see whether they correspond to
2019  // our hypotheses for this being a lambda function entry.
2020 
2021  // Need at least 3 arguments, the interface type, the method hanlde
2022  // and the method_type, otherwise it doesn't look like a call that we
2023  // understand
2024  if(num_bootstrap_arguments < 3)
2025  {
2026  store_unknown_method_handle(bootstrap_method_index);
2027  log.debug()
2028  << "format of BootstrapMethods entry not recognized: too few arguments"
2029  << messaget::eom;
2030  continue;
2031  }
2032 
2033  u2 interface_type_index = u2_values[0];
2034  u2 method_handle_index = u2_values[1];
2035  u2 method_type_index = u2_values[2];
2036 
2037  // The additional arguments for the altmetafactory call are skipped,
2038  // as they are currently not used. We verify though that they are of
2039  // CONSTANT_Integer type, cases where this does not hold will be
2040  // analyzed further.
2041  bool recognized = true;
2042  for(size_t i = 3; i < num_bootstrap_arguments; i++)
2043  {
2044  u2 skipped_argument = u2_values[i];
2045  recognized &= pool_entry(skipped_argument).tag == CONSTANT_Integer;
2046  }
2047 
2048  if(!recognized)
2049  {
2050  log.debug() << "format of BootstrapMethods entry not recognized: extra "
2051  "arguments of wrong type"
2052  << messaget::eom;
2053  store_unknown_method_handle(bootstrap_method_index);
2054  continue;
2055  }
2056 
2057  const pool_entryt &interface_type_argument =
2058  pool_entry(interface_type_index);
2059  const pool_entryt &method_handle_argument = pool_entry(method_handle_index);
2060  const pool_entryt &method_type_argument = pool_entry(method_type_index);
2061 
2062  if(
2063  interface_type_argument.tag != CONSTANT_MethodType ||
2064  method_handle_argument.tag != CONSTANT_MethodHandle ||
2065  method_type_argument.tag != CONSTANT_MethodType)
2066  {
2067  log.debug()
2068  << "format of BootstrapMethods entry not recognized: arguments "
2069  "wrong type"
2070  << messaget::eom;
2071  store_unknown_method_handle(bootstrap_method_index);
2072  continue;
2073  }
2074 
2075  log.debug() << "INFO: parse lambda handle" << messaget::eom;
2076  std::optional<lambda_method_handlet> lambda_method_handle =
2077  parse_method_handle(method_handle_infot{method_handle_argument});
2078 
2079  if(!lambda_method_handle.has_value())
2080  {
2081  log.debug() << "format of BootstrapMethods entry not recognized: method "
2082  "handle not recognised"
2083  << messaget::eom;
2084  store_unknown_method_handle(bootstrap_method_index);
2085  continue;
2086  }
2087 
2088  // If parse_method_handle can't parse the lambda method, it should return {}
2089  POSTCONDITION(
2091 
2092  log.debug()
2093  << "lambda function reference "
2094  << id2string(
2095  lambda_method_handle->get_method_descriptor().base_method_name())
2096  << " in class \"" << parse_tree.parsed_class.name << "\""
2097  << "\n interface type is "
2098  << id2string(pool_entry(interface_type_argument.ref1).s)
2099  << "\n method type is "
2100  << id2string(pool_entry(method_type_argument.ref1).s) << messaget::eom;
2102  bootstrap_method_index, *lambda_method_handle);
2103  }
2104 }
2105 
2110  size_t bootstrap_method_index)
2111 {
2115  bootstrap_method_index, lambda_method_handle);
2116 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
struct bytecode_infot const bytecode_info[]
#define BC_wide
int16_t s2
Definition: bytecode_info.h:60
int8_t s1
Definition: bytecode_info.h:59
int32_t s4
Definition: bytecode_info.h:61
uint16_t u2
Definition: bytecode_info.h:56
uint8_t u1
Definition: bytecode_info.h:55
uint64_t u8
Definition: bytecode_info.h:58
uint32_t u4
Definition: bytecode_info.h:57
Array constructor from list of elements.
Definition: std_expr.h:1621
Arrays with given size.
Definition: std_types.h:807
class_infot get_class(const pool_entry_lookupt &pool_entry) const
name_and_type_infot get_name_and_type(const pool_entry_lookupt &pool_entry) const
u2 get_name_and_type_index() const
base_ref_infot(const pool_entryt &entry)
Corresponds to the CONSTANT_Class_info Structure Described in Java 8 specification 4....
class_infot(const pool_entryt &entry)
std::string get_name(const pool_entry_lookupt &pool_entry) const
An expression describing a method on a class.
Definition: std_expr.h:3513
const typet & return_type() const
Definition: std_types.h:689
const parameterst & parameters() const
Definition: std_types.h:699
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
bool empty() const
Definition: dstring.h:89
const char * c_str() const
Definition: dstring.h:116
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
const irep_idt & get(const irep_idt &name) const
Definition: irep.cpp:44
const irep_idt & id() const
Definition: irep.h:388
void get_annotation_class_refs(const std::vector< annotationt > &annotations)
For each of the given annotations, get a reference to its class and recursively get class references ...
void store_unknown_method_handle(size_t bootstrap_method_index)
Creates an unknown method handle and puts it into the parsed_class.
exprt get_relement_value()
Corresponds to the element_value structure Described in Java 8 specification 4.7.16....
java_bytecode_parse_treet::annotationt annotationt
java_bytecode_parse_treet::methodt methodt
void rRuntimeAnnotation_attribute(std::vector< annotationt > &)
const typet type_entry(u2 index)
void rbytecode(std::vector< instructiont > &)
void rcode_attribute(methodt &method)
void read_bootstrapmethods_entry()
Read all entries of the BootstrapMethods attribute of a class file.
void get_class_refs()
Get the class references for the benefit of a dependency analysis.
void get_annotation_value_class_refs(const exprt &value)
See java_bytecode_parsert::get_annotation_class_refs.
java_bytecode_parse_treet parse_tree
std::vector< pool_entryt > constant_poolt
java_bytecode_parsert(bool skip_instructions, message_handlert &message_handler)
void skip_bytes(std::size_t bytes)
void read_verification_type_info(methodt::verification_type_infot &)
java_bytecode_parse_treet::fieldt fieldt
pool_entryt & pool_entry(u2 index)
void get_class_refs_rec(const typet &)
void parse_local_variable_type_table(methodt &method)
Parses the local variable type table of a method.
void rRuntimeAnnotation(annotationt &)
std::optional< lambda_method_handlet > parse_method_handle(const class method_handle_infot &entry)
Read method handle pointed to from constant pool entry at index, return type of method handle and nam...
void relement_value_pairs(annotationt::element_value_pairst &)
void rinner_classes_attribute(const u4 &attribute_length)
Corresponds to the element_value structure Described in Java 8 specification 4.7.6 https://docs....
std::vector< irep_idt > rexceptions_attribute()
Corresponds to the element_value structure Described in Java 8 specification 4.7.5 https://docs....
void rmethod_attribute(methodt &method)
method_handle_kindt
Indicates what sort of code should be synthesised for a lambda call:
Definition: java_types.h:464
@ LAMBDA_STATIC_METHOD_HANDLE
Direct call to the given method.
@ LAMBDA_VIRTUAL_METHOD_HANDLE
Virtual call to the given interface or method.
@ LAMBDA_CONSTRUCTOR_HANDLE
Instantiate the needed type then call a constructor.
mstreamt & error() const
Definition: message.h:391
mstreamt & debug() const
Definition: message.h:421
static eomt eom
Definition: message.h:289
method_handle_infot(const pool_entryt &entry)
base_ref_infot get_reference(const pool_entry_lookupt &pool_entry) const
method_handle_kindt handle_kind
method_handle_kindt get_handle_kind() const
method_handle_kindt
Correspond to the different valid values for field handle_kind From Java 8 spec 4....
Corresponds to the CONSTANT_NameAndType_info Structure Described in Java 8 specification 4....
name_and_type_infot(const pool_entryt &entry)
std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
std::string get_name(const pool_entry_lookupt &pool_entry) const
Definition: parser.h:24
std::istream * in
Definition: parser.h:26
messaget log
Definition: parser.h:133
Fixed-width bit-vector with two's complement interpretation.
const irep_idt & get_function() const
void set_java_bytecode_index(const irep_idt &index)
void set_line(const irep_idt &line)
void set_function(const irep_idt &function)
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:493
Structure type, corresponds to C style structs.
Definition: std_types.h:231
const componentst & components() const
Definition: std_types.h:147
std::function< pool_entryt &(u2)> pool_entry_lookupt
static std::string read_utf8_constant(const pool_entryt &entry)
structured_pool_entryt(const pool_entryt &entry)
static symbol_exprt typeless(const irep_idt &id)
Generate a symbol_exprt without a proper type.
Definition: std_expr.h:150
const irep_idt & get_identifier() const
Definition: std_types.h:410
An expression denoting a type.
Definition: std_expr.h:2964
The type of an expression, extends irept.
Definition: type.h:29
Fixed-width bit-vector with unsigned binary interpretation.
static format_containert< T > format(const T &o)
Definition: format.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
#define CONSTANT_Fieldref
#define CONSTANT_String
#define VTYPE_INFO_DOUBLE
#define UNUSED_u2(x)
#define ACC_PUBLIC
#define CONSTANT_InvokeDynamic
#define VTYPE_INFO_FLOAT
#define ACC_BRIDGE
#define ACC_NATIVE
#define T_SHORT
#define ACC_ENUM
#define CONSTANT_Methodref
#define T_LONG
#define T_FLOAT
#define ACC_VARARGS
#define CONSTANT_Long
#define T_INT
#define VTYPE_INFO_LONG
#define VTYPE_INFO_TOP
#define CONSTANT_NameAndType
#define ACC_ABSTRACT
#define VTYPE_INFO_OBJECT
#define ACC_ANNOTATION
#define T_DOUBLE
#define ACC_SYNCHRONIZED
#define T_BYTE
#define CONSTANT_Class
#define ACC_STATIC
#define CONSTANT_Integer
#define CONSTANT_MethodType
#define VTYPE_INFO_INTEGER
static java_class_typet::method_handle_kindt get_method_handle_type(method_handle_infot::method_handle_kindt java_handle_kind)
Translate the lambda method reference kind in a class file into the kind of handling the method requi...
#define ACC_PROTECTED
#define CONSTANT_Double
#define ACC_PRIVATE
#define CONSTANT_Float
#define VTYPE_INFO_UNINIT
#define CONSTANT_InterfaceMethodref
#define T_BOOLEAN
#define ACC_INTERFACE
#define ACC_SYNTHETIC
#define VTYPE_INFO_UNINIT_THIS
#define ACC_FINAL
std::optional< java_bytecode_parse_treet > java_bytecode_parse(std::istream &istream, const irep_idt &class_name, message_handlert &message_handler, bool skip_instructions)
Attempt to parse a Java class from the given stream.
#define CONSTANT_MethodHandle
#define T_CHAR
#define VTYPE_INFO_ITEM_NULL
#define CONSTANT_Utf8
Representation of a constant Java string.
void get_dependencies_from_generic_parameters(const std::string &signature, std::set< irep_idt > &refs)
Collect information about generic type parameters from a given signature.
Definition: java_types.cpp:996
struct_tag_typet java_classname(const std::string &id)
Definition: java_types.cpp:809
const typet & java_array_element_type(const struct_tag_typet &array_symbol)
Return a const reference to the element type of a given java array type.
Definition: java_types.cpp:144
std::optional< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
Definition: java_types.cpp:561
bool is_java_array_tag(const irep_idt &tag)
See above.
Definition: java_types.cpp:233
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:183
std::optional< typet > java_type_from_string_with_exception(const std::string &descriptor, const std::optional< std::string > &signature, const std::string &class_name)
Definition: java_types.h:1140
static std::string binary(const constant_exprt &src)
Definition: json_expr.cpp:187
static std::optional< java_class_typet::java_lambda_method_handlet > lambda_method_handle(const symbol_table_baset &symbol_table, const irep_idt &method_identifier, const java_method_typet &dynamic_method_type)
java_bytecode_parse_treet::classt::lambda_method_handlet lambda_method_handlet
java_bytecode_parse_treet::methodt methodt
Parser utilities.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
#define POSTCONDITION(CONDITION)
Definition: invariant.h:479
API to expression classes.
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:308
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:518
void replace(const union_find_replacet &replace_map, string_not_contains_constraintt &constraint)
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
std::vector< element_value_pairt > element_value_pairst
std::optional< std::string > signature
void add_method_handle(size_t bootstrap_index, const lambda_method_handlet &handle)
std::optional< std::string > signature
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.