CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
java_bytecode_parser.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
10
11#include <algorithm>
12#include <fstream>
13#include <map>
14#include <string>
15
16#include <util/arith_tools.h>
17#include <util/ieee_float.h>
18#include <util/parser.h>
19#include <util/std_expr.h>
21
22#include "bytecode_info.h"
25#include "java_types.h"
26
27class java_bytecode_parsert final : public parsert
28{
29public:
32 message_handlert &message_handler)
34 {
35 }
36
37 bool parse() override;
38
40 {
41 u1 tag = 0;
42 u2 ref1 = 0;
43 u2 ref2 = 0;
47 };
48
50
51private:
60
61 using constant_poolt = std::vector<pool_entryt>;
62
64
65 const bool skip_instructions = false;
66
68 {
69 if(index==0 || index>=constant_pool.size())
70 {
71 log.error() << "invalid constant pool index (" << index << ")"
73 log.error() << "constant pool size: " << constant_pool.size()
75 throw 0;
76 }
77
78 return constant_pool[index];
79 }
80
82 {
83 return pool_entry(index).expr;
84 }
85
86 const typet type_entry(u2 index)
87 {
89 }
90
91 void rClassFile();
92 void rconstant_pool();
93 void rinterfaces();
94 void rfields();
95 void rmethods();
96 void rmethod();
98 std::vector<irep_idt> rexceptions_attribute();
99 void rclass_attribute();
100 void rRuntimeAnnotation_attribute(std::vector<annotationt> &);
104 void rmethod_attribute(methodt &method);
105 void rfield_attribute(fieldt &);
106 void rcode_attribute(methodt &method);
107 void read_verification_type_info(methodt::verification_type_infot &);
108 void rbytecode(std::vector<instructiont> &);
109 void get_class_refs();
110 void get_class_refs_rec(const typet &);
111 void get_annotation_class_refs(const std::vector<annotationt> &annotations);
112 void get_annotation_value_class_refs(const exprt &value);
114 std::optional<lambda_method_handlet>
115 parse_method_handle(const class method_handle_infot &entry);
117
118 void skip_bytes(std::size_t bytes)
119 {
120 for(std::size_t i=0; i<bytes; i++)
121 {
122 if(!*in)
123 {
124 log.error() << "unexpected end of bytecode file" << messaget::eom;
125 throw 0;
126 }
127 in->get();
128 }
129 }
130
131 template <typename T>
133 {
134 static_assert(
135 std::is_unsigned<T>::value, "T should be an unsigned integer");
136 const constexpr size_t bytes = sizeof(T);
137 u8 result = 0;
138 for(size_t i = 0; i < bytes; i++)
139 {
140 if(!*in)
141 {
142 log.error() << "unexpected end of bytecode file" << messaget::eom;
143 throw 0;
144 }
145 result <<= 8u;
146 result |= static_cast<u1>(in->get());
147 }
148 return narrow_cast<T>(result);
149 }
150
152};
153
154#define CONSTANT_Class 7
155#define CONSTANT_Fieldref 9
156#define CONSTANT_Methodref 10
157#define CONSTANT_InterfaceMethodref 11
158#define CONSTANT_String 8
159#define CONSTANT_Integer 3
160#define CONSTANT_Float 4
161#define CONSTANT_Long 5
162#define CONSTANT_Double 6
163#define CONSTANT_NameAndType 12
164#define CONSTANT_Utf8 1
165#define CONSTANT_MethodHandle 15
166#define CONSTANT_MethodType 16
167#define CONSTANT_InvokeDynamic 18
168
169#define VTYPE_INFO_TOP 0
170#define VTYPE_INFO_INTEGER 1
171#define VTYPE_INFO_FLOAT 2
172#define VTYPE_INFO_LONG 3
173#define VTYPE_INFO_DOUBLE 4
174#define VTYPE_INFO_ITEM_NULL 5
175#define VTYPE_INFO_UNINIT_THIS 6
176#define VTYPE_INFO_OBJECT 7
177#define VTYPE_INFO_UNINIT 8
178
180{
181public:
183 using pool_entry_lookupt = std::function<pool_entryt &(u2)>;
184
185 explicit structured_pool_entryt(const pool_entryt &entry) : tag(entry.tag)
186 {
187 }
188
189 u1 get_tag() const
190 {
191 return tag;
192 }
193
194protected:
195 static std::string read_utf8_constant(const pool_entryt &entry)
196 {
197 INVARIANT(
198 entry.tag == CONSTANT_Utf8, "Name entry must be a constant UTF-8");
199 return id2string(entry.s);
200 }
201
202private:
204};
205
209{
210public:
211 explicit class_infot(const pool_entryt &entry) : structured_pool_entryt(entry)
212 {
214 name_index = entry.ref1;
215 }
216
217 std::string get_name(const pool_entry_lookupt &pool_entry) const
218 {
219 const pool_entryt &name_entry = pool_entry(name_index);
221 }
222
223private:
225};
226
230{
231public:
232 explicit name_and_type_infot(const pool_entryt &entry)
234 {
236 name_index = entry.ref1;
237 descriptor_index = entry.ref2;
238 }
239
240 std::string get_name(const pool_entry_lookupt &pool_entry) const
241 {
242 const pool_entryt &name_entry = pool_entry(name_index);
244 }
245
246 std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
247 {
248 const pool_entryt &descriptor_entry = pool_entry(descriptor_index);
250 }
251
252private:
255};
256
258{
259public:
260 explicit base_ref_infot(const pool_entryt &entry)
262 {
264 entry.tag == CONSTANT_Fieldref || entry.tag == CONSTANT_Methodref ||
266 class_index = entry.ref1;
268 }
269
271 {
272 return class_index;
273 }
275 {
276 return name_and_type_index;
277 }
278
280 get_name_and_type(const pool_entry_lookupt &pool_entry) const
281 {
283
284 INVARIANT(
286 "name_and_typeindex did not correspond to a name_and_type in the "
287 "constant pool");
288
290 }
291
292 class_infot get_class(const pool_entry_lookupt &pool_entry) const
293 {
294 const pool_entryt &class_entry = pool_entry(class_index);
295
296 return class_infot{class_entry};
297 }
298
299private:
302};
303
305{
306public:
311 {
312 REF_getField = 1,
313 REF_getStatic = 2,
314 REF_putField = 3,
315 REF_putStatic = 4,
321 };
322
323 explicit method_handle_infot(const pool_entryt &entry)
325 {
327 PRECONDITION(entry.ref1 > 0 && entry.ref1 < 10); // Java 8 spec 4.4.8
328 handle_kind = static_cast<method_handle_kindt>(entry.ref1);
329 reference_index = entry.ref2;
330 }
331
333 {
334 return handle_kind;
335 }
336
338 {
339 const base_ref_infot ref_entry{pool_entry(reference_index)};
340
341 // validate the correctness of the constant pool entry
342 switch(handle_kind)
343 {
348 {
349 INVARIANT(ref_entry.get_tag() == CONSTANT_Fieldref, "4.4.2");
350 break;
351 }
354 {
355 INVARIANT(ref_entry.get_tag() == CONSTANT_Methodref, "4.4.2");
356 break;
357 }
360 {
361 INVARIANT(
362 ref_entry.get_tag() == CONSTANT_Methodref ||
364 "4.4.2");
365 break;
366 }
368 {
369 INVARIANT(ref_entry.get_tag() == CONSTANT_InterfaceMethodref, "4.4.8");
370 break;
371 }
372 }
373
374 return ref_entry;
375 }
376
377private:
380};
381
383{
384 try
385 {
386 rClassFile();
387 }
388
389 catch(const char *message)
390 {
391 log.error() << message << messaget::eom;
392 return true;
393 }
394
395 catch(const std::string &message)
396 {
397 log.error() << message << messaget::eom;
398 return true;
399 }
400
401 catch(...)
402 {
403 log.error() << "parsing error" << messaget::eom;
404 return true;
405 }
406
407 return false;
408}
409
410#define ACC_PUBLIC 0x0001u
411#define ACC_PRIVATE 0x0002u
412#define ACC_PROTECTED 0x0004u
413#define ACC_STATIC 0x0008u
414#define ACC_FINAL 0x0010u
415#define ACC_SYNCHRONIZED 0x0020u
416#define ACC_BRIDGE 0x0040u
417#define ACC_NATIVE 0x0100u
418#define ACC_INTERFACE 0x0200u
419#define ACC_ABSTRACT 0x0400u
420#define ACC_STRICT 0x0800u
421#define ACC_SYNTHETIC 0x1000u
422#define ACC_ANNOTATION 0x2000u
423#define ACC_ENUM 0x4000u
424
425#define UNUSED_u2(x) \
426 { \
427 const u2 x = read<u2>(); \
428 (void)x; \
429 } \
430 (void)0
431
433{
435
436 const u4 magic = read<u4>();
438 const u2 major_version = read<u2>();
439
440 if(magic!=0xCAFEBABE)
441 {
442 log.error() << "wrong magic" << messaget::eom;
443 throw 0;
444 }
445
446 if(major_version<44)
447 {
448 log.error() << "unexpected major version" << messaget::eom;
449 throw 0;
450 }
451
453
454 classt &parsed_class=parse_tree.parsed_class;
455
456 const u2 access_flags = read<u2>();
457 const u2 this_class = read<u2>();
458 const u2 super_class = read<u2>();
459
460 parsed_class.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
461 parsed_class.is_enum=(access_flags&ACC_ENUM)!=0;
462 parsed_class.is_public=(access_flags&ACC_PUBLIC)!=0;
463 parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
464 parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
465 parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
466 parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
467 parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
468 parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
469 parsed_class.name=
471
472 if(super_class!=0)
473 parsed_class.super_class = constant(super_class).type().get(ID_C_base_name);
474
475 rinterfaces();
476 rfields();
477 rmethods();
478
479 // count elements of enum
480 if(parsed_class.is_enum)
482 if(field.is_enum)
484
485 const u2 attributes_count = read<u2>();
486
487 for(std::size_t j=0; j<attributes_count; j++)
489
491
493}
494
497{
498 for(const auto &c : constant_pool)
499 {
500 switch(c.tag)
501 {
502 case CONSTANT_Class:
503 get_class_refs_rec(c.expr.type());
504 break;
505
509 break;
510
511 default: {}
512 }
513 }
514
516
517 for(const auto &field : parse_tree.parsed_class.fields)
518 {
519 get_annotation_class_refs(field.annotations);
520
521 if(field.signature.has_value())
522 {
524 field.descriptor,
525 field.signature,
527
528 // add generic type args to class refs as dependencies, same below for
529 // method types and entries from the local variable type table
531 field_type, parse_tree.class_refs);
532 get_class_refs_rec(field_type);
533 }
534 else
535 {
537 }
538 }
539
540 for(const auto &method : parse_tree.parsed_class.methods)
541 {
542 get_annotation_class_refs(method.annotations);
543 for(const auto &parameter_annotations : method.parameter_annotations)
544 get_annotation_class_refs(parameter_annotations);
545
546 if(method.signature.has_value())
547 {
549 method.descriptor,
550 method.signature,
555 }
556 else
557 {
558 get_class_refs_rec(*java_type_from_string(method.descriptor));
559 }
560
561 for(const auto &var : method.local_variable_table)
562 {
563 if(var.signature.has_value())
564 {
566 var.descriptor,
567 var.signature,
572 }
573 else
574 {
576 }
577 }
578 }
579}
580
582{
583 if(src.id()==ID_code)
584 {
586 const typet &rt=ct.return_type();
588 for(const auto &p : ct.parameters())
589 get_class_refs_rec(p.type());
590 }
591 else if(src.id() == ID_struct_tag)
592 {
594 if(is_java_array_tag(struct_tag_type.get_identifier()))
596 else
598 }
599 else if(src.id()==ID_struct)
600 {
602 for(const auto &c : struct_type.components())
603 get_class_refs_rec(c.type());
604 }
605 else if(src.id()==ID_pointer)
606 get_class_refs_rec(to_pointer_type(src).base_type());
607}
608
612 const std::vector<annotationt> &annotations)
613{
614 for(const auto &annotation : annotations)
615 {
616 get_class_refs_rec(annotation.type);
617 for(const auto &element_value_pair : annotation.element_value_pairs)
619 }
620}
621
626{
627 if(const auto &symbol_expr = expr_try_dynamic_cast<symbol_exprt>(value))
628 {
629 const irep_idt &value_id = symbol_expr->get_identifier();
631 }
632 else if(const auto &array_expr = expr_try_dynamic_cast<array_exprt>(value))
633 {
634 for(const exprt &operand : array_expr->operands())
636 }
637 // TODO: enum and nested annotation cases (once these are correctly parsed by
638 // get_relement_value).
639 // Note that in the cases where expr is a string or primitive type, no
640 // additional class references are needed.
641}
642
644{
647 {
648 log.error() << "invalid constant_pool_count" << messaget::eom;
649 throw 0;
650 }
651
653
654 for(auto it = std::next(constant_pool.begin()); it != constant_pool.end();
655 it++)
656 {
657 it->tag = read<u1>();
658
659 switch(it->tag)
660 {
661 case CONSTANT_Class:
662 it->ref1 = read<u2>();
663 break;
664
670 it->ref1 = read<u2>();
671 it->ref2 = read<u2>();
672 break;
673
674 case CONSTANT_String:
676 it->ref1 = read<u2>();
677 break;
678
679 case CONSTANT_Integer:
680 case CONSTANT_Float:
681 it->number = read<u4>();
682 break;
683
684 case CONSTANT_Long:
685 case CONSTANT_Double:
686 it->number = read<u8>();
687 // Eight-byte constants take up two entries in the constant_pool table.
688 if(it==constant_pool.end())
689 {
690 log.error() << "invalid double entry" << messaget::eom;
691 throw 0;
692 }
693 it++;
694 it->tag=0;
695 break;
696
697 case CONSTANT_Utf8:
698 {
699 const u2 bytes = read<u2>();
700 std::string s;
701 s.resize(bytes);
702 for(auto &ch : s)
703 ch = read<u1>();
704 it->s = s; // Add to string table
705 }
706 break;
707
709 it->ref1 = read<u1>();
710 it->ref2 = read<u2>();
711 break;
712
713 default:
714 log.error() << "unknown constant pool entry (" << it->tag << ")"
715 << messaget::eom;
716 throw 0;
717 }
718 }
719
720 // we do a bit of post-processing after we have them all
721 std::for_each(
722 std::next(constant_pool.begin()),
723 constant_pool.end(),
724 [&](constant_poolt::value_type &entry) {
725 switch(entry.tag)
726 {
727 case CONSTANT_Class:
728 {
729 const std::string &s = id2string(pool_entry(entry.ref1).s);
730 entry.expr = type_exprt(java_classname(s));
731 }
732 break;
733
734 case CONSTANT_Fieldref:
735 {
736 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
737 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
738 const pool_entryt &class_entry = pool_entry(entry.ref1);
739 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
740 typet type=type_entry(nameandtype_entry.ref2);
741
742 auto class_tag = java_classname(id2string(class_name_entry.s));
743
744 fieldref_exprt fieldref(type, name_entry.s, class_tag.get_identifier());
745
746 entry.expr = fieldref;
747 }
748 break;
749
750 case CONSTANT_Methodref:
751 case CONSTANT_InterfaceMethodref:
752 {
753 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
754 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
755 const pool_entryt &class_entry = pool_entry(entry.ref1);
756 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
757 typet type=type_entry(nameandtype_entry.ref2);
758
759 auto class_tag = java_classname(id2string(class_name_entry.s));
760
761 irep_idt mangled_method_name =
762 id2string(name_entry.s) + ":" +
763 id2string(pool_entry(nameandtype_entry.ref2).s);
764
765 irep_idt class_id = class_tag.get_identifier();
766
767 entry.expr = class_method_descriptor_exprt{
768 type, mangled_method_name, class_id, name_entry.s};
769 }
770 break;
771
772 case CONSTANT_String:
773 {
774 // ldc turns these into references to java.lang.String
775 entry.expr = java_string_literal_exprt{pool_entry(entry.ref1).s};
776 }
777 break;
778
779 case CONSTANT_Integer:
780 entry.expr = from_integer(entry.number, java_int_type());
781 break;
782
783 case CONSTANT_Float:
784 {
785 ieee_float_valuet value(ieee_float_spect::single_precision());
786 value.unpack(entry.number);
787 entry.expr = value.to_expr();
788 }
789 break;
790
791 case CONSTANT_Long:
792 entry.expr = from_integer(entry.number, java_long_type());
793 break;
794
795 case CONSTANT_Double:
796 {
797 ieee_float_valuet value(ieee_float_spect::double_precision());
798 value.unpack(entry.number);
799 entry.expr = value.to_expr();
800 }
801 break;
802
803 case CONSTANT_NameAndType:
804 {
805 entry.expr.id("nameandtype");
806 }
807 break;
808
809 case CONSTANT_MethodHandle:
810 {
811 entry.expr.id("methodhandle");
812 }
813 break;
814
815 case CONSTANT_MethodType:
816 {
817 entry.expr.id("methodtype");
818 }
819 break;
820
821 case CONSTANT_InvokeDynamic:
822 {
823 entry.expr.id("invokedynamic");
824 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
825 typet type=type_entry(nameandtype_entry.ref2);
826 type.set(ID_java_lambda_method_handle_index, entry.ref1);
827 entry.expr.type() = type;
828 }
829 break;
830 }
831 });
832}
833
835{
836 const u2 interfaces_count = read<u2>();
837
838 for(std::size_t i=0; i<interfaces_count; i++)
840 constant(read<u2>()).type().get(ID_C_base_name));
841}
842
844{
845 const u2 fields_count = read<u2>();
846
847 for(std::size_t i=0; i<fields_count; i++)
848 {
850
851 const u2 access_flags = read<u2>();
852 const u2 name_index = read<u2>();
853 const u2 descriptor_index = read<u2>();
854 const u2 attributes_count = read<u2>();
855
856 field.name=pool_entry(name_index).s;
857 field.is_static=(access_flags&ACC_STATIC)!=0;
858 field.is_final=(access_flags&ACC_FINAL)!=0;
859 field.is_enum=(access_flags&ACC_ENUM)!=0;
860
861 field.descriptor=id2string(pool_entry(descriptor_index).s);
862 field.is_public=(access_flags&ACC_PUBLIC)!=0;
863 field.is_protected=(access_flags&ACC_PROTECTED)!=0;
864 field.is_private=(access_flags&ACC_PRIVATE)!=0;
865 const auto flags = (field.is_public ? 1 : 0) +
866 (field.is_protected?1:0)+
867 (field.is_private?1:0);
868 DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
869
870 for(std::size_t j=0; j<attributes_count; j++)
872 }
873}
874
875#define T_BOOLEAN 4
876#define T_CHAR 5
877#define T_FLOAT 6
878#define T_DOUBLE 7
879#define T_BYTE 8
880#define T_SHORT 9
881#define T_INT 10
882#define T_LONG 11
883
884void java_bytecode_parsert::rbytecode(std::vector<instructiont> &instructions)
885{
886 const u4 code_length = read<u4>();
887
888 u4 address;
889 size_t bytecode_index=0; // index of bytecode instruction
890
891 for(address=0; address<code_length; address++)
892 {
893 bool wide_instruction=false;
894 u4 start_of_instruction=address;
895
896 u1 bytecode = read<u1>();
897
898 if(bytecode == BC_wide)
899 {
900 wide_instruction=true;
901 address++;
902 bytecode = read<u1>();
903 // The only valid instructions following a wide byte are
904 // [ifald]load, [ifald]store, ret and iinc
905 // All of these have either format of v, or V
906 INVARIANT(
907 bytecode_info[bytecode].format == 'v' ||
908 bytecode_info[bytecode].format == 'V',
909 std::string("Unexpected wide instruction: ") +
910 bytecode_info[bytecode].mnemonic);
911 }
912
913 instructions.emplace_back();
914 instructiont &instruction=instructions.back();
915 instruction.bytecode = bytecode;
916 instruction.address=start_of_instruction;
917 instruction.source_location
918 .set_java_bytecode_index(std::to_string(bytecode_index));
919
920 switch(bytecode_info[bytecode].format)
921 {
922 case ' ': // no further bytes
923 break;
924
925 case 'c': // a constant_pool index (one byte)
927 {
928 instruction.args.push_back(constant(read<u2>()));
929 address+=2;
930 }
931 else
932 {
933 instruction.args.push_back(constant(read<u1>()));
934 address+=1;
935 }
936 break;
937
938 case 'C': // a constant_pool index (two bytes)
939 instruction.args.push_back(constant(read<u2>()));
940 address+=2;
941 break;
942
943 case 'b': // a signed byte
944 {
945 const s1 c = read<u1>();
946 instruction.args.push_back(from_integer(c, signedbv_typet(8)));
947 }
948 address+=1;
949 break;
950
951 case 'o': // two byte branch offset, signed
952 {
953 const s2 offset = read<u2>();
954 // By converting the signed offset into an absolute address (by adding
955 // the current address) the number represented becomes unsigned.
956 instruction.args.push_back(
957 from_integer(address+offset, unsignedbv_typet(16)));
958 }
959 address+=2;
960 break;
961
962 case 'O': // four byte branch offset, signed
963 {
964 const s4 offset = read<u4>();
965 // By converting the signed offset into an absolute address (by adding
966 // the current address) the number represented becomes unsigned.
967 instruction.args.push_back(
968 from_integer(address+offset, unsignedbv_typet(32)));
969 }
970 address+=4;
971 break;
972
973 case 'v': // local variable index (one byte)
974 {
976 {
977 const u2 v = read<u2>();
978 instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
979 address += 2;
980 }
981 else
982 {
983 const u1 v = read<u1>();
984 instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
985 address += 1;
986 }
987 }
988
989 break;
990
991 case 'V':
992 // local variable index (two bytes) plus two signed bytes
994 {
995 const u2 v = read<u2>();
996 instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
997 const s2 c = read<u2>();
998 instruction.args.push_back(from_integer(c, signedbv_typet(16)));
999 address+=4;
1000 }
1001 else // local variable index (one byte) plus one signed byte
1002 {
1003 const u1 v = read<u1>();
1004 instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1005 const s1 c = read<u1>();
1006 instruction.args.push_back(from_integer(c, signedbv_typet(8)));
1007 address+=2;
1008 }
1009 break;
1010
1011 case 'I': // two byte constant_pool index plus two bytes
1012 {
1013 const u2 c = read<u2>();
1014 instruction.args.push_back(constant(c));
1015 const u1 b1 = read<u1>();
1016 instruction.args.push_back(from_integer(b1, unsignedbv_typet(8)));
1017 const u1 b2 = read<u1>();
1018 instruction.args.push_back(from_integer(b2, unsignedbv_typet(8)));
1019 }
1020 address+=4;
1021 break;
1022
1023 case 'L': // lookupswitch
1024 {
1025 u4 base_offset=address;
1026
1027 // first a pad to 32-bit align
1028 while(((address + 1u) & 3u) != 0)
1029 {
1030 read<u1>();
1031 address++;
1032 }
1033
1034 // now default value
1035 const s4 default_value = read<u4>();
1036 // By converting the signed offset into an absolute address (by adding
1037 // the current address) the number represented becomes unsigned.
1038 instruction.args.push_back(
1039 from_integer(base_offset+default_value, unsignedbv_typet(32)));
1040 address+=4;
1041
1042 // number of pairs
1043 const u4 npairs = read<u4>();
1044 address+=4;
1045
1046 for(std::size_t i=0; i<npairs; i++)
1047 {
1048 const s4 match = read<u4>();
1049 const s4 offset = read<u4>();
1050 instruction.args.push_back(
1051 from_integer(match, signedbv_typet(32)));
1052 // By converting the signed offset into an absolute address (by adding
1053 // the current address) the number represented becomes unsigned.
1054 instruction.args.push_back(
1056 address+=8;
1057 }
1058 }
1059 break;
1060
1061 case 'T': // tableswitch
1062 {
1063 size_t base_offset=address;
1064
1065 // first a pad to 32-bit align
1066 while(((address + 1u) & 3u) != 0)
1067 {
1068 read<u1>();
1069 address++;
1070 }
1071
1072 // now default value
1073 const s4 default_value = read<u4>();
1074 instruction.args.push_back(
1075 from_integer(base_offset+default_value, signedbv_typet(32)));
1076 address+=4;
1077
1078 // now low value
1079 const s4 low_value = read<u4>();
1080 address+=4;
1081
1082 // now high value
1083 const s4 high_value = read<u4>();
1084 address+=4;
1085
1086 // there are high-low+1 offsets, and they are signed
1087 for(s4 i=low_value; i<=high_value; i++)
1088 {
1089 s4 offset = read<u4>();
1090 instruction.args.push_back(from_integer(i, signedbv_typet(32)));
1091 // By converting the signed offset into an absolute address (by adding
1092 // the current address) the number represented becomes unsigned.
1093 instruction.args.push_back(
1095 address+=4;
1096 }
1097 }
1098 break;
1099
1100 case 'm': // multianewarray: constant-pool index plus one unsigned byte
1101 {
1102 const u2 c = read<u2>(); // constant-pool index
1103 instruction.args.push_back(constant(c));
1104 const u1 dimensions = read<u1>(); // number of dimensions
1105 instruction.args.push_back(
1107 address+=3;
1108 }
1109 break;
1110
1111 case 't': // array subtype, one byte
1112 {
1113 typet t;
1114 switch(read<u1>())
1115 {
1116 case T_BOOLEAN: t.id(ID_bool); break;
1117 case T_CHAR: t.id(ID_char); break;
1118 case T_FLOAT: t.id(ID_float); break;
1119 case T_DOUBLE: t.id(ID_double); break;
1120 case T_BYTE: t.id(ID_byte); break;
1121 case T_SHORT: t.id(ID_short); break;
1122 case T_INT: t.id(ID_int); break;
1123 case T_LONG: t.id(ID_long); break;
1124 default:{};
1125 }
1126 instruction.args.push_back(type_exprt(t));
1127 }
1128 address+=1;
1129 break;
1130
1131 case 's': // a signed short
1132 {
1133 const s2 s = read<u2>();
1134 instruction.args.push_back(from_integer(s, signedbv_typet(16)));
1135 }
1136 address+=2;
1137 break;
1138
1139 default:
1140 throw "unknown JVM bytecode instruction";
1141 }
1143 }
1144
1145 if(address!=code_length)
1146 {
1147 log.error() << "bytecode length mismatch" << messaget::eom;
1148 throw 0;
1149 }
1150}
1151
1153{
1155 const u4 attribute_length = read<u4>();
1156
1158
1159 if(attribute_name == "Code")
1160 {
1163
1166 else
1167 rbytecode(method.instructions);
1168
1172 else
1173 {
1175
1176 for(std::size_t e = 0; e < exception_table_length; e++)
1177 {
1178 const u2 start_pc = read<u2>();
1179 const u2 end_pc = read<u2>();
1180
1181 // From the class file format spec ("4.7.3. The Code Attribute" for
1182 // Java8)
1183 INVARIANT(
1184 start_pc < end_pc,
1185 "The start_pc must be less than the end_pc as this is the range the "
1186 "exception is active");
1187
1188 const u2 handler_pc = read<u2>();
1189 const u2 catch_type = read<u2>();
1190 method.exception_table[e].start_pc = start_pc;
1191 method.exception_table[e].end_pc = end_pc;
1192 method.exception_table[e].handler_pc = handler_pc;
1193 if(catch_type != 0)
1194 method.exception_table[e].catch_type =
1195 to_struct_tag_type(pool_entry(catch_type).expr.type());
1196 }
1197 }
1198
1200
1201 for(std::size_t j=0; j<attributes_count; j++)
1202 rcode_attribute(method);
1203
1204 // method name
1206 "java::" + id2string(parse_tree.parsed_class.name) + "." +
1207 id2string(method.name) + ":" + method.descriptor);
1208
1209 irep_idt line_number;
1210
1211 // add missing line numbers
1212 for(auto &instruction : method.instructions)
1213 {
1214 if(!instruction.source_location.get_line().empty())
1215 line_number = instruction.source_location.get_line();
1216 else if(!line_number.empty())
1217 instruction.source_location.set_line(line_number);
1218 instruction.source_location.set_function(
1219 method.source_location.get_function());
1220 }
1221
1222 // line number of method (the first line number available)
1223 const auto it = std::find_if(
1224 method.instructions.begin(),
1225 method.instructions.end(),
1226 [&](const instructiont &instruction) {
1227 return !instruction.source_location.get_line().empty();
1228 });
1229 if(it != method.instructions.end())
1230 method.source_location.set_line(it->source_location.get_line());
1231 }
1232 else if(attribute_name=="Signature")
1233 {
1234 const u2 signature_index = read<u2>();
1236 }
1237 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1238 attribute_name=="RuntimeVisibleAnnotations")
1239 {
1241 }
1242 else if(
1243 attribute_name == "RuntimeInvisibleParameterAnnotations" ||
1244 attribute_name == "RuntimeVisibleParameterAnnotations")
1245 {
1246 const u1 parameter_count = read<u1>();
1247 // There may be attributes for both runtime-visible and runtime-invisible
1248 // annotations, the length of either array may be longer than the other as
1249 // trailing parameters without annotations are omitted.
1250 // Extend our parameter_annotations if this one is longer than the one
1251 // previously recorded (if any).
1252 if(method.parameter_annotations.size() < parameter_count)
1256 }
1257 else if(attribute_name == "Exceptions")
1258 {
1260 }
1261 else
1263}
1264
1266{
1268 const u4 attribute_length = read<u4>();
1269
1271
1272 if(attribute_name=="Signature")
1273 {
1274 const u2 signature_index = read<u2>();
1276 }
1277 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1278 attribute_name=="RuntimeVisibleAnnotations")
1279 {
1281 }
1282 else
1284}
1285
1287{
1289 const u4 attribute_length = read<u4>();
1290
1292
1293 if(attribute_name=="LineNumberTable")
1294 {
1295 std::map<unsigned, std::reference_wrapper<instructiont>> instruction_map;
1296 for(auto &instruction : method.instructions)
1297 instruction_map.emplace(instruction.address, instruction);
1298
1300
1301 for(std::size_t i=0; i<line_number_table_length; i++)
1302 {
1303 const u2 start_pc = read<u2>();
1304 const u2 line_number = read<u2>();
1305
1306 // annotate the bytecode program
1307 auto it = instruction_map.find(start_pc);
1308
1309 if(it!=instruction_map.end())
1310 it->second.get().source_location.set_line(line_number);
1311 }
1312 }
1313 else if(attribute_name=="LocalVariableTable")
1314 {
1316
1318
1319 for(std::size_t i=0; i<local_variable_table_length; i++)
1320 {
1321 const u2 start_pc = read<u2>();
1322 const u2 length = read<u2>();
1323 const u2 name_index = read<u2>();
1324 const u2 descriptor_index = read<u2>();
1325 const u2 index = read<u2>();
1326
1327 method.local_variable_table[i].index=index;
1328 method.local_variable_table[i].name=pool_entry(name_index).s;
1329 method.local_variable_table[i].descriptor=
1330 id2string(pool_entry(descriptor_index).s);
1331 method.local_variable_table[i].start_pc=start_pc;
1332 method.local_variable_table[i].length=length;
1333 }
1334 }
1335 else if(attribute_name=="LocalVariableTypeTable")
1336 {
1338 }
1339 else if(attribute_name=="StackMapTable")
1340 {
1341 const u2 stack_map_entries = read<u2>();
1342
1343 method.stack_map_table.resize(stack_map_entries);
1344
1345 for(size_t i=0; i<stack_map_entries; i++)
1346 {
1347 const u1 frame_type = read<u1>();
1348 if(frame_type<=63)
1349 {
1350 method.stack_map_table[i].type=methodt::stack_map_table_entryt::SAME;
1351 method.stack_map_table[i].locals.resize(0);
1352 method.stack_map_table[i].stack.resize(0);
1353 }
1354 else if(64<=frame_type && frame_type<=127)
1355 {
1356 method.stack_map_table[i].type=
1357 methodt::stack_map_table_entryt::SAME_LOCALS_ONE_STACK;
1358 method.stack_map_table[i].locals.resize(0);
1359 method.stack_map_table[i].stack.resize(1);
1362 method.stack_map_table[i].stack[0]=verification_type_info;
1363 }
1364 else if(frame_type==247)
1365 {
1366 method.stack_map_table[i].type=
1367 methodt::stack_map_table_entryt::SAME_LOCALS_ONE_STACK_EXTENDED;
1368 method.stack_map_table[i].locals.resize(0);
1369 method.stack_map_table[i].stack.resize(1);
1371 const u2 offset_delta = read<u2>();
1373 method.stack_map_table[i].stack[0]=verification_type_info;
1374 method.stack_map_table[i].offset_delta=offset_delta;
1375 }
1376 else if(248<=frame_type && frame_type<=250)
1377 {
1378 method.stack_map_table[i].type=methodt::stack_map_table_entryt::CHOP;
1379 method.stack_map_table[i].locals.resize(0);
1380 method.stack_map_table[i].stack.resize(0);
1381 const u2 offset_delta = read<u2>();
1382 method.stack_map_table[i].offset_delta=offset_delta;
1383 }
1384 else if(frame_type==251)
1385 {
1386 method.stack_map_table[i].type
1387 =methodt::stack_map_table_entryt::SAME_EXTENDED;
1388 method.stack_map_table[i].locals.resize(0);
1389 method.stack_map_table[i].stack.resize(0);
1390 const u2 offset_delta = read<u2>();
1391 method.stack_map_table[i].offset_delta=offset_delta;
1392 }
1393 else if(252<=frame_type && frame_type<=254)
1394 {
1395 size_t new_locals = frame_type - 251;
1396 method.stack_map_table[i].type=methodt::stack_map_table_entryt::APPEND;
1397 method.stack_map_table[i].locals.resize(new_locals);
1398 method.stack_map_table[i].stack.resize(0);
1399 const u2 offset_delta = read<u2>();
1400 method.stack_map_table[i].offset_delta=offset_delta;
1401 for(size_t k=0; k<new_locals; k++)
1402 {
1403 method.stack_map_table[i].locals
1406 method.stack_map_table[i].locals.back();
1408 }
1409 }
1410 else if(frame_type==255)
1411 {
1412 method.stack_map_table[i].type=methodt::stack_map_table_entryt::FULL;
1413 const u2 offset_delta = read<u2>();
1414 method.stack_map_table[i].offset_delta=offset_delta;
1415 const u2 number_locals = read<u2>();
1416 method.stack_map_table[i].locals.resize(number_locals);
1417 for(size_t k=0; k<(size_t) number_locals; k++)
1418 {
1419 method.stack_map_table[i].locals
1422 method.stack_map_table[i].locals.back();
1424 }
1425 const u2 number_stack_items = read<u2>();
1426 method.stack_map_table[i].stack.resize(number_stack_items);
1427 for(size_t k=0; k<(size_t) number_stack_items; k++)
1428 {
1429 method.stack_map_table[i].stack
1432 method.stack_map_table[i].stack.back();
1434 }
1435 }
1436 else
1437 throw "error: unknown stack frame type encountered";
1438 }
1439 }
1440 else
1442}
1443
1446{
1447 const u1 tag = read<u1>();
1448 switch(tag)
1449 {
1450 case VTYPE_INFO_TOP:
1451 v.type=methodt::verification_type_infot::TOP;
1452 break;
1453 case VTYPE_INFO_INTEGER:
1454 v.type=methodt::verification_type_infot::INTEGER;
1455 break;
1456 case VTYPE_INFO_FLOAT:
1457 v.type=methodt::verification_type_infot::FLOAT;
1458 break;
1459 case VTYPE_INFO_LONG:
1460 v.type=methodt::verification_type_infot::LONG;
1461 break;
1462 case VTYPE_INFO_DOUBLE:
1463 v.type=methodt::verification_type_infot::DOUBLE;
1464 break;
1466 v.type=methodt::verification_type_infot::ITEM_NULL;
1467 break;
1469 v.type=methodt::verification_type_infot::UNINITIALIZED_THIS;
1470 break;
1471 case VTYPE_INFO_OBJECT:
1472 v.type=methodt::verification_type_infot::OBJECT;
1473 v.cpool_index = read<u2>();
1474 break;
1475 case VTYPE_INFO_UNINIT:
1476 v.type=methodt::verification_type_infot::UNINITIALIZED;
1477 v.offset = read<u2>();
1478 break;
1479 default:
1480 throw "error: unknown verification type info encountered";
1481 }
1482}
1483
1485 std::vector<annotationt> &annotations)
1486{
1487 const u2 num_annotations = read<u2>();
1488
1489 for(u2 number=0; number<num_annotations; number++)
1490 {
1491 annotationt annotation;
1492 rRuntimeAnnotation(annotation);
1493 annotations.push_back(annotation);
1494 }
1495}
1496
1498 annotationt &annotation)
1499{
1500 const u2 type_index = read<u2>();
1501 annotation.type=type_entry(type_index);
1503}
1504
1506 annotationt::element_value_pairst &element_value_pairs)
1507{
1509 element_value_pairs.resize(num_element_value_pairs);
1510
1511 for(auto &element_value_pair : element_value_pairs)
1512 {
1513 const u2 element_name_index = read<u2>();
1516 }
1517}
1518
1526{
1527 const u1 tag = read<u1>();
1528
1529 switch(tag)
1530 {
1531 case 'e':
1532 {
1535 // todo: enum
1536 return exprt();
1537 }
1538
1539 case 'c':
1540 {
1541 const u2 class_info_index = read<u2>();
1543 }
1544
1545 case '@':
1546 {
1547 // TODO: return this wrapped in an exprt
1548 // another annotation, recursively
1549 annotationt annotation;
1550 rRuntimeAnnotation(annotation);
1551 return exprt();
1552 }
1553
1554 case '[':
1555 {
1556 const u2 num_values = read<u2>();
1557 exprt::operandst values;
1558 values.reserve(num_values);
1559 for(std::size_t i=0; i<num_values; i++)
1560 {
1561 values.push_back(get_relement_value());
1562 }
1563 return array_exprt(std::move(values), array_typet(typet(), exprt()));
1564 }
1565
1566 case 's':
1567 {
1568 const u2 const_value_index = read<u2>();
1570 }
1571
1572 default:
1573 {
1574 const u2 const_value_index = read<u2>();
1576 }
1577 }
1578}
1579
1592 const u4 &attribute_length)
1593{
1594 classt &parsed_class = parse_tree.parsed_class;
1595 std::string name = parsed_class.name.c_str();
1596 const u2 number_of_classes = read<u2>();
1598 INVARIANT(
1600 "The number of bytes to be read for the InnerClasses attribute does not "
1601 "match the attribute length.");
1602
1603 const auto pool_entry_lambda = [this](u2 index) -> pool_entryt & {
1604 return pool_entry(index);
1605 };
1606 const auto remove_separator_char = [](std::string str, char ch) {
1607 str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1608 return str;
1609 };
1610
1611 for(int i = 0; i < number_of_classes; i++)
1612 {
1615 const u2 inner_name_index = read<u2>();
1617
1618 std::string inner_class_info_name =
1621 bool is_private = (inner_class_access_flags & ACC_PRIVATE) != 0;
1622 bool is_public = (inner_class_access_flags & ACC_PUBLIC) != 0;
1623 bool is_protected = (inner_class_access_flags & ACC_PROTECTED) != 0;
1624 bool is_static = (inner_class_access_flags & ACC_STATIC) != 0;
1625
1626 // If the original parsed class name matches the inner class name,
1627 // the parsed class is an inner class, so overwrite the parsed class'
1628 // access information and mark it as an inner class.
1629 bool is_inner_class = remove_separator_char(id2string(parsed_class.name), '.') ==
1631 if(!is_inner_class)
1632 continue;
1633 parsed_class.is_inner_class = is_inner_class;
1634 parsed_class.is_static_class = is_static;
1635 // This is a marker that a class is anonymous.
1636 if(inner_name_index == 0)
1637 parsed_class.is_anonymous_class = true;
1638 else
1640 // Note that if outer_class_info_index == 0, the inner class is an anonymous
1641 // or local class, and is treated as private.
1642 if(outer_class_info_index == 0)
1643 {
1644 parsed_class.is_private = true;
1645 parsed_class.is_protected = false;
1646 parsed_class.is_public = false;
1647 }
1648 else
1649 {
1650 std::string outer_class_info_name =
1653 parsed_class.outer_class =
1655 parsed_class.is_private = is_private;
1656 parsed_class.is_protected = is_protected;
1657 parsed_class.is_public = is_public;
1658 }
1659 }
1660}
1661
1668{
1670
1671 std::vector<irep_idt> exceptions;
1672 for(size_t i = 0; i < number_of_exceptions; i++)
1673 {
1675 const irep_idt exception_name =
1677 exceptions.push_back(exception_name);
1678 }
1679 return exceptions;
1680}
1681
1683{
1684 classt &parsed_class = parse_tree.parsed_class;
1685
1687 const u4 attribute_length = read<u4>();
1688
1690
1691 if(attribute_name=="SourceFile")
1692 {
1693 const u2 sourcefile_index = read<u2>();
1695
1696 const std::string &fqn(id2string(parsed_class.name));
1697 size_t last_index = fqn.find_last_of('.');
1698 if(last_index==std::string::npos)
1700 else
1701 {
1702 std::string package_name=fqn.substr(0, last_index+1);
1703 std::replace(package_name.begin(), package_name.end(), '.', '/');
1704 const std::string &full_file_name=
1707 }
1708
1709 for(auto &method : parsed_class.methods)
1710 {
1711 method.source_location.set_file(sourcefile_name);
1712 for(auto &instruction : method.instructions)
1713 {
1714 if(!instruction.source_location.get_line().empty())
1715 instruction.source_location.set_file(sourcefile_name);
1716 }
1717 }
1718 }
1719 else if(attribute_name=="Signature")
1720 {
1721 const u2 signature_index = read<u2>();
1724 parsed_class.signature.value(),
1726 }
1727 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1728 attribute_name=="RuntimeVisibleAnnotations")
1729 {
1731 }
1732 else if(attribute_name == "BootstrapMethods")
1733 {
1734 // for this attribute
1735 // cf. https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.23
1736 INVARIANT(
1737 !parsed_class.attribute_bootstrapmethods_read,
1738 "only one BootstrapMethods argument is allowed in a class file");
1739
1740 // mark as read in parsed class
1741 parsed_class.attribute_bootstrapmethods_read = true;
1743 }
1744 else if(attribute_name == "InnerClasses")
1745 {
1747 }
1748 else
1750}
1751
1753{
1754 const u2 methods_count = read<u2>();
1755
1756 for(std::size_t j=0; j<methods_count; j++)
1757 rmethod();
1758}
1759
1760#define ACC_PUBLIC 0x0001u
1761#define ACC_PRIVATE 0x0002u
1762#define ACC_PROTECTED 0x0004u
1763#define ACC_STATIC 0x0008u
1764#define ACC_FINAL 0x0010u
1765#define ACC_VARARGS 0x0080u
1766#define ACC_SUPER 0x0020u
1767#define ACC_VOLATILE 0x0040u
1768#define ACC_TRANSIENT 0x0080u
1769#define ACC_INTERFACE 0x0200u
1770#define ACC_ABSTRACT 0x0400u
1771#define ACC_SYNTHETIC 0x1000u
1772#define ACC_ANNOTATION 0x2000u
1773#define ACC_ENUM 0x4000u
1774
1776{
1778
1779 const u2 access_flags = read<u2>();
1780 const u2 name_index = read<u2>();
1781 const u2 descriptor_index = read<u2>();
1782
1783 method.is_final=(access_flags&ACC_FINAL)!=0;
1784 method.is_static=(access_flags&ACC_STATIC)!=0;
1786 method.is_public=(access_flags&ACC_PUBLIC)!=0;
1790 method.is_native=(access_flags&ACC_NATIVE)!=0;
1791 method.is_bridge = (access_flags & ACC_BRIDGE) != 0;
1792 method.is_varargs = (access_flags & ACC_VARARGS) != 0;
1793 method.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
1794 method.name=pool_entry(name_index).s;
1795 method.base_name=pool_entry(name_index).s;
1796 method.descriptor=id2string(pool_entry(descriptor_index).s);
1797
1798 const auto flags = (method.is_public ? 1 : 0) +
1799 (method.is_protected?1:0)+
1800 (method.is_private?1:0);
1801 DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
1802 const u2 attributes_count = read<u2>();
1803
1804 for(std::size_t j=0; j<attributes_count; j++)
1805 rmethod_attribute(method);
1806}
1807
1808std::optional<java_bytecode_parse_treet> java_bytecode_parse(
1809 std::istream &istream,
1810 const irep_idt &class_name,
1811 message_handlert &message_handler,
1812 bool skip_instructions)
1813{
1815 skip_instructions, message_handler);
1816 java_bytecode_parser.in=&istream;
1817
1819
1820 if(
1821 parser_result ||
1822 java_bytecode_parser.parse_tree.parsed_class.name != class_name)
1823 {
1824 return {};
1825 }
1826
1827 return std::move(java_bytecode_parser.parse_tree);
1828}
1829
1830std::optional<java_bytecode_parse_treet> java_bytecode_parse(
1831 const std::string &file,
1832 const irep_idt &class_name,
1833 message_handlert &message_handler,
1834 bool skip_instructions)
1835{
1836 std::ifstream in(file, std::ios::binary);
1837
1838 if(!in)
1839 {
1840 return {};
1841 }
1842
1843 return java_bytecode_parse(
1844 in, class_name, message_handler, skip_instructions);
1845}
1846
1851{
1853
1854 INVARIANT(
1856 "Local variable type table cannot have more elements "
1857 "than the local variable table.");
1858 for(std::size_t i=0; i<local_variable_type_table_length; i++)
1859 {
1860 const u2 start_pc = read<u2>();
1861 const u2 length = read<u2>();
1862 const u2 name_index = read<u2>();
1863 const u2 signature_index = read<u2>();
1864 const u2 index = read<u2>();
1865
1866 bool found=false;
1867 for(auto &lvar : method.local_variable_table)
1868 {
1869 // compare to entry in LVT
1870 if(lvar.index==index &&
1871 lvar.name==pool_entry(name_index).s &&
1872 lvar.start_pc==start_pc &&
1873 lvar.length==length)
1874 {
1875 found=true;
1877 break;
1878 }
1879 }
1880 INVARIANT(
1881 found,
1882 "Entry in LocalVariableTypeTable must be present in LVT");
1883 }
1884}
1885
1913
1919std::optional<java_bytecode_parsert::lambda_method_handlet>
1921{
1922 const std::function<pool_entryt &(u2)> pool_entry_lambda =
1923 [this](u2 index) -> pool_entryt & { return pool_entry(index); };
1924
1926
1929 ref_entry.get_name_and_type(pool_entry_lambda);
1930
1931 // The following lambda kinds specified in the JVMS (see for example
1932 // https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-5.html#jvms-5.4.3.5
1933 // but which aren't actually created by javac. Java has no syntax for a field-
1934 // refernce like this, and even writing a stereotypical lambda like
1935 // Producer<FieldType> = instance -> instance.field does not generate this
1936 // kind of handle, but rather a synthetic method implementing the getfield
1937 // operation.
1938 // We don't implement a handle kind that can't be produced yet, but should
1939 // they ever turn up this is where to fix them.
1940
1941 if(
1942 entry.get_handle_kind() ==
1944 entry.get_handle_kind() ==
1946 entry.get_handle_kind() ==
1948 entry.get_handle_kind() ==
1950 {
1951 return {};
1952 }
1953
1954 irep_idt class_name =
1956
1958 std::string descriptor = name_and_type.get_descriptor(pool_entry_lambda);
1959 irep_idt mangled_method_name = id2string(method_name) + ":" + descriptor;
1961
1962 method_handle_typet handle_type =
1964
1965 class_method_descriptor_exprt method_descriptor{
1966 method_type, mangled_method_name, class_name, method_name};
1967 lambda_method_handlet lambda_method_handle{method_descriptor, handle_type};
1968
1969 return lambda_method_handle;
1970}
1971
1974{
1976 for(size_t bootstrap_method_index = 0;
1979 {
1982
1984
1986 log.debug() << "INFO: parse BootstrapMethod handle "
1987 << num_bootstrap_arguments << " #args" << messaget::eom;
1988
1989 // read u2 values of entry into vector
1990 std::vector<u2> u2_values(num_bootstrap_arguments);
1991 for(size_t i = 0; i < num_bootstrap_arguments; i++)
1992 u2_values[i] = read<u2>();
1993
1994 // try parsing bootstrap method handle
1995 // each entry contains a MethodHandle structure
1996 // u2 tag
1997 // u2 reference kind which must be in the range from 1 to 9
1998 // u2 reference index into the constant pool
1999 //
2000 // reference kinds use the following
2001 // 1 to 4 must point to a CONSTANT_Fieldref structure
2002 // 5 or 8 must point to a CONSTANT_Methodref structure
2003 // 6 or 7 must point to a CONSTANT_Methodref or
2004 // CONSTANT_InterfaceMethodref structure, if the class file version
2005 // number is 52.0 or above, to a CONSTANT_Methodref only in the case
2006 // of less than 52.0
2007 // 9 must point to a CONSTANT_InterfaceMethodref structure
2008
2009 // the index must point to a CONSTANT_String
2010 // CONSTANT_Class
2011 // CONSTANT_Integer
2012 // CONSTANT_Long
2013 // CONSTANT_Float
2014 // CONSTANT_Double
2015 // CONSTANT_MethodHandle
2016 // CONSTANT_MethodType
2017
2018 // We read the three arguments here to see whether they correspond to
2019 // our hypotheses for this being a lambda function entry.
2020
2021 // Need at least 3 arguments, the interface type, the method hanlde
2022 // and the method_type, otherwise it doesn't look like a call that we
2023 // understand
2025 {
2027 log.debug()
2028 << "format of BootstrapMethods entry not recognized: too few arguments"
2029 << messaget::eom;
2030 continue;
2031 }
2032
2036
2037 // The additional arguments for the altmetafactory call are skipped,
2038 // as they are currently not used. We verify though that they are of
2039 // CONSTANT_Integer type, cases where this does not hold will be
2040 // analyzed further.
2041 bool recognized = true;
2042 for(size_t i = 3; i < num_bootstrap_arguments; i++)
2043 {
2046 }
2047
2048 if(!recognized)
2049 {
2050 log.debug() << "format of BootstrapMethods entry not recognized: extra "
2051 "arguments of wrong type"
2052 << messaget::eom;
2054 continue;
2055 }
2056
2061
2062 if(
2066 {
2067 log.debug()
2068 << "format of BootstrapMethods entry not recognized: arguments "
2069 "wrong type"
2070 << messaget::eom;
2072 continue;
2073 }
2074
2075 log.debug() << "INFO: parse lambda handle" << messaget::eom;
2076 std::optional<lambda_method_handlet> lambda_method_handle =
2078
2079 if(!lambda_method_handle.has_value())
2080 {
2081 log.debug() << "format of BootstrapMethods entry not recognized: method "
2082 "handle not recognised"
2083 << messaget::eom;
2085 continue;
2086 }
2087
2088 // If parse_method_handle can't parse the lambda method, it should return {}
2090 lambda_method_handle->handle_type != method_handle_typet::UNKNOWN_HANDLE);
2091
2092 log.debug()
2093 << "lambda function reference "
2094 << id2string(
2095 lambda_method_handle->get_method_descriptor().base_method_name())
2096 << " in class \"" << parse_tree.parsed_class.name << "\""
2097 << "\n interface type is "
2099 << "\n method type is "
2103 }
2104}
2105
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
struct bytecode_infot const bytecode_info[]
#define BC_wide
uint16_t u2
uint32_t u4
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Array constructor from list of elements.
Definition std_expr.h:1621
Arrays with given size.
Definition std_types.h:807
class_infot get_class(const pool_entry_lookupt &pool_entry) const
name_and_type_infot get_name_and_type(const pool_entry_lookupt &pool_entry) const
u2 get_name_and_type_index() const
base_ref_infot(const pool_entryt &entry)
Corresponds to the CONSTANT_Class_info Structure Described in Java 8 specification 4....
class_infot(const pool_entryt &entry)
std::string get_name(const pool_entry_lookupt &pool_entry) const
An expression describing a method on a class.
Definition std_expr.h:3633
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
bool empty() const
Definition dstring.h:89
const char * c_str() const
Definition dstring.h:116
Base class for all expressions.
Definition expr.h:56
std::vector< exprt > operandst
Definition expr.h:58
typet & type()
Return the type of the expression.
Definition expr.h:84
const irep_idt & get(const irep_idt &name) const
Definition irep.cpp:44
const irep_idt & id() const
Definition irep.h:388
pool_entryt & pool_entry(u2 index)
void get_annotation_class_refs(const std::vector< annotationt > &annotations)
For each of the given annotations, get a reference to its class and recursively get class references ...
void store_unknown_method_handle(size_t bootstrap_method_index)
Creates an unknown method handle and puts it into the parsed_class.
exprt get_relement_value()
Corresponds to the element_value structure Described in Java 8 specification 4.7.16....
java_bytecode_parse_treet::annotationt annotationt
java_bytecode_parse_treet::methodt methodt
void rRuntimeAnnotation_attribute(std::vector< annotationt > &)
const typet type_entry(u2 index)
void rbytecode(std::vector< instructiont > &)
void rcode_attribute(methodt &method)
void read_bootstrapmethods_entry()
Read all entries of the BootstrapMethods attribute of a class file.
void get_class_refs()
Get the class references for the benefit of a dependency analysis.
void get_annotation_value_class_refs(const exprt &value)
See java_bytecode_parsert::get_annotation_class_refs.
java_bytecode_parse_treet parse_tree
std::vector< pool_entryt > constant_poolt
java_bytecode_parsert(bool skip_instructions, message_handlert &message_handler)
void skip_bytes(std::size_t bytes)
void read_verification_type_info(methodt::verification_type_infot &)
java_bytecode_parse_treet::fieldt fieldt
void get_class_refs_rec(const typet &)
void parse_local_variable_type_table(methodt &method)
Parses the local variable type table of a method.
void rRuntimeAnnotation(annotationt &)
std::optional< lambda_method_handlet > parse_method_handle(const class method_handle_infot &entry)
Read method handle pointed to from constant pool entry at index, return type of method handle and nam...
void relement_value_pairs(annotationt::element_value_pairst &)
void rinner_classes_attribute(const u4 &attribute_length)
Corresponds to the element_value structure Described in Java 8 specification 4.7.6 https://docs....
std::vector< irep_idt > rexceptions_attribute()
Corresponds to the element_value structure Described in Java 8 specification 4.7.5 https://docs....
void rmethod_attribute(methodt &method)
method_handle_kindt
Indicates what sort of code should be synthesised for a lambda call:
Definition java_types.h:464
@ LAMBDA_STATIC_METHOD_HANDLE
Direct call to the given method.
@ LAMBDA_VIRTUAL_METHOD_HANDLE
Virtual call to the given interface or method.
@ LAMBDA_CONSTRUCTOR_HANDLE
Instantiate the needed type then call a constructor.
mstreamt & error() const
Definition message.h:391
mstreamt & debug() const
Definition message.h:421
static eomt eom
Definition message.h:289
method_handle_infot(const pool_entryt &entry)
base_ref_infot get_reference(const pool_entry_lookupt &pool_entry) const
method_handle_kindt handle_kind
method_handle_kindt get_handle_kind() const
method_handle_kindt
Correspond to the different valid values for field handle_kind From Java 8 spec 4....
Corresponds to the CONSTANT_NameAndType_info Structure Described in Java 8 specification 4....
name_and_type_infot(const pool_entryt &entry)
std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
std::string get_name(const pool_entry_lookupt &pool_entry) const
std::istream * in
Definition parser.h:27
messaget log
Definition parser.h:144
Fixed-width bit-vector with two's complement interpretation.
const irep_idt & get_function() const
void set_java_bytecode_index(const irep_idt &index)
void set_line(const irep_idt &line)
void set_function(const irep_idt &function)
A struct tag type, i.e., struct_typet with an identifier.
Definition std_types.h:493
Structure type, corresponds to C style structs.
Definition std_types.h:231
std::function< pool_entryt &(u2)> pool_entry_lookupt
static std::string read_utf8_constant(const pool_entryt &entry)
structured_pool_entryt(const pool_entryt &entry)
static symbol_exprt typeless(const irep_idt &id)
Generate a symbol_exprt without a proper type.
Definition std_expr.h:150
const irep_idt & get_identifier() const
Definition std_types.h:410
An expression denoting a type.
Definition std_expr.h:3081
The type of an expression, extends irept.
Definition type.h:29
Fixed-width bit-vector with unsigned binary interpretation.
static format_containert< T > format(const T &o)
Definition format.h:37
const std::string & id2string(const irep_idt &d)
Definition irep.h:44
#define CONSTANT_Fieldref
#define CONSTANT_String
#define VTYPE_INFO_DOUBLE
#define UNUSED_u2(x)
#define ACC_PUBLIC
#define CONSTANT_InvokeDynamic
#define VTYPE_INFO_FLOAT
#define ACC_BRIDGE
#define ACC_NATIVE
#define T_SHORT
#define ACC_ENUM
#define CONSTANT_Methodref
#define T_LONG
#define T_FLOAT
#define ACC_VARARGS
#define CONSTANT_Long
#define T_INT
#define VTYPE_INFO_LONG
#define VTYPE_INFO_TOP
#define CONSTANT_NameAndType
#define ACC_ABSTRACT
#define VTYPE_INFO_OBJECT
#define ACC_ANNOTATION
#define T_DOUBLE
#define ACC_SYNCHRONIZED
#define T_BYTE
#define CONSTANT_Class
#define ACC_STATIC
#define CONSTANT_Integer
std::optional< java_bytecode_parse_treet > java_bytecode_parse(std::istream &istream, const irep_idt &class_name, message_handlert &message_handler, bool skip_instructions)
Attempt to parse a Java class from the given stream.
#define CONSTANT_MethodType
#define VTYPE_INFO_INTEGER
static java_class_typet::method_handle_kindt get_method_handle_type(method_handle_infot::method_handle_kindt java_handle_kind)
Translate the lambda method reference kind in a class file into the kind of handling the method requi...
#define ACC_PROTECTED
#define CONSTANT_Double
#define ACC_PRIVATE
#define CONSTANT_Float
#define VTYPE_INFO_UNINIT
#define CONSTANT_InterfaceMethodref
#define T_BOOLEAN
#define ACC_INTERFACE
#define ACC_SYNTHETIC
#define VTYPE_INFO_UNINIT_THIS
#define ACC_FINAL
#define CONSTANT_MethodHandle
#define T_CHAR
#define VTYPE_INFO_ITEM_NULL
#define CONSTANT_Utf8
Representation of a constant Java string.
void get_dependencies_from_generic_parameters(const std::string &signature, std::set< irep_idt > &refs)
Collect information about generic type parameters from a given signature.
struct_tag_typet java_classname(const std::string &id)
std::optional< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
bool is_java_array_tag(const irep_idt &tag)
See above.
const typet & java_array_element_type(const struct_tag_typet &array_symbol)
Return a const reference to the element type of a given java array type.
const java_method_typet & to_java_method_type(const typet &type)
Definition java_types.h:183
std::optional< typet > java_type_from_string_with_exception(const std::string &descriptor, const std::optional< std::string > &signature, const std::string &class_name)
static std::optional< java_class_typet::java_lambda_method_handlet > lambda_method_handle(const symbol_table_baset &symbol_table, const irep_idt &method_identifier, const java_method_typet &dynamic_method_type)
Parser utilities.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:534
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
#define POSTCONDITION(CONDITION)
Definition invariant.h:479
API to expression classes.
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition std_types.h:308
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition std_types.h:518
std::vector< element_value_pairt > element_value_pairst
void add_method_handle(size_t bootstrap_index, const lambda_method_handlet &handle)
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.