CBMC
Loading...
Searching...
No Matches
java_bytecode_parser.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
10
11#include <algorithm>
12#include <fstream>
13#include <map>
14#include <string>
15
16#include <util/arith_tools.h>
17#include <util/ieee_float.h>
18#include <util/parser.h>
19#include <util/std_expr.h>
21
22#include "bytecode_info.h"
25#include "java_types.h"
26
27class java_bytecode_parsert final : public parsert
28{
29public:
32 message_handlert &message_handler)
34 {
35 }
36
37 bool parse() override;
38
40 {
41 u1 tag = 0;
42 u2 ref1 = 0;
43 u2 ref2 = 0;
47 };
48
50
51private:
60
61 using constant_poolt = std::vector<pool_entryt>;
62
64
65 const bool skip_instructions = false;
66
68 {
69 if(index==0 || index>=constant_pool.size())
70 {
71 log.error() << "invalid constant pool index (" << index << ")"
73 log.error() << "constant pool size: " << constant_pool.size()
75 throw 0;
76 }
77
78 return constant_pool[index];
79 }
80
82 {
83 return pool_entry(index).expr;
84 }
85
86 const typet type_entry(u2 index)
87 {
89 }
90
91 void rClassFile();
92 void rconstant_pool();
93 void rinterfaces();
94 void rfields();
95 void rmethods();
96 void rmethod();
98 std::vector<irep_idt> rexceptions_attribute();
99 void rclass_attribute();
100 void rRuntimeAnnotation_attribute(std::vector<annotationt> &);
104 void rmethod_attribute(methodt &method);
105 void rfield_attribute(fieldt &);
106 void rcode_attribute(methodt &method);
107 void read_verification_type_info(methodt::verification_type_infot &);
108 void rbytecode(std::vector<instructiont> &);
109 void get_class_refs();
110 void get_class_refs_rec(const typet &);
111 void get_annotation_class_refs(const std::vector<annotationt> &annotations);
112 void get_annotation_value_class_refs(const exprt &value);
114 std::optional<lambda_method_handlet>
115 parse_method_handle(const class method_handle_infot &entry);
117
118 void skip_bytes(std::size_t bytes)
119 {
120 for(std::size_t i=0; i<bytes; i++)
121 {
122 if(!*in)
123 {
124 log.error() << "unexpected end of bytecode file" << messaget::eom;
125 throw 0;
126 }
127 in->get();
128 }
129 }
130
131 template <typename T>
133 {
134 static_assert(
135 std::is_unsigned<T>::value || std::is_signed<T>::value,
136 "T should be a signed or unsigned integer");
137 const constexpr size_t bytes = sizeof(T);
138 if(bytes == 1)
139 {
140 if(!*in)
141 {
142 log.error() << "unexpected end of bytecode file" << messaget::eom;
143 throw 0;
144 }
145 return static_cast<T>(in->get());
146 }
147 T result = 0;
148 for(size_t i = 0; i < bytes; i++)
149 {
150 if(!*in)
151 {
152 log.error() << "unexpected end of bytecode file" << messaget::eom;
153 throw 0;
154 }
155 result <<= 8u;
156 result |= static_cast<u1>(in->get());
157 }
158 return result;
159 }
160
162};
163
164#define CONSTANT_Class 7
165#define CONSTANT_Fieldref 9
166#define CONSTANT_Methodref 10
167#define CONSTANT_InterfaceMethodref 11
168#define CONSTANT_String 8
169#define CONSTANT_Integer 3
170#define CONSTANT_Float 4
171#define CONSTANT_Long 5
172#define CONSTANT_Double 6
173#define CONSTANT_NameAndType 12
174#define CONSTANT_Utf8 1
175#define CONSTANT_MethodHandle 15
176#define CONSTANT_MethodType 16
177#define CONSTANT_InvokeDynamic 18
178
179#define VTYPE_INFO_TOP 0
180#define VTYPE_INFO_INTEGER 1
181#define VTYPE_INFO_FLOAT 2
182#define VTYPE_INFO_LONG 3
183#define VTYPE_INFO_DOUBLE 4
184#define VTYPE_INFO_ITEM_NULL 5
185#define VTYPE_INFO_UNINIT_THIS 6
186#define VTYPE_INFO_OBJECT 7
187#define VTYPE_INFO_UNINIT 8
188
190{
191public:
193 using pool_entry_lookupt = std::function<pool_entryt &(u2)>;
194
195 explicit structured_pool_entryt(const pool_entryt &entry) : tag(entry.tag)
196 {
197 }
198
199 u1 get_tag() const
200 {
201 return tag;
202 }
203
204protected:
205 static std::string read_utf8_constant(const pool_entryt &entry)
206 {
207 INVARIANT(
208 entry.tag == CONSTANT_Utf8, "Name entry must be a constant UTF-8");
209 return id2string(entry.s);
210 }
211
212private:
214};
215
219{
220public:
221 explicit class_infot(const pool_entryt &entry) : structured_pool_entryt(entry)
222 {
224 name_index = entry.ref1;
225 }
226
227 std::string get_name(const pool_entry_lookupt &pool_entry) const
228 {
229 const pool_entryt &name_entry = pool_entry(name_index);
231 }
232
233private:
235};
236
240{
241public:
242 explicit name_and_type_infot(const pool_entryt &entry)
244 {
246 name_index = entry.ref1;
247 descriptor_index = entry.ref2;
248 }
249
250 std::string get_name(const pool_entry_lookupt &pool_entry) const
251 {
252 const pool_entryt &name_entry = pool_entry(name_index);
254 }
255
256 std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
257 {
258 const pool_entryt &descriptor_entry = pool_entry(descriptor_index);
260 }
261
262private:
265};
266
268{
269public:
270 explicit base_ref_infot(const pool_entryt &entry)
272 {
274 entry.tag == CONSTANT_Fieldref || entry.tag == CONSTANT_Methodref ||
276 class_index = entry.ref1;
278 }
279
281 {
282 return class_index;
283 }
285 {
286 return name_and_type_index;
287 }
288
290 get_name_and_type(const pool_entry_lookupt &pool_entry) const
291 {
293
294 INVARIANT(
296 "name_and_typeindex did not correspond to a name_and_type in the "
297 "constant pool");
298
300 }
301
302 class_infot get_class(const pool_entry_lookupt &pool_entry) const
303 {
304 const pool_entryt &class_entry = pool_entry(class_index);
305
306 return class_infot{class_entry};
307 }
308
309private:
312};
313
315{
316public:
321 {
322 REF_getField = 1,
323 REF_getStatic = 2,
324 REF_putField = 3,
325 REF_putStatic = 4,
331 };
332
333 explicit method_handle_infot(const pool_entryt &entry)
335 {
337 PRECONDITION(entry.ref1 > 0 && entry.ref1 < 10); // Java 8 spec 4.4.8
338 handle_kind = static_cast<method_handle_kindt>(entry.ref1);
339 reference_index = entry.ref2;
340 }
341
343 {
344 return handle_kind;
345 }
346
348 {
349 const base_ref_infot ref_entry{pool_entry(reference_index)};
350
351 // validate the correctness of the constant pool entry
352 switch(handle_kind)
353 {
358 {
359 INVARIANT(ref_entry.get_tag() == CONSTANT_Fieldref, "4.4.2");
360 break;
361 }
364 {
365 INVARIANT(ref_entry.get_tag() == CONSTANT_Methodref, "4.4.2");
366 break;
367 }
370 {
371 INVARIANT(
372 ref_entry.get_tag() == CONSTANT_Methodref ||
374 "4.4.2");
375 break;
376 }
378 {
379 INVARIANT(ref_entry.get_tag() == CONSTANT_InterfaceMethodref, "4.4.8");
380 break;
381 }
382 }
383
384 return ref_entry;
385 }
386
387private:
390};
391
393{
394 try
395 {
396 rClassFile();
397 }
398
399 catch(const char *message)
400 {
401 log.error() << message << messaget::eom;
402 return true;
403 }
404
405 catch(const std::string &message)
406 {
407 log.error() << message << messaget::eom;
408 return true;
409 }
410
411 catch(...)
412 {
413 log.error() << "parsing error" << messaget::eom;
414 return true;
415 }
416
417 return false;
418}
419
420#define ACC_PUBLIC 0x0001u
421#define ACC_PRIVATE 0x0002u
422#define ACC_PROTECTED 0x0004u
423#define ACC_STATIC 0x0008u
424#define ACC_FINAL 0x0010u
425#define ACC_SYNCHRONIZED 0x0020u
426#define ACC_BRIDGE 0x0040u
427#define ACC_NATIVE 0x0100u
428#define ACC_INTERFACE 0x0200u
429#define ACC_ABSTRACT 0x0400u
430#define ACC_STRICT 0x0800u
431#define ACC_SYNTHETIC 0x1000u
432#define ACC_ANNOTATION 0x2000u
433#define ACC_ENUM 0x4000u
434
435#define UNUSED_u2(x) \
436 { \
437 const u2 x = read<u2>(); \
438 (void)x; \
439 } \
440 (void)0
441
443{
445
446 const u4 magic = read<u4>();
448 const u2 major_version = read<u2>();
449
450 if(magic!=0xCAFEBABE)
451 {
452 log.error() << "wrong magic" << messaget::eom;
453 throw 0;
454 }
455
456 if(major_version<44)
457 {
458 log.error() << "unexpected major version" << messaget::eom;
459 throw 0;
460 }
461
463
464 classt &parsed_class=parse_tree.parsed_class;
465
466 const u2 access_flags = read<u2>();
467 const u2 this_class = read<u2>();
468 const u2 super_class = read<u2>();
469
470 parsed_class.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
471 parsed_class.is_enum=(access_flags&ACC_ENUM)!=0;
472 parsed_class.is_public=(access_flags&ACC_PUBLIC)!=0;
473 parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
474 parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
475 parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
476 parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
477 parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
478 parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
479 parsed_class.name=
481
482 if(super_class!=0)
483 parsed_class.super_class = constant(super_class).type().get(ID_C_base_name);
484
485 rinterfaces();
486 rfields();
487 rmethods();
488
489 // count elements of enum
490 if(parsed_class.is_enum)
492 if(field.is_enum)
494
495 const u2 attributes_count = read<u2>();
496
497 for(std::size_t j=0; j<attributes_count; j++)
499
501
503}
504
507{
508 for(const auto &c : constant_pool)
509 {
510 switch(c.tag)
511 {
512 case CONSTANT_Class:
513 get_class_refs_rec(c.expr.type());
514 break;
515
519 break;
520
521 default: {}
522 }
523 }
524
526
527 for(const auto &field : parse_tree.parsed_class.fields)
528 {
529 get_annotation_class_refs(field.annotations);
530
531 if(field.signature.has_value())
532 {
534 field.descriptor,
535 field.signature,
537
538 // add generic type args to class refs as dependencies, same below for
539 // method types and entries from the local variable type table
541 field_type, parse_tree.class_refs);
542 get_class_refs_rec(field_type);
543 }
544 else
545 {
547 }
548 }
549
550 for(const auto &method : parse_tree.parsed_class.methods)
551 {
552 get_annotation_class_refs(method.annotations);
553 for(const auto &parameter_annotations : method.parameter_annotations)
554 get_annotation_class_refs(parameter_annotations);
555
556 if(method.signature.has_value())
557 {
559 method.descriptor,
560 method.signature,
565 }
566 else
567 {
568 get_class_refs_rec(*java_type_from_string(method.descriptor));
569 }
570
571 for(const auto &var : method.local_variable_table)
572 {
573 if(var.signature.has_value())
574 {
576 var.descriptor,
577 var.signature,
582 }
583 else
584 {
586 }
587 }
588 }
589}
590
592{
593 if(src.id()==ID_code)
594 {
596 const typet &rt=ct.return_type();
598 for(const auto &p : ct.parameters())
599 get_class_refs_rec(p.type());
600 }
601 else if(src.id() == ID_struct_tag)
602 {
604 if(is_java_array_tag(struct_tag_type.get_identifier()))
606 else
608 }
609 else if(src.id()==ID_struct)
610 {
612 for(const auto &c : struct_type.components())
613 get_class_refs_rec(c.type());
614 }
615 else if(src.id()==ID_pointer)
616 get_class_refs_rec(to_pointer_type(src).base_type());
617}
618
622 const std::vector<annotationt> &annotations)
623{
624 for(const auto &annotation : annotations)
625 {
626 get_class_refs_rec(annotation.type);
627 for(const auto &element_value_pair : annotation.element_value_pairs)
629 }
630}
631
636{
637 if(const auto &symbol_expr = expr_try_dynamic_cast<symbol_exprt>(value))
638 {
639 const irep_idt &value_id = symbol_expr->get_identifier();
641 }
642 else if(const auto &array_expr = expr_try_dynamic_cast<array_exprt>(value))
643 {
644 for(const exprt &operand : array_expr->operands())
646 }
647 // TODO: enum and nested annotation cases (once these are correctly parsed by
648 // get_relement_value).
649 // Note that in the cases where expr is a string or primitive type, no
650 // additional class references are needed.
651}
652
654{
657 {
658 log.error() << "invalid constant_pool_count" << messaget::eom;
659 throw 0;
660 }
661
663
664 for(auto it = std::next(constant_pool.begin()); it != constant_pool.end();
665 it++)
666 {
667 it->tag = read<u1>();
668
669 switch(it->tag)
670 {
671 case CONSTANT_Class:
672 it->ref1 = read<u2>();
673 break;
674
680 it->ref1 = read<u2>();
681 it->ref2 = read<u2>();
682 break;
683
684 case CONSTANT_String:
686 it->ref1 = read<u2>();
687 break;
688
689 case CONSTANT_Integer:
690 case CONSTANT_Float:
691 it->number = read<u4>();
692 break;
693
694 case CONSTANT_Long:
695 case CONSTANT_Double:
696 it->number = read<u8>();
697 // Eight-byte constants take up two entries in the constant_pool table.
698 if(it==constant_pool.end())
699 {
700 log.error() << "invalid double entry" << messaget::eom;
701 throw 0;
702 }
703 it++;
704 it->tag=0;
705 break;
706
707 case CONSTANT_Utf8:
708 {
709 const u2 bytes = read<u2>();
710 std::string s;
711 s.resize(bytes);
712 for(auto &ch : s)
714 it->s = s; // Add to string table
715 break;
716 }
717
719 it->ref1 = read<u1>();
720 it->ref2 = read<u2>();
721 break;
722
723 default:
724 log.error() << "unknown constant pool entry (" << it->tag << ")"
725 << messaget::eom;
726 throw 0;
727 }
728 }
729
730 // we do a bit of post-processing after we have them all
731 std::for_each(
732 std::next(constant_pool.begin()),
733 constant_pool.end(),
734 [&](constant_poolt::value_type &entry) {
735 switch(entry.tag)
736 {
737 case CONSTANT_Class:
738 {
739 const std::string &s = id2string(pool_entry(entry.ref1).s);
740 entry.expr = type_exprt(java_classname(s));
741 }
742 break;
743
744 case CONSTANT_Fieldref:
745 {
746 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
747 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
748 const pool_entryt &class_entry = pool_entry(entry.ref1);
749 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
750 typet type=type_entry(nameandtype_entry.ref2);
751
752 auto class_tag = java_classname(id2string(class_name_entry.s));
753
754 fieldref_exprt fieldref(type, name_entry.s, class_tag.get_identifier());
755
756 entry.expr = fieldref;
757 }
758 break;
759
760 case CONSTANT_Methodref:
761 case CONSTANT_InterfaceMethodref:
762 {
763 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
764 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
765 const pool_entryt &class_entry = pool_entry(entry.ref1);
766 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
767 typet type=type_entry(nameandtype_entry.ref2);
768
769 auto class_tag = java_classname(id2string(class_name_entry.s));
770
771 irep_idt mangled_method_name =
772 id2string(name_entry.s) + ":" +
773 id2string(pool_entry(nameandtype_entry.ref2).s);
774
775 irep_idt class_id = class_tag.get_identifier();
776
777 entry.expr = class_method_descriptor_exprt{
778 type, mangled_method_name, class_id, name_entry.s};
779 }
780 break;
781
782 case CONSTANT_String:
783 {
784 // ldc turns these into references to java.lang.String
785 entry.expr = java_string_literal_exprt{pool_entry(entry.ref1).s};
786 }
787 break;
788
789 case CONSTANT_Integer:
790 entry.expr = from_integer(entry.number, java_int_type());
791 break;
792
793 case CONSTANT_Float:
794 {
795 ieee_float_valuet value(ieee_float_spect::single_precision());
796 value.unpack(entry.number);
797 entry.expr = value.to_expr();
798 }
799 break;
800
801 case CONSTANT_Long:
802 entry.expr = from_integer(entry.number, java_long_type());
803 break;
804
805 case CONSTANT_Double:
806 {
807 ieee_float_valuet value(ieee_float_spect::double_precision());
808 value.unpack(entry.number);
809 entry.expr = value.to_expr();
810 }
811 break;
812
813 case CONSTANT_NameAndType:
814 {
815 entry.expr.id("nameandtype");
816 }
817 break;
818
819 case CONSTANT_MethodHandle:
820 {
821 entry.expr.id("methodhandle");
822 }
823 break;
824
825 case CONSTANT_MethodType:
826 {
827 entry.expr.id("methodtype");
828 }
829 break;
830
831 case CONSTANT_InvokeDynamic:
832 {
833 entry.expr.id("invokedynamic");
834 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
835 typet type=type_entry(nameandtype_entry.ref2);
836 type.set(ID_java_lambda_method_handle_index, entry.ref1);
837 entry.expr.type() = type;
838 }
839 break;
840 }
841 });
842}
843
845{
846 const u2 interfaces_count = read<u2>();
847
848 for(std::size_t i=0; i<interfaces_count; i++)
850 constant(read<u2>()).type().get(ID_C_base_name));
851}
852
854{
855 const u2 fields_count = read<u2>();
856
857 for(std::size_t i=0; i<fields_count; i++)
858 {
860
861 const u2 access_flags = read<u2>();
862 const u2 name_index = read<u2>();
863 const u2 descriptor_index = read<u2>();
864 const u2 attributes_count = read<u2>();
865
866 field.name=pool_entry(name_index).s;
867 field.is_static=(access_flags&ACC_STATIC)!=0;
868 field.is_final=(access_flags&ACC_FINAL)!=0;
869 field.is_enum=(access_flags&ACC_ENUM)!=0;
870
871 field.descriptor=id2string(pool_entry(descriptor_index).s);
872 field.is_public=(access_flags&ACC_PUBLIC)!=0;
873 field.is_protected=(access_flags&ACC_PROTECTED)!=0;
874 field.is_private=(access_flags&ACC_PRIVATE)!=0;
875 const auto flags = (field.is_public ? 1 : 0) +
876 (field.is_protected?1:0)+
877 (field.is_private?1:0);
878 DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
879
880 for(std::size_t j=0; j<attributes_count; j++)
882 }
883}
884
885#define T_BOOLEAN 4
886#define T_CHAR 5
887#define T_FLOAT 6
888#define T_DOUBLE 7
889#define T_BYTE 8
890#define T_SHORT 9
891#define T_INT 10
892#define T_LONG 11
893
894void java_bytecode_parsert::rbytecode(std::vector<instructiont> &instructions)
895{
896 const u4 code_length = read<u4>();
897
898 u4 address;
899 size_t bytecode_index=0; // index of bytecode instruction
900
901 for(address=0; address<code_length; address++)
902 {
903 bool wide_instruction=false;
904 u4 start_of_instruction=address;
905
906 u1 bytecode = read<u1>();
907
908 if(bytecode == BC_wide)
909 {
910 wide_instruction=true;
911 address++;
912 bytecode = read<u1>();
913 // The only valid instructions following a wide byte are
914 // [ifald]load, [ifald]store, ret and iinc
915 // All of these have either format of v, or V
916 INVARIANT(
917 bytecode_info[bytecode].format == 'v' ||
918 bytecode_info[bytecode].format == 'V',
919 std::string("Unexpected wide instruction: ") +
920 bytecode_info[bytecode].mnemonic);
921 }
922
923 instructions.emplace_back();
924 instructiont &instruction=instructions.back();
925 instruction.bytecode = bytecode;
926 instruction.address=start_of_instruction;
927 instruction.source_location
928 .set_java_bytecode_index(std::to_string(bytecode_index));
929
930 switch(bytecode_info[bytecode].format)
931 {
932 case ' ': // no further bytes
933 break;
934
935 case 'c': // a constant_pool index (one byte)
937 {
938 instruction.args.push_back(constant(read<u2>()));
939 address+=2;
940 }
941 else
942 {
943 instruction.args.push_back(constant(read<u1>()));
944 address+=1;
945 }
946 break;
947
948 case 'C': // a constant_pool index (two bytes)
949 instruction.args.push_back(constant(read<u2>()));
950 address+=2;
951 break;
952
953 case 'b': // a signed byte
954 {
955 const s1 c = read<s1>();
956 instruction.args.push_back(from_integer(c, signedbv_typet(8)));
957 address+=1;
958 break;
959 }
960
961 case 'o': // two byte branch offset, signed
962 {
963 const s2 offset = read<s2>();
964 // By converting the signed offset into an absolute address (by adding
965 // the current address) the number represented becomes unsigned.
966 instruction.args.push_back(
967 from_integer(address + offset, unsignedbv_typet(16)));
968 address+=2;
969 break;
970 }
971
972 case 'O': // four byte branch offset, signed
973 {
974 const s4 offset = read<s4>();
975 // By converting the signed offset into an absolute address (by adding
976 // the current address) the number represented becomes unsigned.
977 instruction.args.push_back(
978 from_integer(address + offset, unsignedbv_typet(32)));
979 address+=4;
980 break;
981 }
982
983 case 'v': // local variable index (one byte)
984 {
986 {
987 const u2 v = read<u2>();
988 instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
989 address += 2;
990 }
991 else
992 {
993 const u1 v = read<u1>();
994 instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
995 address += 1;
996 }
997 }
998
999 break;
1000
1001 case 'V':
1002 // local variable index (two bytes) plus two signed bytes
1004 {
1005 const u2 v = read<u2>();
1006 instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
1007 const s2 c = read<s2>();
1008 instruction.args.push_back(from_integer(c, signedbv_typet(16)));
1009 address+=4;
1010 }
1011 else // local variable index (one byte) plus one signed byte
1012 {
1013 const u1 v = read<u1>();
1014 instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1015 const s1 c = read<s1>();
1016 instruction.args.push_back(from_integer(c, signedbv_typet(8)));
1017 address+=2;
1018 }
1019 break;
1020
1021 case 'I': // two byte constant_pool index plus two bytes
1022 {
1023 const u2 c = read<u2>();
1024 instruction.args.push_back(constant(c));
1025 const u1 b1 = read<u1>();
1026 instruction.args.push_back(from_integer(b1, unsignedbv_typet(8)));
1027 const u1 b2 = read<u1>();
1028 instruction.args.push_back(from_integer(b2, unsignedbv_typet(8)));
1029 }
1030 address+=4;
1031 break;
1032
1033 case 'L': // lookupswitch
1034 {
1035 u4 base_offset=address;
1036
1037 // first a pad to 32-bit align
1038 while(((address + 1u) & 3u) != 0)
1039 {
1040 read<u1>();
1041 address++;
1042 }
1043
1044 // now default value
1045 const s4 default_value = read<s4>();
1046 // By converting the signed offset into an absolute address (by adding
1047 // the current address) the number represented becomes unsigned.
1048 instruction.args.push_back(
1049 from_integer(base_offset+default_value, unsignedbv_typet(32)));
1050 address+=4;
1051
1052 // number of pairs
1053 const u4 npairs = read<u4>();
1054 address+=4;
1055
1056 for(std::size_t i=0; i<npairs; i++)
1057 {
1058 const s4 match = read<s4>();
1059 const s4 offset = read<s4>();
1060 instruction.args.push_back(
1061 from_integer(match, signedbv_typet(32)));
1062 // By converting the signed offset into an absolute address (by adding
1063 // the current address) the number represented becomes unsigned.
1064 instruction.args.push_back(
1066 address+=8;
1067 }
1068 }
1069 break;
1070
1071 case 'T': // tableswitch
1072 {
1073 size_t base_offset=address;
1074
1075 // first a pad to 32-bit align
1076 while(((address + 1u) & 3u) != 0)
1077 {
1078 read<u1>();
1079 address++;
1080 }
1081
1082 // now default value
1083 const s4 default_value = read<s4>();
1084 instruction.args.push_back(
1085 from_integer(base_offset+default_value, signedbv_typet(32)));
1086 address+=4;
1087
1088 // now low value
1089 const s4 low_value = read<s4>();
1090 address+=4;
1091
1092 // now high value
1093 const s4 high_value = read<s4>();
1094 address+=4;
1095
1096 // there are high-low+1 offsets, and they are signed
1097 for(s4 i=low_value; i<=high_value; i++)
1098 {
1099 s4 offset = read<s4>();
1100 instruction.args.push_back(from_integer(i, signedbv_typet(32)));
1101 // By converting the signed offset into an absolute address (by adding
1102 // the current address) the number represented becomes unsigned.
1103 instruction.args.push_back(
1105 address+=4;
1106 }
1107 }
1108 break;
1109
1110 case 'm': // multianewarray: constant-pool index plus one unsigned byte
1111 {
1112 const u2 c = read<u2>(); // constant-pool index
1113 instruction.args.push_back(constant(c));
1114 const u1 dimensions = read<u1>(); // number of dimensions
1115 instruction.args.push_back(
1117 address+=3;
1118 }
1119 break;
1120
1121 case 't': // array subtype, one byte
1122 {
1123 typet t;
1124 switch(read<u1>())
1125 {
1126 case T_BOOLEAN: t.id(ID_bool); break;
1127 case T_CHAR: t.id(ID_char); break;
1128 case T_FLOAT: t.id(ID_float); break;
1129 case T_DOUBLE: t.id(ID_double); break;
1130 case T_BYTE: t.id(ID_byte); break;
1131 case T_SHORT: t.id(ID_short); break;
1132 case T_INT: t.id(ID_int); break;
1133 case T_LONG: t.id(ID_long); break;
1134 default:{};
1135 }
1136 instruction.args.push_back(type_exprt(t));
1137 }
1138 address+=1;
1139 break;
1140
1141 case 's': // a signed short
1142 {
1143 const s2 s = read<s2>();
1144 instruction.args.push_back(from_integer(s, signedbv_typet(16)));
1145 }
1146 address+=2;
1147 break;
1148
1149 default:
1150 throw "unknown JVM bytecode instruction";
1151 }
1153 }
1154
1155 if(address!=code_length)
1156 {
1157 log.error() << "bytecode length mismatch" << messaget::eom;
1158 throw 0;
1159 }
1160}
1161
1163{
1165 const u4 attribute_length = read<u4>();
1166
1168
1169 if(attribute_name == "Code")
1170 {
1173
1176 else
1177 rbytecode(method.instructions);
1178
1182 else
1183 {
1185
1186 for(std::size_t e = 0; e < exception_table_length; e++)
1187 {
1188 const u2 start_pc = read<u2>();
1189 const u2 end_pc = read<u2>();
1190
1191 // From the class file format spec ("4.7.3. The Code Attribute" for
1192 // Java8)
1193 INVARIANT(
1194 start_pc < end_pc,
1195 "The start_pc must be less than the end_pc as this is the range the "
1196 "exception is active");
1197
1198 const u2 handler_pc = read<u2>();
1199 const u2 catch_type = read<u2>();
1200 method.exception_table[e].start_pc = start_pc;
1201 method.exception_table[e].end_pc = end_pc;
1202 method.exception_table[e].handler_pc = handler_pc;
1203 if(catch_type != 0)
1204 method.exception_table[e].catch_type =
1205 to_struct_tag_type(pool_entry(catch_type).expr.type());
1206 }
1207 }
1208
1210
1211 for(std::size_t j=0; j<attributes_count; j++)
1212 rcode_attribute(method);
1213
1214 // method name
1216 "java::" + id2string(parse_tree.parsed_class.name) + "." +
1217 id2string(method.name) + ":" + method.descriptor);
1218
1219 irep_idt line_number;
1220
1221 // add missing line numbers
1222 for(auto &instruction : method.instructions)
1223 {
1224 if(!instruction.source_location.get_line().empty())
1225 line_number = instruction.source_location.get_line();
1226 else if(!line_number.empty())
1227 instruction.source_location.set_line(line_number);
1228 instruction.source_location.set_function(
1229 method.source_location.get_function());
1230 }
1231
1232 // line number of method (the first line number available)
1233 const auto it = std::find_if(
1234 method.instructions.begin(),
1235 method.instructions.end(),
1236 [&](const instructiont &instruction) {
1237 return !instruction.source_location.get_line().empty();
1238 });
1239 if(it != method.instructions.end())
1240 method.source_location.set_line(it->source_location.get_line());
1241 }
1242 else if(attribute_name=="Signature")
1243 {
1244 const u2 signature_index = read<u2>();
1246 }
1247 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1248 attribute_name=="RuntimeVisibleAnnotations")
1249 {
1251 }
1252 else if(
1253 attribute_name == "RuntimeInvisibleParameterAnnotations" ||
1254 attribute_name == "RuntimeVisibleParameterAnnotations")
1255 {
1256 const u1 parameter_count = read<u1>();
1257 // There may be attributes for both runtime-visible and runtime-invisible
1258 // annotations, the length of either array may be longer than the other as
1259 // trailing parameters without annotations are omitted.
1260 // Extend our parameter_annotations if this one is longer than the one
1261 // previously recorded (if any).
1262 if(method.parameter_annotations.size() < parameter_count)
1266 }
1267 else if(attribute_name == "Exceptions")
1268 {
1270 }
1271 else
1273}
1274
1276{
1278 const u4 attribute_length = read<u4>();
1279
1281
1282 if(attribute_name=="Signature")
1283 {
1284 const u2 signature_index = read<u2>();
1286 }
1287 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1288 attribute_name=="RuntimeVisibleAnnotations")
1289 {
1291 }
1292 else
1294}
1295
1297{
1299 const u4 attribute_length = read<u4>();
1300
1302
1303 if(attribute_name=="LineNumberTable")
1304 {
1305 std::map<unsigned, std::reference_wrapper<instructiont>> instruction_map;
1306 for(auto &instruction : method.instructions)
1307 instruction_map.emplace(instruction.address, instruction);
1308
1310
1311 for(std::size_t i=0; i<line_number_table_length; i++)
1312 {
1313 const u2 start_pc = read<u2>();
1314 const u2 line_number = read<u2>();
1315
1316 // annotate the bytecode program
1317 auto it = instruction_map.find(start_pc);
1318
1319 if(it!=instruction_map.end())
1320 it->second.get().source_location.set_line(line_number);
1321 }
1322 }
1323 else if(attribute_name=="LocalVariableTable")
1324 {
1326
1328
1329 for(std::size_t i=0; i<local_variable_table_length; i++)
1330 {
1331 const u2 start_pc = read<u2>();
1332 const u2 length = read<u2>();
1333 const u2 name_index = read<u2>();
1334 const u2 descriptor_index = read<u2>();
1335 const u2 index = read<u2>();
1336
1337 method.local_variable_table[i].index=index;
1338 method.local_variable_table[i].name=pool_entry(name_index).s;
1339 method.local_variable_table[i].descriptor=
1340 id2string(pool_entry(descriptor_index).s);
1341 method.local_variable_table[i].start_pc=start_pc;
1342 method.local_variable_table[i].length=length;
1343 }
1344 }
1345 else if(attribute_name=="LocalVariableTypeTable")
1346 {
1348 }
1349 else if(attribute_name=="StackMapTable")
1350 {
1351 const u2 stack_map_entries = read<u2>();
1352
1353 method.stack_map_table.resize(stack_map_entries);
1354
1355 for(size_t i=0; i<stack_map_entries; i++)
1356 {
1357 const u1 frame_type = read<u1>();
1358 if(frame_type<=63)
1359 {
1360 method.stack_map_table[i].type=methodt::stack_map_table_entryt::SAME;
1361 method.stack_map_table[i].locals.resize(0);
1362 method.stack_map_table[i].stack.resize(0);
1363 }
1364 else if(64<=frame_type && frame_type<=127)
1365 {
1366 method.stack_map_table[i].type=
1367 methodt::stack_map_table_entryt::SAME_LOCALS_ONE_STACK;
1368 method.stack_map_table[i].locals.resize(0);
1369 method.stack_map_table[i].stack.resize(1);
1372 method.stack_map_table[i].stack[0]=verification_type_info;
1373 }
1374 else if(frame_type==247)
1375 {
1376 method.stack_map_table[i].type=
1377 methodt::stack_map_table_entryt::SAME_LOCALS_ONE_STACK_EXTENDED;
1378 method.stack_map_table[i].locals.resize(0);
1379 method.stack_map_table[i].stack.resize(1);
1381 const u2 offset_delta = read<u2>();
1383 method.stack_map_table[i].stack[0]=verification_type_info;
1384 method.stack_map_table[i].offset_delta=offset_delta;
1385 }
1386 else if(248<=frame_type && frame_type<=250)
1387 {
1388 method.stack_map_table[i].type=methodt::stack_map_table_entryt::CHOP;
1389 method.stack_map_table[i].locals.resize(0);
1390 method.stack_map_table[i].stack.resize(0);
1391 const u2 offset_delta = read<u2>();
1392 method.stack_map_table[i].offset_delta=offset_delta;
1393 }
1394 else if(frame_type==251)
1395 {
1396 method.stack_map_table[i].type
1397 =methodt::stack_map_table_entryt::SAME_EXTENDED;
1398 method.stack_map_table[i].locals.resize(0);
1399 method.stack_map_table[i].stack.resize(0);
1400 const u2 offset_delta = read<u2>();
1401 method.stack_map_table[i].offset_delta=offset_delta;
1402 }
1403 else if(252<=frame_type && frame_type<=254)
1404 {
1405 size_t new_locals = frame_type - 251;
1406 method.stack_map_table[i].type=methodt::stack_map_table_entryt::APPEND;
1407 method.stack_map_table[i].locals.resize(new_locals);
1408 method.stack_map_table[i].stack.resize(0);
1409 const u2 offset_delta = read<u2>();
1410 method.stack_map_table[i].offset_delta=offset_delta;
1411 for(size_t k=0; k<new_locals; k++)
1412 {
1413 method.stack_map_table[i].locals
1416 method.stack_map_table[i].locals.back();
1418 }
1419 }
1420 else if(frame_type==255)
1421 {
1422 method.stack_map_table[i].type=methodt::stack_map_table_entryt::FULL;
1423 const u2 offset_delta = read<u2>();
1424 method.stack_map_table[i].offset_delta=offset_delta;
1425 const u2 number_locals = read<u2>();
1426 method.stack_map_table[i].locals.resize(number_locals);
1427 for(size_t k=0; k<(size_t) number_locals; k++)
1428 {
1429 method.stack_map_table[i].locals
1432 method.stack_map_table[i].locals.back();
1434 }
1435 const u2 number_stack_items = read<u2>();
1436 method.stack_map_table[i].stack.resize(number_stack_items);
1437 for(size_t k=0; k<(size_t) number_stack_items; k++)
1438 {
1439 method.stack_map_table[i].stack
1442 method.stack_map_table[i].stack.back();
1444 }
1445 }
1446 else
1447 throw "error: unknown stack frame type encountered";
1448 }
1449 }
1450 else
1452}
1453
1456{
1457 const u1 tag = read<u1>();
1458 switch(tag)
1459 {
1460 case VTYPE_INFO_TOP:
1461 v.type=methodt::verification_type_infot::TOP;
1462 break;
1463 case VTYPE_INFO_INTEGER:
1464 v.type=methodt::verification_type_infot::INTEGER;
1465 break;
1466 case VTYPE_INFO_FLOAT:
1467 v.type=methodt::verification_type_infot::FLOAT;
1468 break;
1469 case VTYPE_INFO_LONG:
1470 v.type=methodt::verification_type_infot::LONG;
1471 break;
1472 case VTYPE_INFO_DOUBLE:
1473 v.type=methodt::verification_type_infot::DOUBLE;
1474 break;
1476 v.type=methodt::verification_type_infot::ITEM_NULL;
1477 break;
1479 v.type=methodt::verification_type_infot::UNINITIALIZED_THIS;
1480 break;
1481 case VTYPE_INFO_OBJECT:
1482 v.type=methodt::verification_type_infot::OBJECT;
1483 v.cpool_index = read<u2>();
1484 break;
1485 case VTYPE_INFO_UNINIT:
1486 v.type=methodt::verification_type_infot::UNINITIALIZED;
1487 v.offset = read<u2>();
1488 break;
1489 default:
1490 throw "error: unknown verification type info encountered";
1491 }
1492}
1493
1495 std::vector<annotationt> &annotations)
1496{
1497 const u2 num_annotations = read<u2>();
1498
1499 for(u2 number=0; number<num_annotations; number++)
1500 {
1501 annotationt annotation;
1502 rRuntimeAnnotation(annotation);
1503 annotations.push_back(annotation);
1504 }
1505}
1506
1508 annotationt &annotation)
1509{
1510 const u2 type_index = read<u2>();
1511 annotation.type=type_entry(type_index);
1513}
1514
1516 annotationt::element_value_pairst &element_value_pairs)
1517{
1519 element_value_pairs.resize(num_element_value_pairs);
1520
1521 for(auto &element_value_pair : element_value_pairs)
1522 {
1523 const u2 element_name_index = read<u2>();
1526 }
1527}
1528
1536{
1537 const u1 tag = read<u1>();
1538
1539 switch(tag)
1540 {
1541 case 'e':
1542 {
1545 // todo: enum
1546 return exprt();
1547 }
1548
1549 case 'c':
1550 {
1551 const u2 class_info_index = read<u2>();
1553 }
1554
1555 case '@':
1556 {
1557 // TODO: return this wrapped in an exprt
1558 // another annotation, recursively
1559 annotationt annotation;
1560 rRuntimeAnnotation(annotation);
1561 return exprt();
1562 }
1563
1564 case '[':
1565 {
1566 const u2 num_values = read<u2>();
1567 exprt::operandst values;
1568 values.reserve(num_values);
1569 for(std::size_t i=0; i<num_values; i++)
1570 {
1571 values.push_back(get_relement_value());
1572 }
1573 return array_exprt(std::move(values), array_typet(typet(), exprt()));
1574 }
1575
1576 case 's':
1577 {
1578 const u2 const_value_index = read<u2>();
1580 }
1581
1582 default:
1583 {
1584 const u2 const_value_index = read<u2>();
1586 }
1587 }
1588}
1589
1602 const u4 &attribute_length)
1603{
1604 classt &parsed_class = parse_tree.parsed_class;
1605 std::string name = parsed_class.name.c_str();
1606 const u2 number_of_classes = read<u2>();
1608 INVARIANT(
1610 "The number of bytes to be read for the InnerClasses attribute does not "
1611 "match the attribute length.");
1612
1613 const auto pool_entry_lambda = [this](u2 index) -> pool_entryt & {
1614 return pool_entry(index);
1615 };
1616 const auto remove_separator_char = [](std::string str, char ch) {
1617 str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1618 return str;
1619 };
1620
1621 for(int i = 0; i < number_of_classes; i++)
1622 {
1625 const u2 inner_name_index = read<u2>();
1627
1628 std::string inner_class_info_name =
1631 bool is_private = (inner_class_access_flags & ACC_PRIVATE) != 0;
1632 bool is_public = (inner_class_access_flags & ACC_PUBLIC) != 0;
1633 bool is_protected = (inner_class_access_flags & ACC_PROTECTED) != 0;
1634 bool is_static = (inner_class_access_flags & ACC_STATIC) != 0;
1635
1636 // If the original parsed class name matches the inner class name,
1637 // the parsed class is an inner class, so overwrite the parsed class'
1638 // access information and mark it as an inner class.
1639 bool is_inner_class = remove_separator_char(id2string(parsed_class.name), '.') ==
1641 if(!is_inner_class)
1642 continue;
1643 parsed_class.is_inner_class = is_inner_class;
1644 parsed_class.is_static_class = is_static;
1645 // This is a marker that a class is anonymous.
1646 if(inner_name_index == 0)
1647 parsed_class.is_anonymous_class = true;
1648 else
1650 // Note that if outer_class_info_index == 0, the inner class is an anonymous
1651 // or local class, and is treated as private.
1652 if(outer_class_info_index == 0)
1653 {
1654 parsed_class.is_private = true;
1655 parsed_class.is_protected = false;
1656 parsed_class.is_public = false;
1657 }
1658 else
1659 {
1660 std::string outer_class_info_name =
1663 parsed_class.outer_class =
1665 parsed_class.is_private = is_private;
1666 parsed_class.is_protected = is_protected;
1667 parsed_class.is_public = is_public;
1668 }
1669 }
1670}
1671
1678{
1680
1681 std::vector<irep_idt> exceptions;
1682 for(size_t i = 0; i < number_of_exceptions; i++)
1683 {
1685 const irep_idt exception_name =
1687 exceptions.push_back(exception_name);
1688 }
1689 return exceptions;
1690}
1691
1693{
1694 classt &parsed_class = parse_tree.parsed_class;
1695
1697 const u4 attribute_length = read<u4>();
1698
1700
1701 if(attribute_name=="SourceFile")
1702 {
1703 const u2 sourcefile_index = read<u2>();
1705
1706 const std::string &fqn(id2string(parsed_class.name));
1707 size_t last_index = fqn.find_last_of('.');
1708 if(last_index==std::string::npos)
1710 else
1711 {
1712 std::string package_name=fqn.substr(0, last_index+1);
1713 std::replace(package_name.begin(), package_name.end(), '.', '/');
1714 const std::string &full_file_name=
1717 }
1718
1719 for(auto &method : parsed_class.methods)
1720 {
1721 method.source_location.set_file(sourcefile_name);
1722 for(auto &instruction : method.instructions)
1723 {
1724 if(!instruction.source_location.get_line().empty())
1725 instruction.source_location.set_file(sourcefile_name);
1726 }
1727 }
1728 }
1729 else if(attribute_name=="Signature")
1730 {
1731 const u2 signature_index = read<u2>();
1734 parsed_class.signature.value(),
1736 }
1737 else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1738 attribute_name=="RuntimeVisibleAnnotations")
1739 {
1741 }
1742 else if(attribute_name == "BootstrapMethods")
1743 {
1744 // for this attribute
1745 // cf. https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.23
1746 INVARIANT(
1747 !parsed_class.attribute_bootstrapmethods_read,
1748 "only one BootstrapMethods argument is allowed in a class file");
1749
1750 // mark as read in parsed class
1751 parsed_class.attribute_bootstrapmethods_read = true;
1753 }
1754 else if(attribute_name == "InnerClasses")
1755 {
1757 }
1758 else
1760}
1761
1763{
1764 const u2 methods_count = read<u2>();
1765
1766 for(std::size_t j=0; j<methods_count; j++)
1767 rmethod();
1768}
1769
1770#define ACC_PUBLIC 0x0001u
1771#define ACC_PRIVATE 0x0002u
1772#define ACC_PROTECTED 0x0004u
1773#define ACC_STATIC 0x0008u
1774#define ACC_FINAL 0x0010u
1775#define ACC_VARARGS 0x0080u
1776#define ACC_SUPER 0x0020u
1777#define ACC_VOLATILE 0x0040u
1778#define ACC_TRANSIENT 0x0080u
1779#define ACC_INTERFACE 0x0200u
1780#define ACC_ABSTRACT 0x0400u
1781#define ACC_SYNTHETIC 0x1000u
1782#define ACC_ANNOTATION 0x2000u
1783#define ACC_ENUM 0x4000u
1784
1786{
1788
1789 const u2 access_flags = read<u2>();
1790 const u2 name_index = read<u2>();
1791 const u2 descriptor_index = read<u2>();
1792
1793 method.is_final=(access_flags&ACC_FINAL)!=0;
1794 method.is_static=(access_flags&ACC_STATIC)!=0;
1796 method.is_public=(access_flags&ACC_PUBLIC)!=0;
1800 method.is_native=(access_flags&ACC_NATIVE)!=0;
1801 method.is_bridge = (access_flags & ACC_BRIDGE) != 0;
1802 method.is_varargs = (access_flags & ACC_VARARGS) != 0;
1803 method.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
1804 method.name=pool_entry(name_index).s;
1805 method.base_name=pool_entry(name_index).s;
1806 method.descriptor=id2string(pool_entry(descriptor_index).s);
1807
1808 const auto flags = (method.is_public ? 1 : 0) +
1809 (method.is_protected?1:0)+
1810 (method.is_private?1:0);
1811 DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
1812 const u2 attributes_count = read<u2>();
1813
1814 for(std::size_t j=0; j<attributes_count; j++)
1815 rmethod_attribute(method);
1816}
1817
1818std::optional<java_bytecode_parse_treet> java_bytecode_parse(
1819 std::istream &istream,
1820 const irep_idt &class_name,
1821 message_handlert &message_handler,
1822 bool skip_instructions)
1823{
1825 skip_instructions, message_handler);
1826 java_bytecode_parser.in=&istream;
1827
1829
1830 if(
1831 parser_result ||
1832 java_bytecode_parser.parse_tree.parsed_class.name != class_name)
1833 {
1834 return {};
1835 }
1836
1837 return std::move(java_bytecode_parser.parse_tree);
1838}
1839
1840std::optional<java_bytecode_parse_treet> java_bytecode_parse(
1841 const std::string &file,
1842 const irep_idt &class_name,
1843 message_handlert &message_handler,
1844 bool skip_instructions)
1845{
1846 std::ifstream in(file, std::ios::binary);
1847
1848 if(!in)
1849 {
1850 return {};
1851 }
1852
1853 return java_bytecode_parse(
1854 in, class_name, message_handler, skip_instructions);
1855}
1856
1861{
1863
1864 INVARIANT(
1866 "Local variable type table cannot have more elements "
1867 "than the local variable table.");
1868 for(std::size_t i=0; i<local_variable_type_table_length; i++)
1869 {
1870 const u2 start_pc = read<u2>();
1871 const u2 length = read<u2>();
1872 const u2 name_index = read<u2>();
1873 const u2 signature_index = read<u2>();
1874 const u2 index = read<u2>();
1875
1876 bool found=false;
1877 for(auto &lvar : method.local_variable_table)
1878 {
1879 // compare to entry in LVT
1880 if(lvar.index==index &&
1881 lvar.name==pool_entry(name_index).s &&
1882 lvar.start_pc==start_pc &&
1883 lvar.length==length)
1884 {
1885 found=true;
1887 break;
1888 }
1889 }
1890 INVARIANT(
1891 found,
1892 "Entry in LocalVariableTypeTable must be present in LVT");
1893 }
1894}
1895
1923
1929std::optional<java_bytecode_parsert::lambda_method_handlet>
1931{
1932 const std::function<pool_entryt &(u2)> pool_entry_lambda =
1933 [this](u2 index) -> pool_entryt & { return pool_entry(index); };
1934
1936
1939 ref_entry.get_name_and_type(pool_entry_lambda);
1940
1941 // The following lambda kinds specified in the JVMS (see for example
1942 // https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-5.html#jvms-5.4.3.5
1943 // but which aren't actually created by javac. Java has no syntax for a field-
1944 // refernce like this, and even writing a stereotypical lambda like
1945 // Producer<FieldType> = instance -> instance.field does not generate this
1946 // kind of handle, but rather a synthetic method implementing the getfield
1947 // operation.
1948 // We don't implement a handle kind that can't be produced yet, but should
1949 // they ever turn up this is where to fix them.
1950
1951 if(
1952 entry.get_handle_kind() ==
1954 entry.get_handle_kind() ==
1956 entry.get_handle_kind() ==
1958 entry.get_handle_kind() ==
1960 {
1961 return {};
1962 }
1963
1964 irep_idt class_name =
1966
1968 std::string descriptor = name_and_type.get_descriptor(pool_entry_lambda);
1969 irep_idt mangled_method_name = id2string(method_name) + ":" + descriptor;
1971
1972 method_handle_typet handle_type =
1974
1975 class_method_descriptor_exprt method_descriptor{
1976 method_type, mangled_method_name, class_name, method_name};
1977 lambda_method_handlet lambda_method_handle{method_descriptor, handle_type};
1978
1979 return lambda_method_handle;
1980}
1981
1984{
1986 for(size_t bootstrap_method_index = 0;
1989 {
1992
1994
1996 log.debug() << "INFO: parse BootstrapMethod handle "
1997 << num_bootstrap_arguments << " #args" << messaget::eom;
1998
1999 // read u2 values of entry into vector
2000 std::vector<u2> u2_values(num_bootstrap_arguments);
2001 for(size_t i = 0; i < num_bootstrap_arguments; i++)
2002 u2_values[i] = read<u2>();
2003
2004 // try parsing bootstrap method handle
2005 // each entry contains a MethodHandle structure
2006 // u2 tag
2007 // u2 reference kind which must be in the range from 1 to 9
2008 // u2 reference index into the constant pool
2009 //
2010 // reference kinds use the following
2011 // 1 to 4 must point to a CONSTANT_Fieldref structure
2012 // 5 or 8 must point to a CONSTANT_Methodref structure
2013 // 6 or 7 must point to a CONSTANT_Methodref or
2014 // CONSTANT_InterfaceMethodref structure, if the class file version
2015 // number is 52.0 or above, to a CONSTANT_Methodref only in the case
2016 // of less than 52.0
2017 // 9 must point to a CONSTANT_InterfaceMethodref structure
2018
2019 // the index must point to a CONSTANT_String
2020 // CONSTANT_Class
2021 // CONSTANT_Integer
2022 // CONSTANT_Long
2023 // CONSTANT_Float
2024 // CONSTANT_Double
2025 // CONSTANT_MethodHandle
2026 // CONSTANT_MethodType
2027
2028 // We read the three arguments here to see whether they correspond to
2029 // our hypotheses for this being a lambda function entry.
2030
2031 // Need at least 3 arguments, the interface type, the method hanlde
2032 // and the method_type, otherwise it doesn't look like a call that we
2033 // understand
2035 {
2037 log.debug()
2038 << "format of BootstrapMethods entry not recognized: too few arguments"
2039 << messaget::eom;
2040 continue;
2041 }
2042
2046
2047 // The additional arguments for the altmetafactory call are skipped,
2048 // as they are currently not used. We verify though that they are of
2049 // CONSTANT_Integer type, cases where this does not hold will be
2050 // analyzed further.
2051 bool recognized = true;
2052 for(size_t i = 3; i < num_bootstrap_arguments; i++)
2053 {
2056 }
2057
2058 if(!recognized)
2059 {
2060 log.debug() << "format of BootstrapMethods entry not recognized: extra "
2061 "arguments of wrong type"
2062 << messaget::eom;
2064 continue;
2065 }
2066
2071
2072 if(
2076 {
2077 log.debug()
2078 << "format of BootstrapMethods entry not recognized: arguments "
2079 "wrong type"
2080 << messaget::eom;
2082 continue;
2083 }
2084
2085 log.debug() << "INFO: parse lambda handle" << messaget::eom;
2086 std::optional<lambda_method_handlet> lambda_method_handle =
2088
2089 if(!lambda_method_handle.has_value())
2090 {
2091 log.debug() << "format of BootstrapMethods entry not recognized: method "
2092 "handle not recognised"
2093 << messaget::eom;
2095 continue;
2096 }
2097
2098 // If parse_method_handle can't parse the lambda method, it should return {}
2100 lambda_method_handle->handle_type != method_handle_typet::UNKNOWN_HANDLE);
2101
2102 log.debug()
2103 << "lambda function reference "
2104 << id2string(
2105 lambda_method_handle->get_method_descriptor().base_method_name())
2106 << " in class \"" << parse_tree.parsed_class.name << "\""
2107 << "\n interface type is "
2109 << "\n method type is "
2113 }
2114}
2115
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
struct bytecode_infot const bytecode_info[]
#define BC_wide
uint16_t u2
uint32_t u4
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Array constructor from list of elements.
Definition std_expr.h:1621
Arrays with given size.
Definition std_types.h:807
class_infot get_class(const pool_entry_lookupt &pool_entry) const
name_and_type_infot get_name_and_type(const pool_entry_lookupt &pool_entry) const
u2 get_name_and_type_index() const
base_ref_infot(const pool_entryt &entry)
Corresponds to the CONSTANT_Class_info Structure Described in Java 8 specification 4....
class_infot(const pool_entryt &entry)
std::string get_name(const pool_entry_lookupt &pool_entry) const
An expression describing a method on a class.
Definition std_expr.h:3634
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
bool empty() const
Definition dstring.h:89
const char * c_str() const
Definition dstring.h:116
Base class for all expressions.
Definition expr.h:56
std::vector< exprt > operandst
Definition expr.h:58
typet & type()
Return the type of the expression.
Definition expr.h:84
const irep_idt & get(const irep_idt &name) const
Definition irep.cpp:44
const irep_idt & id() const
Definition irep.h:388
pool_entryt & pool_entry(u2 index)
void get_annotation_class_refs(const std::vector< annotationt > &annotations)
For each of the given annotations, get a reference to its class and recursively get class references ...
void store_unknown_method_handle(size_t bootstrap_method_index)
Creates an unknown method handle and puts it into the parsed_class.
exprt get_relement_value()
Corresponds to the element_value structure Described in Java 8 specification 4.7.16....
java_bytecode_parse_treet::annotationt annotationt
java_bytecode_parse_treet::methodt methodt
void rRuntimeAnnotation_attribute(std::vector< annotationt > &)
const typet type_entry(u2 index)
void rbytecode(std::vector< instructiont > &)
void rcode_attribute(methodt &method)
void read_bootstrapmethods_entry()
Read all entries of the BootstrapMethods attribute of a class file.
void get_class_refs()
Get the class references for the benefit of a dependency analysis.
void get_annotation_value_class_refs(const exprt &value)
See java_bytecode_parsert::get_annotation_class_refs.
java_bytecode_parse_treet parse_tree
std::vector< pool_entryt > constant_poolt
java_bytecode_parsert(bool skip_instructions, message_handlert &message_handler)
void skip_bytes(std::size_t bytes)
void read_verification_type_info(methodt::verification_type_infot &)
java_bytecode_parse_treet::fieldt fieldt
void get_class_refs_rec(const typet &)
void parse_local_variable_type_table(methodt &method)
Parses the local variable type table of a method.
void rRuntimeAnnotation(annotationt &)
std::optional< lambda_method_handlet > parse_method_handle(const class method_handle_infot &entry)
Read method handle pointed to from constant pool entry at index, return type of method handle and nam...
void relement_value_pairs(annotationt::element_value_pairst &)
void rinner_classes_attribute(const u4 &attribute_length)
Corresponds to the element_value structure Described in Java 8 specification 4.7.6 https://docs....
std::vector< irep_idt > rexceptions_attribute()
Corresponds to the element_value structure Described in Java 8 specification 4.7.5 https://docs....
void rmethod_attribute(methodt &method)
method_handle_kindt
Indicates what sort of code should be synthesised for a lambda call:
Definition java_types.h:464
@ LAMBDA_STATIC_METHOD_HANDLE
Direct call to the given method.
@ LAMBDA_VIRTUAL_METHOD_HANDLE
Virtual call to the given interface or method.
@ LAMBDA_CONSTRUCTOR_HANDLE
Instantiate the needed type then call a constructor.
mstreamt & error() const
Definition message.h:391
mstreamt & debug() const
Definition message.h:421
static eomt eom
Definition message.h:289
method_handle_infot(const pool_entryt &entry)
base_ref_infot get_reference(const pool_entry_lookupt &pool_entry) const
method_handle_kindt handle_kind
method_handle_kindt get_handle_kind() const
method_handle_kindt
Correspond to the different valid values for field handle_kind From Java 8 spec 4....
Corresponds to the CONSTANT_NameAndType_info Structure Described in Java 8 specification 4....
name_and_type_infot(const pool_entryt &entry)
std::string get_descriptor(const pool_entry_lookupt &pool_entry) const
std::string get_name(const pool_entry_lookupt &pool_entry) const
std::istream * in
Definition parser.h:27
messaget log
Definition parser.h:144
Fixed-width bit-vector with two's complement interpretation.
const irep_idt & get_function() const
void set_java_bytecode_index(const irep_idt &index)
void set_line(const irep_idt &line)
void set_function(const irep_idt &function)
A struct tag type, i.e., struct_typet with an identifier.
Definition std_types.h:493
Structure type, corresponds to C style structs.
Definition std_types.h:231
std::function< pool_entryt &(u2)> pool_entry_lookupt
static std::string read_utf8_constant(const pool_entryt &entry)
structured_pool_entryt(const pool_entryt &entry)
static symbol_exprt typeless(const irep_idt &id)
Generate a symbol_exprt without a proper type.
Definition std_expr.h:150
const irep_idt & get_identifier() const
Definition std_types.h:410
An expression denoting a type.
Definition std_expr.h:3082
The type of an expression, extends irept.
Definition type.h:29
Fixed-width bit-vector with unsigned binary interpretation.
static format_containert< T > format(const T &o)
Definition format.h:37
const std::string & id2string(const irep_idt &d)
Definition irep.h:44
#define CONSTANT_Fieldref
#define CONSTANT_String
#define VTYPE_INFO_DOUBLE
#define UNUSED_u2(x)
#define ACC_PUBLIC
#define CONSTANT_InvokeDynamic
#define VTYPE_INFO_FLOAT
#define ACC_BRIDGE
#define ACC_NATIVE
#define T_SHORT
#define ACC_ENUM
#define CONSTANT_Methodref
#define T_LONG
#define T_FLOAT
#define ACC_VARARGS
#define CONSTANT_Long
#define T_INT
#define VTYPE_INFO_LONG
#define VTYPE_INFO_TOP
#define CONSTANT_NameAndType
#define ACC_ABSTRACT
#define VTYPE_INFO_OBJECT
#define ACC_ANNOTATION
#define T_DOUBLE
#define ACC_SYNCHRONIZED
#define T_BYTE
#define CONSTANT_Class
#define ACC_STATIC
#define CONSTANT_Integer
std::optional< java_bytecode_parse_treet > java_bytecode_parse(std::istream &istream, const irep_idt &class_name, message_handlert &message_handler, bool skip_instructions)
Attempt to parse a Java class from the given stream.
#define CONSTANT_MethodType
#define VTYPE_INFO_INTEGER
static java_class_typet::method_handle_kindt get_method_handle_type(method_handle_infot::method_handle_kindt java_handle_kind)
Translate the lambda method reference kind in a class file into the kind of handling the method requi...
#define ACC_PROTECTED
#define CONSTANT_Double
#define ACC_PRIVATE
#define CONSTANT_Float
#define VTYPE_INFO_UNINIT
#define CONSTANT_InterfaceMethodref
#define T_BOOLEAN
#define ACC_INTERFACE
#define ACC_SYNTHETIC
#define VTYPE_INFO_UNINIT_THIS
#define ACC_FINAL
#define CONSTANT_MethodHandle
#define T_CHAR
#define VTYPE_INFO_ITEM_NULL
#define CONSTANT_Utf8
Representation of a constant Java string.
void get_dependencies_from_generic_parameters(const std::string &signature, std::set< irep_idt > &refs)
Collect information about generic type parameters from a given signature.
struct_tag_typet java_classname(const std::string &id)
std::optional< typet > java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
bool is_java_array_tag(const irep_idt &tag)
See above.
const typet & java_array_element_type(const struct_tag_typet &array_symbol)
Return a const reference to the element type of a given java array type.
const java_method_typet & to_java_method_type(const typet &type)
Definition java_types.h:183
std::optional< typet > java_type_from_string_with_exception(const std::string &descriptor, const std::optional< std::string > &signature, const std::string &class_name)
static std::optional< java_class_typet::java_lambda_method_handlet > lambda_method_handle(const symbol_table_baset &symbol_table, const irep_idt &method_identifier, const java_method_typet &dynamic_method_type)
Parser utilities.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:534
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
#define POSTCONDITION(CONDITION)
Definition invariant.h:479
API to expression classes.
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition std_types.h:308
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition std_types.h:518
std::vector< element_value_pairt > element_value_pairst
void add_method_handle(size_t bootstrap_index, const lambda_method_handlet &handle)
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.