cprover
java_bytecode_parser.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "java_bytecode_parser.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <map>
14 #include <string>
15 
16 #include <util/arith_tools.h>
17 #include <util/ieee_float.h>
18 #include <util/parser.h>
19 #include <util/prefix.h>
20 #include <util/std_expr.h>
21 #include <util/string_constant.h>
22 #include <util/optional.h>
23 
25 #include "java_types.h"
26 #include "bytecode_info.h"
27 
28 #ifdef DEBUG
29 #include <iostream>
30 #endif
31 
33 {
34 public:
37  {
39  }
40 
41  virtual bool parse();
42 
57 
59 
60  struct pool_entryt
61  {
67  pool_entryt():tag(0), ref1(0), ref2(0), number(0) { }
68  };
69 
70  typedef std::vector<pool_entryt> constant_poolt;
72 
73 protected:
74  class bytecodet
75  {
76  public:
78  char format;
79  };
80 
81  std::vector<bytecodet> bytecodes;
82  const bool skip_instructions = false;
83 
85  {
86  if(index==0 || index>=constant_pool.size())
87  {
88  error() << "invalid constant pool index (" << index << ")" << eom;
89  error() << "constant pool size: " << constant_pool.size() << eom;
90  throw 0;
91  }
92 
93  return constant_pool[index];
94  }
95 
96  exprt &constant(u2 index)
97  {
98  return pool_entry(index).expr;
99  }
100 
101  const typet type_entry(u2 index)
102  {
103  return java_type_from_string(id2string(pool_entry(index).s));
104  }
105 
107  {
108  // This is only useful for rbytecodes, which in turn is only useful to
109  // parse method instructions.
111  return;
112 
113  // pre-hash the mnemonics, so we do this only once
114  bytecodes.resize(256);
115  for(const bytecode_infot *p=bytecode_info; p->mnemonic!=nullptr; p++)
116  {
117  assert(p->opcode<bytecodes.size());
118  bytecodes[p->opcode].mnemonic=p->mnemonic;
119  bytecodes[p->opcode].format=p->format;
120  }
121  }
122 
123  void rClassFile();
124  void rconstant_pool();
125  void rinterfaces(classt &parsed_class);
126  void rfields(classt &parsed_class);
127  void rmethods(classt &parsed_class);
128  void rmethod(classt &parsed_class);
129  void
130  rinner_classes_attribute(classt &parsed_class, const u4 &attribute_length);
131  std::vector<irep_idt> rexceptions_attribute();
132  void rclass_attribute(classt &parsed_class);
137  void rmethod_attribute(methodt &method);
138  void rfield_attribute(fieldt &);
139  void rcode_attribute(methodt &method);
140  void read_verification_type_info(methodt::verification_type_infot &);
142  void get_class_refs();
143  void get_class_refs_rec(const typet &);
145  const java_bytecode_parse_treet::annotationst &annotations);
146  void get_annotation_value_class_refs(const exprt &value);
149  parse_method_handle(const class method_handle_infot &entry);
151 
152  void skip_bytes(std::size_t bytes)
153  {
154  for(std::size_t i=0; i<bytes; i++)
155  {
156  if(!*in)
157  {
158  error() << "unexpected end of bytecode file" << eom;
159  throw 0;
160  }
161  in->get();
162  }
163  }
164 
165  u8 read_bytes(size_t bytes)
166  {
167  u8 result=0;
168  for(size_t i=0; i<bytes; i++)
169  {
170  if(!*in)
171  {
172  error() << "unexpected end of bytecode file" << eom;
173  throw 0;
174  }
175  result<<=8;
176  result|=in->get();
177  }
178  return result;
179  }
180 
182  {
183  return (u1)read_bytes(1);
184  }
185 
186  inline u2 read_u2()
187  {
188  return (u2)read_bytes(2);
189  }
190 
192  {
193  return (u4)read_bytes(4);
194  }
195 
197  {
198  return read_bytes(8);
199  }
200 
202  classt &parsed_class,
203  size_t bootstrap_method_index,
204  u2_valuest u2_values) const;
205 };
206 
207 #define CONSTANT_Class 7
208 #define CONSTANT_Fieldref 9
209 #define CONSTANT_Methodref 10
210 #define CONSTANT_InterfaceMethodref 11
211 #define CONSTANT_String 8
212 #define CONSTANT_Integer 3
213 #define CONSTANT_Float 4
214 #define CONSTANT_Long 5
215 #define CONSTANT_Double 6
216 #define CONSTANT_NameAndType 12
217 #define CONSTANT_Utf8 1
218 #define CONSTANT_MethodHandle 15
219 #define CONSTANT_MethodType 16
220 #define CONSTANT_InvokeDynamic 18
221 
222 #define VTYPE_INFO_TOP 0
223 #define VTYPE_INFO_INTEGER 1
224 #define VTYPE_INFO_FLOAT 2
225 #define VTYPE_INFO_LONG 3
226 #define VTYPE_INFO_DOUBLE 4
227 #define VTYPE_INFO_ITEM_NULL 5
228 #define VTYPE_INFO_UNINIT_THIS 6
229 #define VTYPE_INFO_OBJECT 7
230 #define VTYPE_INFO_UNINIT 8
231 
233 {
234 public:
236  : tag(entry.tag)
237  {
238  }
239 
240  u1 get_tag() const
241  {
242  return tag;
243  }
244 
245  typedef std::function<java_bytecode_parsert::pool_entryt &(u2)>
248 
249 protected:
250  static std::string read_utf8_constant(const pool_entryt &entry)
251  {
252  INVARIANT(
253  entry.tag == CONSTANT_Utf8, "Name entry must be a constant UTF-8");
254  return id2string(entry.s);
255  }
256 
257 private:
259 };
260 
264 {
265 public:
266  explicit class_infot(const pool_entryt &entry) : structured_pool_entryt(entry)
267  {
268  PRECONDITION(entry.tag == CONSTANT_Class);
269  name_index = entry.ref1;
270  }
271 
272  std::string get_name(pool_entry_lookupt pool_entry) const
273  {
274  const pool_entryt &name_entry = pool_entry(name_index);
275  return read_utf8_constant(name_entry);
276  }
277 
278 private:
280 };
281 
285 {
286 public:
288  : structured_pool_entryt(entry)
289  {
291  name_index = entry.ref1;
292  descriptor_index = entry.ref2;
293  }
294 
295  std::string get_name(pool_entry_lookupt pool_entry) const
296  {
297  const pool_entryt &name_entry = pool_entry(name_index);
298  return read_utf8_constant(name_entry);
299  }
300 
301  std::string get_descriptor(pool_entry_lookupt pool_entry) const
302  {
303  const pool_entryt &descriptor_entry = pool_entry(descriptor_index);
304  return read_utf8_constant(descriptor_entry);
305  }
306 
307 private:
310 };
311 
313 {
314 public:
316  {
317  PRECONDITION(
318  entry.tag == CONSTANT_Fieldref || entry.tag == CONSTANT_Methodref ||
320  class_index = entry.ref1;
321  name_and_type_index = entry.ref2;
322  }
323 
325  {
326  return class_index;
327  }
329  {
330  return name_and_type_index;
331  }
332 
334  {
335  const pool_entryt &name_and_type_entry = pool_entry(name_and_type_index);
336 
337  INVARIANT(
338  name_and_type_entry.tag == CONSTANT_NameAndType,
339  "name_and_typeindex did not correspond to a name_and_type in the "
340  "constant pool");
341 
342  return name_and_type_infot{name_and_type_entry};
343  }
344 
346  {
347  const pool_entryt &class_entry = pool_entry(class_index);
348 
349  return class_infot{class_entry};
350  }
351 
352 private:
355 };
356 
358 {
359 public:
364  {
365  REF_getField = 1,
366  REF_getStatic = 2,
367  REF_putField = 3,
368  REF_putStatic = 4,
369  REF_invokeVirtual = 5,
370  REF_invokeStatic = 6,
371  REF_invokeSpecial = 7,
374  };
375 
377  : structured_pool_entryt(entry)
378  {
380  PRECONDITION(entry.ref1 > 0 && entry.ref1 < 10); // Java 8 spec 4.4.8
381  reference_kind = static_cast<method_handle_kindt>(entry.ref1);
382  reference_index = entry.ref2;
383  }
384 
386  {
387  const base_ref_infot ref_entry{pool_entry(reference_index)};
388 
389  // validate the correctness of the constant pool entry
390  switch(reference_kind)
391  {
396  {
397  INVARIANT(ref_entry.get_tag() == CONSTANT_Fieldref, "4.4.2");
398  break;
399  }
402  {
403  INVARIANT(ref_entry.get_tag() == CONSTANT_Methodref, "4.4.2");
404  break;
405  }
408  {
409  INVARIANT(
410  ref_entry.get_tag() == CONSTANT_Methodref ||
411  ref_entry.get_tag() == CONSTANT_InterfaceMethodref,
412  "4.4.2");
413  break;
414  }
416  {
417  INVARIANT(ref_entry.get_tag() == CONSTANT_InterfaceMethodref, "");
418  break;
419  }
420  }
421 
422  return ref_entry;
423  }
424 
425 private:
428 };
429 
431 {
432  try
433  {
434  rClassFile();
435  }
436 
437  catch(const char *message)
438  {
439  error() << message << eom;
440  return true;
441  }
442 
443  catch(const std::string &message)
444  {
445  error() << message << eom;
446  return true;
447  }
448 
449  catch(...)
450  {
451  error() << "parsing error" << eom;
452  return true;
453  }
454 
455  return false;
456 }
457 
458 #define ACC_PUBLIC 0x0001
459 #define ACC_PRIVATE 0x0002
460 #define ACC_PROTECTED 0x0004
461 #define ACC_STATIC 0x0008
462 #define ACC_FINAL 0x0010
463 #define ACC_SYNCHRONIZED 0x0020
464 #define ACC_BRIDGE 0x0040
465 #define ACC_NATIVE 0x0100
466 #define ACC_INTERFACE 0x0200
467 #define ACC_ABSTRACT 0x0400
468 #define ACC_STRICT 0x0800
469 #define ACC_SYNTHETIC 0x1000
470 #define ACC_ANNOTATION 0x2000
471 #define ACC_ENUM 0x4000
472 
473 #define UNUSED_u2(x) { const u2 x = read_u2(); (void)x; } (void)0
474 
476 {
478 
479  u4 magic=read_u4();
480  UNUSED_u2(minor_version);
481  u2 major_version=read_u2();
482 
483  if(magic!=0xCAFEBABE)
484  {
485  error() << "wrong magic" << eom;
486  throw 0;
487  }
488 
489  if(major_version<44)
490  {
491  error() << "unexpected major version" << eom;
492  throw 0;
493  }
494 
495  rconstant_pool();
496 
497  classt &parsed_class=parse_tree.parsed_class;
498 
499  u2 access_flags=read_u2();
500  u2 this_class=read_u2();
501  u2 super_class=read_u2();
502 
503  parsed_class.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
504  parsed_class.is_enum=(access_flags&ACC_ENUM)!=0;
505  parsed_class.is_public=(access_flags&ACC_PUBLIC)!=0;
506  parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
507  parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
508  parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
509  parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
510  parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
511  parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
512  parsed_class.name=
513  constant(this_class).type().get(ID_C_base_name);
514 
515  if(super_class!=0)
516  parsed_class.super_class = constant(super_class).type().get(ID_C_base_name);
517 
518  rinterfaces(parsed_class);
519  rfields(parsed_class);
520  rmethods(parsed_class);
521 
522  // count elements of enum
523  if(parsed_class.is_enum)
524  for(fieldt &field : parse_tree.parsed_class.fields)
525  if(field.is_enum)
527 
528  u2 attributes_count=read_u2();
529 
530  for(std::size_t j=0; j<attributes_count; j++)
531  rclass_attribute(parsed_class);
532 
533  get_class_refs();
534 
536 }
537 
540 {
541  for(const auto &c : constant_pool)
542  {
543  switch(c.tag)
544  {
545  case CONSTANT_Class:
546  get_class_refs_rec(c.expr.type());
547  break;
548 
550  {
553  }
554  break;
555 
556  default: {}
557  }
558  }
559 
561 
562  for(const auto &field : parse_tree.parsed_class.fields)
563  {
564  get_annotation_class_refs(field.annotations);
565  typet field_type;
566  if(field.signature.has_value())
567  {
569  field.descriptor,
570  field.signature,
572 
573  // add generic type args to class refs as dependencies, same below for
574  // method types and entries from the local variable type table
576  field_type, parse_tree.class_refs);
577  }
578  else
579  field_type=java_type_from_string(field.descriptor);
580 
581  get_class_refs_rec(field_type);
582  }
583 
584  for(const auto &method : parse_tree.parsed_class.methods)
585  {
586  get_annotation_class_refs(method.annotations);
587  for(const auto &parameter_annotations : method.parameter_annotations)
588  get_annotation_class_refs(parameter_annotations);
589  typet method_type;
590  if(method.signature.has_value())
591  {
593  method.descriptor,
594  method.signature,
597  method_type, parse_tree.class_refs);
598  }
599  else
600  method_type=java_type_from_string(method.descriptor);
601 
602  get_class_refs_rec(method_type);
603  for(const auto &var : method.local_variable_table)
604  {
605  typet var_type;
606  if(var.signature.has_value())
607  {
609  var.descriptor,
610  var.signature,
613  var_type, parse_tree.class_refs);
614  }
615  else
616  var_type=java_type_from_string(var.descriptor);
617  get_class_refs_rec(var_type);
618  }
619  }
620 }
621 
623 {
624  if(src.id()==ID_code)
625  {
626  const java_method_typet &ct = to_java_method_type(src);
627  const typet &rt=ct.return_type();
628  get_class_refs_rec(rt);
629  for(const auto &p : ct.parameters())
630  get_class_refs_rec(p.type());
631  }
632  else if(src.id() == ID_struct_tag)
633  {
634  irep_idt name=src.get(ID_C_base_name);
635  if(has_prefix(id2string(name), "array["))
636  {
637  const typet &element_type =
638  static_cast<const typet &>(src.find(ID_element_type));
639  get_class_refs_rec(element_type);
640  }
641  else
642  parse_tree.class_refs.insert(name);
643  }
644  else if(src.id()==ID_struct)
645  {
646  const struct_typet &struct_type=to_struct_type(src);
647  for(const auto &c : struct_type.components())
648  get_class_refs_rec(c.type());
649  }
650  else if(src.id()==ID_pointer)
652 }
653 
657  const java_bytecode_parse_treet::annotationst &annotations)
658 {
659  for(const auto &annotation : annotations)
660  {
661  get_class_refs_rec(annotation.type);
662  for(const auto &element_value_pair : annotation.element_value_pairs)
663  get_annotation_value_class_refs(element_value_pair.value);
664  }
665 }
666 
671 {
672  if(const auto &symbol_expr = expr_try_dynamic_cast<symbol_exprt>(value))
673  {
674  const irep_idt &value_id = symbol_expr->get_identifier();
675  const typet value_type = java_type_from_string(id2string(value_id));
676  get_class_refs_rec(value_type);
677  }
678  else if(const auto &array_expr = expr_try_dynamic_cast<array_exprt>(value))
679  {
680  for(const exprt &operand : array_expr->operands())
682  }
683  // TODO: enum and nested annotation cases (once these are correctly parsed by
684  // get_relement_value).
685  // Note that in the cases where expr is a string or primitive type, no
686  // additional class references are needed.
687 }
688 
690 {
691  u2 constant_pool_count=read_u2();
692  if(constant_pool_count==0)
693  {
694  error() << "invalid constant_pool_count" << eom;
695  throw 0;
696  }
697 
698  constant_pool.resize(constant_pool_count);
699 
700  for(constant_poolt::iterator
701  it=constant_pool.begin();
702  it!=constant_pool.end();
703  it++)
704  {
705  // the first entry isn't used
706  if(it==constant_pool.begin())
707  continue;
708 
709  it->tag=read_u1();
710 
711  switch(it->tag)
712  {
713  case CONSTANT_Class:
714  it->ref1=read_u2();
715  break;
716 
717  case CONSTANT_Fieldref:
718  case CONSTANT_Methodref:
722  it->ref1=read_u2();
723  it->ref2=read_u2();
724  break;
725 
726  case CONSTANT_String:
727  case CONSTANT_MethodType:
728  it->ref1=read_u2();
729  break;
730 
731  case CONSTANT_Integer:
732  case CONSTANT_Float:
733  it->number=read_u4();
734  break;
735 
736  case CONSTANT_Long:
737  case CONSTANT_Double:
738  it->number=read_u8();
739  // Eight-byte constants take up two entries
740  // in the constant_pool table, for annoying this programmer.
741  if(it==constant_pool.end())
742  {
743  error() << "invalid double entry" << eom;
744  throw 0;
745  }
746  it++;
747  it->tag=0;
748  break;
749 
750  case CONSTANT_Utf8:
751  {
752  u2 bytes=read_u2();
753  std::string s;
754  s.resize(bytes);
755  for(std::string::iterator s_it=s.begin(); s_it!=s.end(); s_it++)
756  *s_it=read_u1();
757  it->s=s; // hashes
758  }
759  break;
760 
762  it->ref1=read_u1();
763  it->ref2=read_u2();
764  break;
765 
766  default:
767  error() << "unknown constant pool entry (" << it->tag << ")"
768  << eom;
769  throw 0;
770  }
771  }
772 
773  // we do a bit of post-processing after we have them all
774  for(constant_poolt::iterator
775  it=constant_pool.begin();
776  it!=constant_pool.end();
777  it++)
778  {
779  // the first entry isn't used
780  if(it==constant_pool.begin())
781  continue;
782 
783  switch(it->tag)
784  {
785  case CONSTANT_Class:
786  {
787  const std::string &s=id2string(pool_entry(it->ref1).s);
788  it->expr=type_exprt(java_classname(s));
789  }
790  break;
791 
792  case CONSTANT_Fieldref:
793  {
794  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
795  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
796  const pool_entryt &class_entry=pool_entry(it->ref1);
797  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
798  typet type=type_entry(nameandtype_entry.ref2);
799 
800  auto class_tag = java_classname(id2string(class_name_entry.s));
801 
802  fieldref_exprt fieldref(type, name_entry.s, class_tag.get_identifier());
803 
804  it->expr=fieldref;
805  }
806  break;
807 
808  case CONSTANT_Methodref:
810  {
811  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
812  const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
813  const pool_entryt &class_entry=pool_entry(it->ref1);
814  const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
815  typet type=type_entry(nameandtype_entry.ref2);
816 
817  auto class_tag = java_classname(id2string(class_name_entry.s));
818 
819  irep_idt component_name=
820  id2string(name_entry.s)+
821  ":"+id2string(pool_entry(nameandtype_entry.ref2).s);
822 
823  irep_idt class_name = class_tag.get_identifier();
824 
825  irep_idt identifier=
826  id2string(class_name)+"."+id2string(component_name);
827 
828  exprt virtual_function(ID_virtual_function, type);
829  virtual_function.set(ID_component_name, component_name);
830  virtual_function.set(ID_C_class, class_name);
831  virtual_function.set(ID_C_base_name, name_entry.s);
832  virtual_function.set(ID_identifier, identifier);
833 
834  it->expr=virtual_function;
835  }
836  break;
837 
838  case CONSTANT_String:
839  {
840  // ldc turns these into references to java.lang.String
841  exprt string_literal(ID_java_string_literal);
842  string_literal.set(ID_value, pool_entry(it->ref1).s);
843  it->expr=string_literal;
844  }
845  break;
846 
847  case CONSTANT_Integer:
848  it->expr=from_integer(it->number, java_int_type());
849  break;
850 
851  case CONSTANT_Float:
852  {
854  value.unpack(it->number);
855  it->expr=value.to_expr();
856  }
857  break;
858 
859  case CONSTANT_Long:
860  it->expr=from_integer(it->number, java_long_type());
861  break;
862 
863  case CONSTANT_Double:
864  {
866  value.unpack(it->number);
867  it->expr=value.to_expr();
868  }
869  break;
870 
872  {
873  it->expr.id("nameandtype");
874  }
875  break;
876 
878  {
879  it->expr.id("methodhandle");
880  }
881  break;
882 
883  case CONSTANT_MethodType:
884  {
885  it->expr.id("methodtype");
886  }
887  break;
888 
890  {
891  it->expr.id("invokedynamic");
892  const pool_entryt &nameandtype_entry=pool_entry(it->ref2);
893  typet type=type_entry(nameandtype_entry.ref2);
894  type.set(ID_java_lambda_method_handle_index, it->ref1);
895  it->expr.type()=type;
896  }
897  break;
898 
899  default:{};
900  }
901  }
902 }
903 
905 {
906  u2 interfaces_count=read_u2();
907 
908  for(std::size_t i=0; i<interfaces_count; i++)
909  parsed_class.implements
910  .push_back(constant(read_u2()).type().get(ID_C_base_name));
911 }
912 
914 {
915  u2 fields_count=read_u2();
916 
917  for(std::size_t i=0; i<fields_count; i++)
918  {
919  fieldt &field=parsed_class.add_field();
920 
921  u2 access_flags=read_u2();
922  u2 name_index=read_u2();
923  u2 descriptor_index=read_u2();
924  u2 attributes_count=read_u2();
925 
926  field.name=pool_entry(name_index).s;
927  field.is_static=(access_flags&ACC_STATIC)!=0;
928  field.is_final=(access_flags&ACC_FINAL)!=0;
929  field.is_enum=(access_flags&ACC_ENUM)!=0;
930 
931  field.descriptor=id2string(pool_entry(descriptor_index).s);
932  field.is_public=(access_flags&ACC_PUBLIC)!=0;
933  field.is_protected=(access_flags&ACC_PROTECTED)!=0;
934  field.is_private=(access_flags&ACC_PRIVATE)!=0;
935  const auto flags = (field.is_public ? 1 : 0) +
936  (field.is_protected?1:0)+
937  (field.is_private?1:0);
938  DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
939 
940  for(std::size_t j=0; j<attributes_count; j++)
941  rfield_attribute(field);
942  }
943 }
944 
945 #define T_BOOLEAN 4
946 #define T_CHAR 5
947 #define T_FLOAT 6
948 #define T_DOUBLE 7
949 #define T_BYTE 8
950 #define T_SHORT 9
951 #define T_INT 10
952 #define T_LONG 11
953 
955  methodt::instructionst &instructions)
956 {
957  INVARIANT(
958  bytecodes.size() == 256, "bytecode mnemonics should have been populated");
959 
960  u4 code_length=read_u4();
961 
962  u4 address;
963  size_t bytecode_index=0; // index of bytecode instruction
964 
965  for(address=0; address<code_length; address++)
966  {
967  bool wide_instruction=false;
968  u4 start_of_instruction=address;
969 
970  u1 bytecode=read_u1();
971 
972  if(bytecode==0xc4) // wide
973  {
974  wide_instruction=true;
975  address++;
976  bytecode=read_u1();
977  // The only valid instructions following a wide byte are
978  // [ifald]load, [ifald]store, ret and iinc
979  // All of these have either format of v, or V
980  INVARIANT(
981  bytecodes[bytecode].format == 'v' || bytecodes[bytecode].format == 'V',
982  "Unexpected wide instruction: " +
983  id2string(bytecodes[bytecode].mnemonic));
984  }
985 
986  instructions.push_back(instructiont());
987  instructiont &instruction=instructions.back();
988  instruction.statement=bytecodes[bytecode].mnemonic;
989  instruction.address=start_of_instruction;
990  instruction.source_location
991  .set_java_bytecode_index(std::to_string(bytecode_index));
992 
993  switch(bytecodes[bytecode].format)
994  {
995  case ' ': // no further bytes
996  break;
997 
998  case 'c': // a constant_pool index (one byte)
999  if(wide_instruction)
1000  {
1001  instruction.args.push_back(constant(read_u2()));
1002  address+=2;
1003  }
1004  else
1005  {
1006  instruction.args.push_back(constant(read_u1()));
1007  address+=1;
1008  }
1009  break;
1010 
1011  case 'C': // a constant_pool index (two bytes)
1012  instruction.args.push_back(constant(read_u2()));
1013  address+=2;
1014  break;
1015 
1016  case 'b': // a signed byte
1017  {
1018  s1 c=read_u1();
1019  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
1020  }
1021  address+=1;
1022  break;
1023 
1024  case 'o': // two byte branch offset, signed
1025  {
1026  s2 offset=read_u2();
1027  // By converting the signed offset into an absolute address (by adding
1028  // the current address) the number represented becomes unsigned.
1029  instruction.args.push_back(
1030  from_integer(address+offset, unsignedbv_typet(16)));
1031  }
1032  address+=2;
1033  break;
1034 
1035  case 'O': // four byte branch offset, signed
1036  {
1037  s4 offset=read_u4();
1038  // By converting the signed offset into an absolute address (by adding
1039  // the current address) the number represented becomes unsigned.
1040  instruction.args.push_back(
1041  from_integer(address+offset, unsignedbv_typet(32)));
1042  }
1043  address+=4;
1044  break;
1045 
1046  case 'v': // local variable index (one byte)
1047  {
1048  if(wide_instruction)
1049  {
1050  u2 v = read_u2();
1051  instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
1052  address += 2;
1053  }
1054  else
1055  {
1056  u1 v = read_u1();
1057  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1058  address += 1;
1059  }
1060  }
1061 
1062  break;
1063 
1064  case 'V':
1065  // local variable index (two bytes) plus two signed bytes
1066  if(wide_instruction)
1067  {
1068  u2 v=read_u2();
1069  instruction.args.push_back(from_integer(v, unsignedbv_typet(16)));
1070  s2 c=read_u2();
1071  instruction.args.push_back(from_integer(c, signedbv_typet(16)));
1072  address+=4;
1073  }
1074  else // local variable index (one byte) plus one signed byte
1075  {
1076  u1 v=read_u1();
1077  instruction.args.push_back(from_integer(v, unsignedbv_typet(8)));
1078  s1 c=read_u1();
1079  instruction.args.push_back(from_integer(c, signedbv_typet(8)));
1080  address+=2;
1081  }
1082  break;
1083 
1084  case 'I': // two byte constant_pool index plus two bytes
1085  {
1086  u2 c=read_u2();
1087  instruction.args.push_back(constant(c));
1088  u1 b1=read_u1();
1089  instruction.args.push_back(from_integer(b1, unsignedbv_typet(8)));
1090  u1 b2=read_u1();
1091  instruction.args.push_back(from_integer(b2, unsignedbv_typet(8)));
1092  }
1093  address+=4;
1094  break;
1095 
1096  case 'L': // lookupswitch
1097  {
1098  u4 base_offset=address;
1099 
1100  // first a pad to 32-bit align
1101  while(((address+1)&3)!=0) { read_u1(); address++; }
1102 
1103  // now default value
1104  s4 default_value=read_u4();
1105  // By converting the signed offset into an absolute address (by adding
1106  // the current address) the number represented becomes unsigned.
1107  instruction.args.push_back(
1108  from_integer(base_offset+default_value, unsignedbv_typet(32)));
1109  address+=4;
1110 
1111  // number of pairs
1112  u4 npairs=read_u4();
1113  address+=4;
1114 
1115  for(std::size_t i=0; i<npairs; i++)
1116  {
1117  s4 match=read_u4();
1118  s4 offset=read_u4();
1119  instruction.args.push_back(
1120  from_integer(match, signedbv_typet(32)));
1121  // By converting the signed offset into an absolute address (by adding
1122  // the current address) the number represented becomes unsigned.
1123  instruction.args.push_back(
1124  from_integer(base_offset+offset, unsignedbv_typet(32)));
1125  address+=8;
1126  }
1127  }
1128  break;
1129 
1130  case 'T': // tableswitch
1131  {
1132  size_t base_offset=address;
1133 
1134  // first a pad to 32-bit align
1135  while(((address+1)&3)!=0) { read_u1(); address++; }
1136 
1137  // now default value
1138  s4 default_value=read_u4();
1139  instruction.args.push_back(
1140  from_integer(base_offset+default_value, signedbv_typet(32)));
1141  address+=4;
1142 
1143  // now low value
1144  s4 low_value=read_u4();
1145  address+=4;
1146 
1147  // now high value
1148  s4 high_value=read_u4();
1149  address+=4;
1150 
1151  // there are high-low+1 offsets, and they are signed
1152  for(s4 i=low_value; i<=high_value; i++)
1153  {
1154  s4 offset=read_u4();
1155  instruction.args.push_back(from_integer(i, signedbv_typet(32)));
1156  // By converting the signed offset into an absolute address (by adding
1157  // the current address) the number represented becomes unsigned.
1158  instruction.args.push_back(
1159  from_integer(base_offset+offset, unsignedbv_typet(32)));
1160  address+=4;
1161  }
1162  }
1163  break;
1164 
1165  case 'm': // multianewarray: constant-pool index plus one unsigned byte
1166  {
1167  u2 c=read_u2(); // constant-pool index
1168  instruction.args.push_back(constant(c));
1169  u1 dimensions=read_u1(); // number of dimensions
1170  instruction.args.push_back(
1171  from_integer(dimensions, unsignedbv_typet(8)));
1172  address+=3;
1173  }
1174  break;
1175 
1176  case 't': // array subtype, one byte
1177  {
1178  typet t;
1179  switch(read_u1())
1180  {
1181  case T_BOOLEAN: t.id(ID_bool); break;
1182  case T_CHAR: t.id(ID_char); break;
1183  case T_FLOAT: t.id(ID_float); break;
1184  case T_DOUBLE: t.id(ID_double); break;
1185  case T_BYTE: t.id(ID_byte); break;
1186  case T_SHORT: t.id(ID_short); break;
1187  case T_INT: t.id(ID_int); break;
1188  case T_LONG: t.id(ID_long); break;
1189  default:{};
1190  }
1191  instruction.args.push_back(type_exprt(t));
1192  }
1193  address+=1;
1194  break;
1195 
1196  case 's': // a signed short
1197  {
1198  s2 s=read_u2();
1199  instruction.args.push_back(from_integer(s, signedbv_typet(16)));
1200  }
1201  address+=2;
1202  break;
1203 
1204  default:
1205  throw "unknown JVM bytecode instruction";
1206  }
1207  bytecode_index++;
1208  }
1209 
1210  if(address!=code_length)
1211  {
1212  error() << "bytecode length mismatch" << eom;
1213  throw 0;
1214  }
1215 }
1216 
1218 {
1219  u2 attribute_name_index=read_u2();
1220  u4 attribute_length=read_u4();
1221 
1222  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1223 
1224  if(attribute_name == "Code" && !skip_instructions)
1225  {
1226  UNUSED_u2(max_stack);
1227  UNUSED_u2(max_locals);
1228 
1229  rbytecode(method.instructions);
1230 
1231  u2 exception_table_length=read_u2();
1232  method.exception_table.resize(exception_table_length);
1233 
1234  for(std::size_t e=0; e<exception_table_length; e++)
1235  {
1236  u2 start_pc=read_u2();
1237  u2 end_pc=read_u2();
1238 
1239  // from the class file format spec ("4.7.3. The Code Attribute" for Java8)
1240  INVARIANT(
1241  start_pc < end_pc,
1242  "The start_pc must be less than the end_pc as this is the range the "
1243  "exception is active");
1244 
1245  u2 handler_pc=read_u2();
1246  u2 catch_type=read_u2();
1247  method.exception_table[e].start_pc=start_pc;
1248  method.exception_table[e].end_pc=end_pc;
1249  method.exception_table[e].handler_pc=handler_pc;
1250  if(catch_type!=0)
1251  method.exception_table[e].catch_type =
1252  to_struct_tag_type(pool_entry(catch_type).expr.type());
1253  }
1254 
1255  u2 attributes_count=read_u2();
1256 
1257  for(std::size_t j=0; j<attributes_count; j++)
1258  rcode_attribute(method);
1259 
1260  irep_idt line_number;
1261 
1262  // add missing line numbers
1263  for(methodt::instructionst::iterator
1264  it=method.instructions.begin();
1265  it!=method.instructions.end();
1266  it++)
1267  {
1268  if(!it->source_location.get_line().empty())
1269  line_number=it->source_location.get_line();
1270  else if(!line_number.empty())
1271  it->source_location.set_line(line_number);
1272  it->source_location
1273  .set_function(
1274  "java::"+id2string(parse_tree.parsed_class.name)+"."+
1275  id2string(method.name)+":"+method.descriptor);
1276  }
1277 
1278  // line number of method
1279  if(!method.instructions.empty())
1280  method.source_location.set_line(
1281  method.instructions.begin()->source_location.get_line());
1282  }
1283  else if(attribute_name=="Signature")
1284  {
1285  u2 signature_index=read_u2();
1286  method.signature=id2string(pool_entry(signature_index).s);
1287  }
1288  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1289  attribute_name=="RuntimeVisibleAnnotations")
1290  {
1292  }
1293  else if(
1294  attribute_name == "RuntimeInvisibleParameterAnnotations" ||
1295  attribute_name == "RuntimeVisibleParameterAnnotations")
1296  {
1297  u1 parameter_count = read_u1();
1298  // There may be attributes for both runtime-visiible and rutime-invisible
1299  // annotations, the length of either array may be longer than the other as
1300  // trailing parameters without annotations are omitted.
1301  // Extend our parameter_annotations if this one is longer than the one
1302  // previously recorded (if any).
1303  if(method.parameter_annotations.size() < parameter_count)
1304  method.parameter_annotations.resize(parameter_count);
1305  for(u2 param_no = 0; param_no < parameter_count; ++param_no)
1307  }
1308  else if(attribute_name == "Exceptions")
1309  {
1311  }
1312  else
1313  skip_bytes(attribute_length);
1314 }
1315 
1317 {
1318  u2 attribute_name_index=read_u2();
1319  u4 attribute_length=read_u4();
1320 
1321  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1322 
1323  if(attribute_name=="Signature")
1324  {
1325  u2 signature_index=read_u2();
1326  field.signature=id2string(pool_entry(signature_index).s);
1327  }
1328  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1329  attribute_name=="RuntimeVisibleAnnotations")
1330  {
1332  }
1333  else
1334  skip_bytes(attribute_length);
1335 }
1336 
1338 {
1339  u2 attribute_name_index=read_u2();
1340  u4 attribute_length=read_u4();
1341 
1342  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1343 
1344  if(attribute_name=="LineNumberTable")
1345  {
1346  // address -> instructiont
1347  typedef std::map<unsigned,
1348  methodt::instructionst::iterator> instruction_mapt;
1349  instruction_mapt instruction_map;
1350 
1351  for(methodt::instructionst::iterator
1352  it=method.instructions.begin();
1353  it!=method.instructions.end();
1354  it++)
1355  {
1356  instruction_map[it->address]=it;
1357  }
1358 
1359  u2 line_number_table_length=read_u2();
1360 
1361  for(std::size_t i=0; i<line_number_table_length; i++)
1362  {
1363  u2 start_pc=read_u2();
1364  u2 line_number=read_u2();
1365 
1366  // annotate the bytecode program
1367  instruction_mapt::const_iterator it=
1368  instruction_map.find(start_pc);
1369 
1370  if(it!=instruction_map.end())
1371  it->second->source_location.set_line(line_number);
1372  }
1373  }
1374  else if(attribute_name=="LocalVariableTable")
1375  {
1376  u2 local_variable_table_length=read_u2();
1377 
1378  method.local_variable_table.resize(local_variable_table_length);
1379 
1380  for(std::size_t i=0; i<local_variable_table_length; i++)
1381  {
1382  u2 start_pc=read_u2();
1383  u2 length=read_u2();
1384  u2 name_index=read_u2();
1385  u2 descriptor_index=read_u2();
1386  u2 index=read_u2();
1387 
1388  method.local_variable_table[i].index=index;
1389  method.local_variable_table[i].name=pool_entry(name_index).s;
1390  method.local_variable_table[i].descriptor=
1391  id2string(pool_entry(descriptor_index).s);
1392  method.local_variable_table[i].start_pc=start_pc;
1393  method.local_variable_table[i].length=length;
1394  }
1395  }
1396  else if(attribute_name=="LocalVariableTypeTable")
1397  {
1399  }
1400  else if(attribute_name=="StackMapTable")
1401  {
1402  u2 stack_map_entries=read_u2();
1403 
1404  method.stack_map_table.resize(stack_map_entries);
1405 
1406  for(size_t i=0; i<stack_map_entries; i++)
1407  {
1408  u1 frame_type=read_u1();
1409  if(frame_type<=63)
1410  {
1412  method.stack_map_table[i].locals.resize(0);
1413  method.stack_map_table[i].stack.resize(0);
1414  }
1415  else if(64<=frame_type && frame_type<=127)
1416  {
1417  method.stack_map_table[i].type=
1419  method.stack_map_table[i].locals.resize(0);
1420  method.stack_map_table[i].stack.resize(1);
1421  methodt::verification_type_infot verification_type_info;
1422  read_verification_type_info(verification_type_info);
1423  method.stack_map_table[i].stack[0]=verification_type_info;
1424  }
1425  else if(frame_type==247)
1426  {
1427  method.stack_map_table[i].type=
1429  method.stack_map_table[i].locals.resize(0);
1430  method.stack_map_table[i].stack.resize(1);
1431  methodt::verification_type_infot verification_type_info;
1432  u2 offset_delta=read_u2();
1433  read_verification_type_info(verification_type_info);
1434  method.stack_map_table[i].stack[0]=verification_type_info;
1435  method.stack_map_table[i].offset_delta=offset_delta;
1436  }
1437  else if(248<=frame_type && frame_type<=250)
1438  {
1440  method.stack_map_table[i].locals.resize(0);
1441  method.stack_map_table[i].stack.resize(0);
1442  u2 offset_delta=read_u2();
1443  method.stack_map_table[i].offset_delta=offset_delta;
1444  }
1445  else if(frame_type==251)
1446  {
1447  method.stack_map_table[i].type
1449  method.stack_map_table[i].locals.resize(0);
1450  method.stack_map_table[i].stack.resize(0);
1451  u2 offset_delta=read_u2();
1452  method.stack_map_table[i].offset_delta=offset_delta;
1453  }
1454  else if(252<=frame_type && frame_type<=254)
1455  {
1456  size_t new_locals=(size_t) (frame_type-251);
1458  method.stack_map_table[i].locals.resize(new_locals);
1459  method.stack_map_table[i].stack.resize(0);
1460  u2 offset_delta=read_u2();
1461  method.stack_map_table[i].offset_delta=offset_delta;
1462  for(size_t k=0; k<new_locals; k++)
1463  {
1464  method.stack_map_table[i].locals
1465  .push_back(methodt::verification_type_infot());
1467  method.stack_map_table[i].locals.back();
1469  }
1470  }
1471  else if(frame_type==255)
1472  {
1474  u2 offset_delta=read_u2();
1475  method.stack_map_table[i].offset_delta=offset_delta;
1476  u2 number_locals=read_u2();
1477  method.stack_map_table[i].locals.resize(number_locals);
1478  for(size_t k=0; k<(size_t) number_locals; k++)
1479  {
1480  method.stack_map_table[i].locals
1481  .push_back(methodt::verification_type_infot());
1483  method.stack_map_table[i].locals.back();
1485  }
1486  u2 number_stack_items=read_u2();
1487  method.stack_map_table[i].stack.resize(number_stack_items);
1488  for(size_t k=0; k<(size_t) number_stack_items; k++)
1489  {
1490  method.stack_map_table[i].stack
1491  .push_back(methodt::verification_type_infot());
1493  method.stack_map_table[i].stack.back();
1495  }
1496  }
1497  else
1498  throw "error: unknown stack frame type encountered";
1499  }
1500  }
1501  else
1502  skip_bytes(attribute_length);
1503 }
1504 
1507 {
1508  u1 tag=read_u1();
1509  switch(tag)
1510  {
1511  case VTYPE_INFO_TOP:
1513  break;
1514  case VTYPE_INFO_INTEGER:
1516  break;
1517  case VTYPE_INFO_FLOAT:
1519  break;
1520  case VTYPE_INFO_LONG:
1522  break;
1523  case VTYPE_INFO_DOUBLE:
1525  break;
1526  case VTYPE_INFO_ITEM_NULL:
1528  break;
1531  break;
1532  case VTYPE_INFO_OBJECT:
1534  v.cpool_index=read_u2();
1535  break;
1536  case VTYPE_INFO_UNINIT:
1538  v.offset=read_u2();
1539  break;
1540  default:
1541  throw "error: unknown verification type info encountered";
1542  }
1543 }
1544 
1546  annotationst &annotations)
1547 {
1548  u2 num_annotations=read_u2();
1549 
1550  for(u2 number=0; number<num_annotations; number++)
1551  {
1552  annotationt annotation;
1553  rRuntimeAnnotation(annotation);
1554  annotations.push_back(annotation);
1555  }
1556 }
1557 
1559  annotationt &annotation)
1560 {
1561  u2 type_index=read_u2();
1562  annotation.type=type_entry(type_index);
1564 }
1565 
1567  annotationt::element_value_pairst &element_value_pairs)
1568 {
1569  u2 num_element_value_pairs=read_u2();
1570  element_value_pairs.resize(num_element_value_pairs);
1571 
1572  for(auto &element_value_pair : element_value_pairs)
1573  {
1574  u2 element_name_index=read_u2();
1575  element_value_pair.element_name=pool_entry(element_name_index).s;
1576  element_value_pair.value = get_relement_value();
1577  }
1578 }
1579 
1587 {
1588  u1 tag=read_u1();
1589 
1590  switch(tag)
1591  {
1592  case 'e':
1593  {
1594  UNUSED_u2(type_name_index);
1595  UNUSED_u2(const_name_index);
1596  // todo: enum
1597  return exprt();
1598  }
1599 
1600  case 'c':
1601  {
1602  u2 class_info_index = read_u2();
1603  return symbol_exprt(pool_entry(class_info_index).s);
1604  }
1605 
1606  case '@':
1607  {
1608  // TODO: return this wrapped in an exprt
1609  // another annotation, recursively
1610  annotationt annotation;
1611  rRuntimeAnnotation(annotation);
1612  return exprt();
1613  }
1614 
1615  case '[':
1616  {
1617  array_exprt values;
1618  u2 num_values=read_u2();
1619  for(std::size_t i=0; i<num_values; i++)
1620  {
1621  values.operands().push_back(get_relement_value());
1622  }
1623  return std::move(values);
1624  }
1625 
1626  case 's':
1627  {
1628  u2 const_value_index=read_u2();
1629  return string_constantt(pool_entry(const_value_index).s);
1630  }
1631 
1632  default:
1633  {
1634  u2 const_value_index=read_u2();
1635  return constant(const_value_index);
1636  }
1637  }
1638 }
1639 
1652  classt &parsed_class,
1653  const u4 &attribute_length)
1654 {
1655  std::string name = parsed_class.name.c_str();
1656  u2 number_of_classes = read_u2();
1657  u4 number_of_bytes_to_be_read = number_of_classes * 8 + 2;
1658  INVARIANT(
1659  number_of_bytes_to_be_read == attribute_length,
1660  "The number of bytes to be read for the InnerClasses attribute does not "
1661  "match the attribute length.");
1662 
1663  const auto pool_entry_lambda = [this](u2 index) -> pool_entryt & {
1664  return pool_entry(index);
1665  };
1666  const auto remove_separator_char = [](std::string str, char ch) {
1667  str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1668  return str;
1669  };
1670 
1671  for(int i = 0; i < number_of_classes; i++)
1672  {
1673  u2 inner_class_info_index = read_u2();
1674  u2 outer_class_info_index = read_u2();
1675  u2 inner_name_index = read_u2();
1676  u2 inner_class_access_flags = read_u2();
1677 
1678  std::string inner_class_info_name =
1679  class_infot(pool_entry(inner_class_info_index))
1680  .get_name(pool_entry_lambda);
1681  bool is_private = (inner_class_access_flags & ACC_PRIVATE) != 0;
1682  bool is_public = (inner_class_access_flags & ACC_PUBLIC) != 0;
1683  bool is_protected = (inner_class_access_flags & ACC_PROTECTED) != 0;
1684  bool is_static = (inner_class_access_flags & ACC_STATIC) != 0;
1685 
1686  // If the original parsed class name matches the inner class name,
1687  // the parsed class is an inner class, so overwrite the parsed class'
1688  // access information and mark it as an inner class.
1689  bool is_inner_class = remove_separator_char(id2string(parsed_class.name), '.') ==
1690  remove_separator_char(inner_class_info_name, '/');
1691  if(is_inner_class)
1692  parsed_class.is_inner_class = is_inner_class;
1693  if(!is_inner_class)
1694  continue;
1695  parsed_class.is_static_class = is_static;
1696  // This is a marker that a class is anonymous.
1697  if(inner_name_index == 0)
1698  parsed_class.is_anonymous_class = true;
1699  // Note that if outer_class_info_index == 0, the inner class is an anonymous
1700  // or local class, and is treated as private.
1701  if(outer_class_info_index == 0)
1702  {
1703  parsed_class.is_private = true;
1704  parsed_class.is_protected = false;
1705  parsed_class.is_public = false;
1706  }
1707  else
1708  {
1709  std::string outer_class_info_name =
1710  class_infot(pool_entry(outer_class_info_index))
1711  .get_name(pool_entry_lambda);
1712  parsed_class.outer_class =
1713  constant(outer_class_info_index).type().get(ID_C_base_name);
1714  parsed_class.is_private = is_private;
1715  parsed_class.is_protected = is_protected;
1716  parsed_class.is_public = is_public;
1717  }
1718  }
1719 }
1720 
1727 {
1728  u2 number_of_exceptions = read_u2();
1729 
1730  std::vector<irep_idt> exceptions;
1731  for(size_t i = 0; i < number_of_exceptions; i++)
1732  {
1733  u2 exception_index_table = read_u2();
1734  const irep_idt exception_name =
1735  constant(exception_index_table).type().get(ID_C_base_name);
1736  exceptions.push_back(exception_name);
1737  }
1738  return exceptions;
1739 }
1740 
1742 {
1743  u2 attribute_name_index=read_u2();
1744  u4 attribute_length=read_u4();
1745 
1746  irep_idt attribute_name=pool_entry(attribute_name_index).s;
1747 
1748  if(attribute_name=="SourceFile")
1749  {
1750  u2 sourcefile_index=read_u2();
1751  irep_idt sourcefile_name;
1752 
1753  std::string fqn(id2string(parsed_class.name));
1754  size_t last_index=fqn.find_last_of(".");
1755  if(last_index==std::string::npos)
1756  sourcefile_name=pool_entry(sourcefile_index).s;
1757  else
1758  {
1759  std::string package_name=fqn.substr(0, last_index+1);
1760  std::replace(package_name.begin(), package_name.end(), '.', '/');
1761  const std::string &full_file_name=
1762  package_name+id2string(pool_entry(sourcefile_index).s);
1763  sourcefile_name=full_file_name;
1764  }
1765 
1766  for(methodst::iterator m_it=parsed_class.methods.begin();
1767  m_it!=parsed_class.methods.end();
1768  m_it++)
1769  {
1770  m_it->source_location.set_file(sourcefile_name);
1771  for(instructionst::iterator i_it=m_it->instructions.begin();
1772  i_it!=m_it->instructions.end();
1773  i_it++)
1774  {
1775  if(!i_it->source_location.get_line().empty())
1776  i_it->source_location.set_file(sourcefile_name);
1777  }
1778  }
1779  }
1780  else if(attribute_name=="Signature")
1781  {
1782  u2 signature_index=read_u2();
1783  parsed_class.signature=id2string(pool_entry(signature_index).s);
1785  parsed_class.signature.value(),
1787  }
1788  else if(attribute_name=="RuntimeInvisibleAnnotations" ||
1789  attribute_name=="RuntimeVisibleAnnotations")
1790  {
1792  }
1793  else if(attribute_name == "BootstrapMethods")
1794  {
1795  // for this attribute
1796  // cf. https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.7.23
1797  INVARIANT(
1798  !parsed_class.attribute_bootstrapmethods_read,
1799  "only one BootstrapMethods argument is allowed in a class file");
1800 
1801  // mark as read in parsed class
1802  parsed_class.attribute_bootstrapmethods_read = true;
1803  read_bootstrapmethods_entry(parsed_class);
1804  }
1805  else if(attribute_name == "InnerClasses")
1806  {
1808  parsed_class, attribute_length);
1809  }
1810  else
1811  skip_bytes(attribute_length);
1812 }
1813 
1815 {
1816  u2 methods_count=read_u2();
1817 
1818  for(std::size_t j=0; j<methods_count; j++)
1819  rmethod(parsed_class);
1820 }
1821 
1822 #define ACC_PUBLIC 0x0001
1823 #define ACC_PRIVATE 0x0002
1824 #define ACC_PROTECTED 0x0004
1825 #define ACC_STATIC 0x0008
1826 #define ACC_FINAL 0x0010
1827 #define ACC_VARARGS 0x0080
1828 #define ACC_SUPER 0x0020
1829 #define ACC_VOLATILE 0x0040
1830 #define ACC_TRANSIENT 0x0080
1831 #define ACC_INTERFACE 0x0200
1832 #define ACC_ABSTRACT 0x0400
1833 #define ACC_SYNTHETIC 0x1000
1834 #define ACC_ANNOTATION 0x2000
1835 #define ACC_ENUM 0x4000
1836 
1838 {
1839  methodt &method=parsed_class.add_method();
1840 
1841  u2 access_flags=read_u2();
1842  u2 name_index=read_u2();
1843  u2 descriptor_index=read_u2();
1844 
1845  method.is_final=(access_flags&ACC_FINAL)!=0;
1846  method.is_static=(access_flags&ACC_STATIC)!=0;
1847  method.is_abstract=(access_flags&ACC_ABSTRACT)!=0;
1848  method.is_public=(access_flags&ACC_PUBLIC)!=0;
1849  method.is_protected=(access_flags&ACC_PROTECTED)!=0;
1850  method.is_private=(access_flags&ACC_PRIVATE)!=0;
1851  method.is_synchronized=(access_flags&ACC_SYNCHRONIZED)!=0;
1852  method.is_native=(access_flags&ACC_NATIVE)!=0;
1853  method.is_bridge = (access_flags & ACC_BRIDGE) != 0;
1854  method.is_varargs = (access_flags & ACC_VARARGS) != 0;
1855  method.name=pool_entry(name_index).s;
1856  method.base_name=pool_entry(name_index).s;
1857  method.descriptor=id2string(pool_entry(descriptor_index).s);
1858 
1859  const auto flags = (method.is_public ? 1 : 0) +
1860  (method.is_protected?1:0)+
1861  (method.is_private?1:0);
1862  DATA_INVARIANT(flags<=1, "at most one of public, protected, private");
1863  u2 attributes_count=read_u2();
1864 
1865  for(std::size_t j=0; j<attributes_count; j++)
1866  rmethod_attribute(method);
1867 }
1868 
1871  std::istream &istream,
1872  message_handlert &message_handler,
1873  bool skip_instructions)
1874 {
1875  java_bytecode_parsert java_bytecode_parser(skip_instructions);
1876  java_bytecode_parser.in=&istream;
1877  java_bytecode_parser.set_message_handler(message_handler);
1878 
1879  bool parser_result=java_bytecode_parser.parse();
1880 
1881  if(parser_result)
1882  {
1883  return {};
1884  }
1885 
1886  return std::move(java_bytecode_parser.parse_tree);
1887 }
1888 
1891  const std::string &file,
1892  message_handlert &message_handler,
1893  bool skip_instructions)
1894 {
1895  std::ifstream in(file, std::ios::binary);
1896 
1897  if(!in)
1898  {
1899  messaget message(message_handler);
1900  message.error() << "failed to open input file `"
1901  << file << '\'' << messaget::eom;
1902  return {};
1903  }
1904 
1905  return java_bytecode_parse(in, message_handler, skip_instructions);
1906 }
1907 
1912 {
1913  u2 local_variable_type_table_length=read_u2();
1914 
1915  INVARIANT(
1916  local_variable_type_table_length<=method.local_variable_table.size(),
1917  "Local variable type table cannot have more elements "
1918  "than the local variable table.");
1919  for(std::size_t i=0; i<local_variable_type_table_length; i++)
1920  {
1921  u2 start_pc=read_u2();
1922  u2 length=read_u2();
1923  u2 name_index=read_u2();
1924  u2 signature_index=read_u2();
1925  u2 index=read_u2();
1926 
1927  bool found=false;
1928  for(auto &lvar : method.local_variable_table)
1929  {
1930  // compare to entry in LVT
1931  if(lvar.index==index &&
1932  lvar.name==pool_entry(name_index).s &&
1933  lvar.start_pc==start_pc &&
1934  lvar.length==length)
1935  {
1936  found=true;
1937  lvar.signature=id2string(pool_entry(signature_index).s);
1938  break;
1939  }
1940  }
1941  INVARIANT(
1942  found,
1943  "Entry in LocalVariableTypeTable must be present in LVT");
1944  }
1945 }
1946 
1954 {
1955  const std::function<pool_entryt &(u2)> pool_entry_lambda =
1956  [this](u2 index) -> pool_entryt & { return pool_entry(index); };
1957 
1958  const base_ref_infot &ref_entry = entry.get_reference(pool_entry_lambda);
1959 
1960  const class_infot &class_entry = ref_entry.get_class(pool_entry_lambda);
1961  const name_and_type_infot &name_and_type =
1962  ref_entry.get_name_and_type(pool_entry_lambda);
1963 
1964  std::string class_name = class_entry.get_name(pool_entry_lambda);
1965  // replace '.' for '$' (inner classes)
1966  std::replace(class_name.begin(), class_name.end(), '.', '$');
1967  // replace '/' for '.' (package)
1968  std::replace(class_name.begin(), class_name.end(), '/', '.');
1969  const std::string method_ref =
1970  class_name + "." + name_and_type.get_name(pool_entry_lambda) + ':' +
1971  name_and_type.get_descriptor(pool_entry_lambda);
1972 
1973  lambda_method_handlet lambda_method_handle;
1974 
1975  if(has_prefix(name_and_type.get_name(pool_entry_lambda), "lambda$"))
1976  {
1977  // names seem to be lambda$POSTFIX$NUM
1978  // where POSTFIX is FUN for a function name in which the lambda is define
1979  // "static" when it is a static member of the class
1980  // "new" when it is a class variable, instantiated in <init>
1981  lambda_method_handle.lambda_method_name =
1982  name_and_type.get_name(pool_entry_lambda);
1983  lambda_method_handle.lambda_method_ref = method_ref;
1984  lambda_method_handle.handle_type =
1986 
1987  return lambda_method_handle;
1988  }
1989 
1990  return {};
1991 }
1992 
1997 {
1998  u2 num_bootstrap_methods = read_u2();
1999  for(size_t bootstrap_method_index = 0;
2000  bootstrap_method_index < num_bootstrap_methods;
2001  ++bootstrap_method_index)
2002  {
2003  u2 bootstrap_methodhandle_ref = read_u2();
2004  const pool_entryt &entry = pool_entry(bootstrap_methodhandle_ref);
2005 
2006  method_handle_infot method_handle{entry};
2007 
2008  u2 num_bootstrap_arguments = read_u2();
2009  debug() << "INFO: parse BootstrapMethod handle " << num_bootstrap_arguments
2010  << " #args" << eom;
2011 
2012  // read u2 values of entry into vector
2013  u2_valuest u2_values(num_bootstrap_arguments);
2014  for(size_t i = 0; i < num_bootstrap_arguments; i++)
2015  u2_values[i] = read_u2();
2016 
2017  // try parsing bootstrap method handle
2018  // each entry contains a MethodHandle structure
2019  // u2 tag
2020  // u2 reference kind which must be in the range from 1 to 9
2021  // u2 reference index into the constant pool
2022  //
2023  // reference kinds use the following
2024  // 1 to 4 must point to a CONSTANT_Fieldref structure
2025  // 5 or 8 must point to a CONSTANT_Methodref structure
2026  // 6 or 7 must point to a CONSTANT_Methodref or
2027  // CONSTANT_InterfaceMethodref structure, if the class file version
2028  // number is 52.0 or above, to a CONSTANT_Methodref only in the case
2029  // of less than 52.0
2030  // 9 must point to a CONSTANT_InterfaceMethodref structure
2031 
2032  // the index must point to a CONSTANT_String
2033  // CONSTANT_Class
2034  // CONSTANT_Integer
2035  // CONSTANT_Long
2036  // CONSTANT_Float
2037  // CONSTANT_Double
2038  // CONSTANT_MethodHandle
2039  // CONSTANT_MethodType
2040 
2041  // We read the three arguments here to see whether they correspond to
2042  // our hypotheses for this being a lambda function entry.
2043 
2044  // Need at least 3 arguments, the interface type, the method hanlde
2045  // and the method_type, otherwise it doesn't look like a call that we
2046  // understand
2047  if(num_bootstrap_arguments < 3)
2048  {
2050  parsed_class, bootstrap_method_index, std::move(u2_values));
2051  debug()
2052  << "format of BootstrapMethods entry not recognized: too few arguments"
2053  << eom;
2054  continue;
2055  }
2056 
2057  u2 interface_type_index = u2_values[0];
2058  u2 method_handle_index = u2_values[1];
2059  u2 method_type_index = u2_values[2];
2060 
2061  // The additional arguments for the altmetafactory call are skipped,
2062  // as they are currently not used. We verify though that they are of
2063  // CONSTANT_Integer type, cases where this does not hold will be
2064  // analyzed further.
2065  bool recognized = true;
2066  for(size_t i = 3; i < num_bootstrap_arguments; i++)
2067  {
2068  u2 skipped_argument = u2_values[i];
2069  recognized &= pool_entry(skipped_argument).tag == CONSTANT_Integer;
2070  }
2071 
2072  if(!recognized)
2073  {
2074  debug() << "format of BootstrapMethods entry not recognized: extra "
2075  "arguments of wrong type"
2076  << eom;
2078  parsed_class, bootstrap_method_index, std::move(u2_values));
2079  continue;
2080  }
2081 
2082  const pool_entryt &interface_type_argument =
2083  pool_entry(interface_type_index);
2084  const pool_entryt &method_handle_argument = pool_entry(method_handle_index);
2085  const pool_entryt &method_type_argument = pool_entry(method_type_index);
2086 
2087  if(
2088  interface_type_argument.tag != CONSTANT_MethodType ||
2089  method_handle_argument.tag != CONSTANT_MethodHandle ||
2090  method_type_argument.tag != CONSTANT_MethodType)
2091  {
2092  debug() << "format of BootstrapMethods entry not recognized: arguments "
2093  "wrong type"
2094  << eom;
2096  parsed_class, bootstrap_method_index, std::move(u2_values));
2097  continue;
2098  }
2099 
2100  debug() << "INFO: parse lambda handle" << eom;
2101  optionalt<lambda_method_handlet> lambda_method_handle =
2102  parse_method_handle(method_handle_infot{method_handle_argument});
2103 
2104  if(!lambda_method_handle.has_value())
2105  {
2106  debug() << "format of BootstrapMethods entry not recognized: method "
2107  "handle not recognised"
2108  << eom;
2110  parsed_class, bootstrap_method_index, std::move(u2_values));
2111  continue;
2112  }
2113 
2114  // If parse_method_handle can't parse the lambda method, it should return {}
2115  POSTCONDITION(
2116  lambda_method_handle->handle_type != method_handle_typet::UNKNOWN_HANDLE);
2117 
2118  lambda_method_handle->interface_type =
2119  pool_entry(interface_type_argument.ref1).s;
2120  lambda_method_handle->method_type = pool_entry(method_type_argument.ref1).s;
2121  lambda_method_handle->u2_values = std::move(u2_values);
2122  debug() << "lambda function reference "
2123  << id2string(lambda_method_handle->lambda_method_name)
2124  << " in class \"" << parsed_class.name << "\""
2125  << "\n interface type is "
2126  << id2string(pool_entry(interface_type_argument.ref1).s)
2127  << "\n method type is "
2128  << id2string(pool_entry(method_type_argument.ref1).s) << eom;
2129  parsed_class.add_method_handle(
2130  bootstrap_method_index, *lambda_method_handle);
2131  }
2132 }
2133 
2140  java_bytecode_parsert::classt &parsed_class,
2141  size_t bootstrap_method_index,
2142  java_bytecode_parsert::u2_valuest u2_values) const
2143 {
2144  const lambda_method_handlet lambda_method_handle(std::move(u2_values));
2145  parsed_class.add_method_handle(bootstrap_method_index, lambda_method_handle);
2146 }
#define VTYPE_INFO_OBJECT
The type of an expression, extends irept.
Definition: type.h:27
java_bytecode_parse_treet::classt::lambda_method_handlet lambda_method_handlet
Fixed-width bit-vector with unsigned binary interpretation.
Definition: std_types.h:1223
void rinterfaces(classt &parsed_class)
void store_unknown_method_handle(classt &parsed_class, size_t bootstrap_method_index, u2_valuest u2_values) const
Creates an unknown method handle and puts it into the parsed_class.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
void set_java_bytecode_index(const irep_idt &index)
#define ACC_NATIVE
#define UNUSED_u2(x)
#define VTYPE_INFO_LONG
void get_dependencies_from_generic_parameters(const std::string &signature, std::set< irep_idt > &refs)
Collect information about generic type parameters from a given signature.
Definition: java_types.cpp:872
java_bytecode_parse_treet::classt::u2_valuest u2_valuest
void rRuntimeAnnotation_attribute(annotationst &)
uint64_t u8
Definition: bytecode_info.h:58
#define CONSTANT_Methodref
constant_exprt to_expr() const
Definition: ieee_float.cpp:698
#define CONSTANT_MethodType
void rinner_classes_attribute(classt &parsed_class, const u4 &attribute_length)
Corresponds to the element_value structure Described in Java 8 specification 4.7.6 https://docs....
struct_tag_typet java_classname(const std::string &id)
Definition: java_types.cpp:734
std::vector< instructiont > instructionst
pool_entryt & pool_entry(u2 index)
void rmethod(classt &parsed_class)
std::istream * in
Definition: parser.h:26
method_handle_kindt
Correspond to the different valid values for field reference_kind From Java 8 spec 4....
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
#define ACC_PROTECTED
typet java_int_type()
Definition: java_types.cpp:32
java_bytecode_parse_treet::annotationst annotationst
void rmethods(classt &parsed_class)
#define CONSTANT_InterfaceMethodref
struct bytecode_infot const bytecode_info[]
An expression denoting a type.
Definition: std_expr.h:4370
java_bytecode_parse_treet parse_tree
#define ACC_PRIVATE
#define VTYPE_INFO_DOUBLE
#define CONSTANT_Integer
void get_annotation_class_refs(const java_bytecode_parse_treet::annotationst &annotations)
For each of the given annotations, get a reference to its class and recursively get class references ...
void replace(const union_find_replacet &replace_map, string_not_contains_constraintt &constraint)
const componentst & components() const
Definition: std_types.h:205
#define CONSTANT_Fieldref
Represents the argument of an instruction that uses a CONSTANT_Fieldref This is used for example as a...
typet java_long_type()
Definition: java_types.cpp:42
typet & type()
Return the type of the expression.
Definition: expr.h:68
#define CONSTANT_Float
Corresponds to the CONSTANT_Class_info Structure Described in Java 8 specification 4....
void add_method_handle(size_t bootstrap_index, const lambda_method_handlet &handle)
std::vector< annotationst > parameter_annotations
Java annotations that were applied to parameters of this method.
std::vector< annotationt > annotationst
class_infot(const pool_entryt &entry)
#define VTYPE_INFO_ITEM_NULL
#define CONSTANT_Class
Structure type, corresponds to C style structs.
Definition: std_types.h:276
uint32_t u4
Definition: bytecode_info.h:57
#define POSTCONDITION(CONDITION)
Definition: invariant.h:454
#define T_FLOAT
std::string get_name(pool_entry_lookupt pool_entry) const
Definition: parser.h:23
#define ACC_ABSTRACT
#define T_BYTE
static ieee_float_spect double_precision()
Definition: ieee_float.h:80
class_infot get_class(pool_entry_lookupt pool_entry) const
void relement_value_pairs(annotationt::element_value_pairst &)
#define CONSTANT_MethodHandle
Parser utilities.
#define CONSTANT_Long
java_bytecode_parsert::pool_entryt pool_entryt
const irep_idt & id() const
Definition: irep.h:259
typet java_type_from_string(const std::string &src, const std::string &class_name_prefix)
Transforms a string representation of a Java type into an internal type representation thereof.
Definition: java_types.cpp:492
java_bytecode_parse_treet::instructiont instructiont
#define ACC_FINAL
java_bytecode_parsert(bool skip_instructions)
void unpack(const mp_integer &i)
Definition: ieee_float.cpp:316
std::vector< pool_entryt > constant_poolt
name_and_type_infot get_name_and_type(pool_entry_lookupt pool_entry) const
#define CONSTANT_Double
#define VTYPE_INFO_UNINIT
#define ACC_BRIDGE
#define CONSTANT_NameAndType
Fixed-width bit-vector with two's complement interpretation.
Definition: std_types.h:1278
void parse_local_variable_type_table(methodt &method)
Parses the local variable type table of a method.
#define CONSTANT_Utf8
nonstd::optional< T > optionalt
Definition: optional.h:35
java_bytecode_parse_treet::classt classt
API to expression classes.
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:212
mstreamt & error() const
Definition: message.h:386
void set_line(const irep_idt &line)
static ieee_float_spect single_precision()
Definition: ieee_float.h:74
name_and_type_infot(java_bytecode_parsert::pool_entryt entry)
base_ref_infot get_reference(pool_entry_lookupt pool_entry) const
#define PRECONDITION(CONDITION)
Definition: invariant.h:438
#define VTYPE_INFO_UNINIT_THIS
#define T_DOUBLE
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:543
#define ACC_SYNCHRONIZED
#define ACC_SYNTHETIC
uint16_t u2
Definition: bytecode_info.h:56
#define VTYPE_INFO_TOP
base_ref_infot(pool_entryt entry)
virtual void set_message_handler(message_handlert &_message_handler)
Definition: message.h:169
void rbytecode(methodt::instructionst &)
int32_t s4
Definition: bytecode_info.h:61
#define T_LONG
void skip_bytes(std::size_t bytes)
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:144
#define CONSTANT_String
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:349
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:35
void rclass_attribute(classt &parsed_class)
void rmethod_attribute(methodt &method)
#define T_INT
void get_class_refs_rec(const typet &)
#define VTYPE_INFO_FLOAT
#define ACC_ENUM
typet java_type_from_string_with_exception(const std::string &descriptor, const optionalt< std::string > &signature, const std::string &class_name)
Definition: java_types.h:705
std::vector< element_value_pairt > element_value_pairst
std::string get_name(pool_entry_lookupt pool_entry) const
static eomt eom
Definition: message.h:284
optionalt< java_bytecode_parse_treet > java_bytecode_parse(std::istream &istream, message_handlert &message_handler, bool skip_instructions)
Attempt to parse a Java class from the given stream.
#define T_BOOLEAN
java_bytecode_parse_treet::classt::fieldst fieldst
method_handle_kindt reference_kind
void rfields(classt &parsed_class)
#define ACC_STATIC
void get_annotation_value_class_refs(const exprt &value)
See java_bytecode_parsert::get_annotation_class_refs.
std::vector< bytecodet > bytecodes
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:338
java_bytecode_parse_treet::annotationt annotationt
mstreamt & result() const
Definition: message.h:396
const char * mnemonic
Definition: bytecode_info.h:46
#define ACC_INTERFACE
uint8_t u1
Definition: bytecode_info.h:55
std::function< java_bytecode_parsert::pool_entryt &(u2)> pool_entry_lookupt
structured_pool_entryt(java_bytecode_parsert::pool_entryt entry)
Base class for all expressions.
Definition: expr.h:54
#define VTYPE_INFO_INTEGER
java_bytecode_parse_treet::methodt::instructionst instructionst
const parameterst & parameters() const
Definition: std_types.h:893
void read_bootstrapmethods_entry(classt &)
Read all entries of the BootstrapMethods attribute of a class file.
#define ACC_PUBLIC
java_bytecode_parse_treet::fieldt fieldt
#define T_SHORT
void read_verification_type_info(methodt::verification_type_infot &)
u2 get_name_and_type_index() const
static std::string binary(const constant_exprt &src)
Definition: json_expr.cpp:224
#define T_CHAR
method_handle_infot(java_bytecode_parsert::pool_entryt entry)
java_bytecode_parse_treet::methodt methodt
const typet type_entry(u2 index)
#define ACC_ANNOTATION
void rcode_attribute(methodt &method)
Corresponds to the CONSTANT_NameAndType_info Structure Described in Java 8 specification 4....
Expression to hold a symbol (variable)
Definition: std_expr.h:143
const char * c_str() const
Definition: dstring.h:86
int8_t s1
Definition: bytecode_info.h:59
int16_t s2
Definition: bytecode_info.h:60
mstreamt & debug() const
Definition: message.h:416
exprt get_relement_value()
Corresponds to the element_value structure Described in Java 8 specification 4.7.16....
void rRuntimeAnnotation(annotationt &)
const typet & subtype() const
Definition: type.h:38
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:485
java_bytecode_parse_treet::classt::methodst methodst
operandst & operands()
Definition: expr.h:78
void get_class_refs()
Get the class references for the benefit of a dependency analysis.
std::string get_descriptor(pool_entry_lookupt pool_entry) const
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
bool empty() const
Definition: dstring.h:75
java_bytecode_parse_treet::classt::method_handle_typet method_handle_typet
const irept & find(const irep_namet &name) const
Definition: irep.cpp:284
std::vector< irep_idt > rexceptions_attribute()
Corresponds to the element_value structure Described in Java 8 specification 4.7.5 https://docs....
#define CONSTANT_InvokeDynamic
const typet & return_type() const
Definition: std_types.h:883
static std::string read_utf8_constant(const pool_entryt &entry)
Array constructor from list of elements.
Definition: std_expr.h:1739
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:286
#define ACC_VARARGS
optionalt< lambda_method_handlet > parse_method_handle(const class method_handle_infot &entry)
Read method handle pointed to from constant pool entry at index, return type of method handle and nam...
static format_containert< T > format(const T &o)
Definition: format.h:35
Definition: kdev_t.h:19