CBMC
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
java_string_literals.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Java string literal processing
4
5Author: Chris Smowton, chris.smowton@diffblue.com
6
7\*******************************************************************/
8
10#include "java_root_class.h"
12#include "java_types.h"
13#include "java_utils.h"
14
16#include <util/arith_tools.h>
18#include <util/namespace.h>
19#include <util/string_utils.h>
21#include <util/unicode.h>
22
26static array_exprt utf16_to_array(const std::wstring &in)
27{
28 const auto jchar=java_char_type();
30 {}, array_typet(jchar, from_integer(in.length(), java_int_type())));
31 for(const auto c : in)
32 ret.copy_to_operands(from_integer(c, jchar));
33 return ret;
34}
35
38 symbol_table_baset &symbol_table,
39 bool string_refinement_enabled)
40{
41 const irep_idt value = string_expr.value();
42 const struct_tag_typet string_type("java::java.lang.String");
43
44 const std::string escaped_symbol_name = escape_non_alnum(id2string(value));
45 const std::string escaped_symbol_name_with_prefix =
47
48 auto findit = symbol_table.symbols.find(escaped_symbol_name_with_prefix);
49 if(findit != symbol_table.symbols.end())
50 return findit->second.symbol_expr();
51
52#ifndef CPROVER_INVARIANT_DO_NOT_CHECK
53 const auto initialize_function = symbol_table.lookup(INITIALIZE_FUNCTION);
55 !initialize_function || initialize_function->value.is_nil(),
56 "Cannot create more string literals after making " INITIALIZE_FUNCTION);
57#endif
58
59 // Create a new symbol:
61 new_symbol.type.set(ID_C_constant, true);
62 new_symbol.base_name = escaped_symbol_name;
63 new_symbol.pretty_name = value;
64 new_symbol.is_lvalue = true;
65 new_symbol.is_state_var = true;
66 new_symbol.is_static_lifetime = true;
67
68 namespacet ns(symbol_table);
69
70 // Regardless of string refinement setting, at least initialize
71 // the literal with @clsid = String
72 struct_tag_typet jlo_symbol("java::java.lang.Object");
73 const auto &jlo_struct = ns.follow_tag(jlo_symbol);
75 const auto &jls_struct = ns.follow_tag(string_type);
76 java_root_class_init(jlo_init, jlo_struct, "java::java.lang.String");
77
78 // If string refinement *is* around, populate the actual
79 // contents as well:
80 if(string_refinement_enabled)
81 {
82 const array_exprt data =
84
85 struct_exprt literal_init({}, new_symbol.type);
86 literal_init.operands().resize(jls_struct.components().size());
87 const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object");
88 literal_init.operands()[jlo_nb] = jlo_init;
89
90 const std::size_t length_nb = jls_struct.component_number("length");
91 const typet &length_type = jls_struct.components()[length_nb].type();
92 const exprt length = from_integer(data.operands().size(), length_type);
93 literal_init.operands()[length_nb] = length;
94
95 // Initialize the string with a constant utf-16 array:
96 symbolt array_symbol{
97 escaped_symbol_name_with_prefix + "_constarray", data.type(), ID_java};
98 array_symbol.base_name = escaped_symbol_name + "_constarray";
99 array_symbol.pretty_name = value;
100 array_symbol.is_lvalue = true;
101 // These are basically const global data:
102 array_symbol.is_static_lifetime = true;
103 array_symbol.is_state_var = true;
104 array_symbol.value = data;
105 array_symbol.type.set(ID_C_constant, true);
106
107 if(symbol_table.add(array_symbol))
108 throw "failed to add constarray symbol to symbol table";
109
110 const symbol_exprt array_expr = array_symbol.symbol_expr();
113
114 const std::size_t data_nb = jls_struct.component_number("data");
115 literal_init.operands()[data_nb] = array_pointer;
116
117 // Associate array with pointer
119 escaped_symbol_name_with_prefix + "_return_value", typet{}, ID_java};
120 return_symbol.base_name = escaped_symbol_name + "_return_value";
121 return_symbol.pretty_name =
122 escaped_symbol_name.length() > 10
123 ? escaped_symbol_name.substr(0, 10) + "..._return_value"
124 : escaped_symbol_name + "_return_value";
125 return_symbol.is_lvalue = true;
126 return_symbol.is_static_lifetime = true;
127 return_symbol.is_state_var = true;
130 {array_symbol.value, array_pointer},
132 symbol_table);
133 return_symbol.type = return_symbol.value.type();
134 return_symbol.type.set(ID_C_constant, true);
135 if(symbol_table.add(return_symbol))
136 throw "failed to add return symbol to symbol table";
137
138 // Initialise the literal structure with
139 // (ABC_return_value, { ..., .length = N, .data = &ABC_constarray }),
140 // using a C-style comma expression to indicate that we need the
141 // _return_value global for its side-effects.
143 init_comma_expr.type() = literal_init.type();
144 init_comma_expr.add_to_operands(
145 return_symbol.symbol_expr(), std::move(literal_init));
146 new_symbol.value = init_comma_expr;
147 }
148 else if(jls_struct.components().size()>=1 &&
149 jls_struct.components()[0].get_name()=="@java.lang.Object")
150 {
151 // Case where something defined java.lang.String, so it has
152 // a proper base class (always java.lang.Object in practical
153 // JDKs seen so far)
154 struct_exprt literal_init({std::move(jlo_init)}, new_symbol.type);
155 for(const auto &comp : jls_struct.components())
156 {
157 if(comp.get_name()=="@java.lang.Object")
158 continue;
159 // Other members of JDK's java.lang.String we don't understand
160 // without string-refinement. Just zero-init them; consider using
161 // test-gen-like nondet object trees instead.
162 const auto init =
163 zero_initializer(comp.type(), string_expr.source_location(), ns);
164 CHECK_RETURN(init.has_value());
165 literal_init.copy_to_operands(*init);
166 }
167 new_symbol.value = literal_init;
168 }
169 else if(to_java_class_type(jls_struct).get_is_stub())
170 {
171 // Case where java.lang.String was stubbed, and so directly defines
172 // @class_identifier
173 new_symbol.value = jlo_init;
174 new_symbol.value.type() = string_type;
175 }
176
177 bool add_failed = symbol_table.add(new_symbol);
178 INVARIANT(
179 !add_failed,
180 "string literal symbol was already checked not to be "
181 "in the symbol table, so adding it should succeed");
182
183 return new_symbol.symbol_expr();
184}
185
187 const irep_idt &string_value,
188 symbol_table_baset &symbol_table,
189 bool string_refinement_enabled)
190{
193 symbol_table,
194 string_refinement_enabled);
195}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Operator to return the address of an object.
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:562
Array constructor from list of elements.
Definition std_expr.h:1621
const array_typet & type() const
Definition std_expr.h:1628
Arrays with given size.
Definition std_types.h:807
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:38
Base class for all expressions.
Definition expr.h:56
operandst & operands()
Definition expr.h:94
Array index operator.
Definition std_expr.h:1470
const union_typet & follow_tag(const union_tag_typet &) const
Follow type tag of union type.
Definition namespace.cpp:49
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition namespace.h:91
Struct constructor from list of elements.
Definition std_expr.h:1877
A struct tag type, i.e., struct_typet with an identifier.
Definition std_types.h:493
Expression to hold a symbol (variable)
Definition std_expr.h:131
The symbol table base class interface.
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Symbol table entry.
Definition symbol.h:28
The type of an expression, extends irept.
Definition type.h:29
std::optional< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
const std::string & id2string(const irep_idt &d)
Definition irep.h:44
void java_root_class_init(struct_exprt &jlo, const struct_typet &root_type, const irep_idt &class_identifier)
Adds members for an object of the root class (usually java.lang.Object).
Representation of a constant Java string.
static array_exprt utf16_to_array(const std::wstring &in)
Convert UCS-2 or UTF-16 to an array expression.
symbol_exprt get_or_create_string_literal_symbol(const java_string_literal_exprt &string_expr, symbol_table_baset &symbol_table, bool string_refinement_enabled)
Creates or gets an existing constant global symbol for a given string literal.
signedbv_typet java_int_type()
unsignedbv_typet java_char_type()
const java_class_typet & to_java_class_type(const typet &type)
Definition java_types.h:581
exprt make_function_application(const irep_idt &function_name, const exprt::operandst &arguments, const typet &range, symbol_table_baset &symbol_table)
Create a (mathematical) function application expression.
#define JAVA_STRING_LITERAL_PREFIX
Definition java_utils.h:103
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
#define INITIALIZE_FUNCTION
std::string escape_non_alnum(const std::string &to_escape)
Replace non-alphanumeric characters with _xx escapes, where xx are hex digits.
Author: Diffblue Ltd.
std::wstring utf8_to_utf16_native_endian(const std::string &in)
Convert UTF8-encoded string to UTF-16 with architecture-native endianness.
Definition unicode.cpp:191