CBMC
java_bytecode_concurrency_instrumentation.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Java Convert Thread blocks
4 
5 Author: Kurt Degiogrio, kurt.degiorgio@diffblue.com
6 
7 \*******************************************************************/
8 
10 
11 #include <util/arith_tools.h>
12 #include <util/cprover_prefix.h>
13 #include <util/expr_iterator.h>
14 #include <util/message.h>
15 #include <util/namespace.h>
16 #include <util/std_types.h>
17 #include <util/symbol_table_base.h>
18 
19 #include "expr2java.h"
20 #include "java_types.h"
21 #include "java_utils.h"
22 
23 // Disable linter to allow an std::string constant.
24 const std::string next_thread_id = CPROVER_PREFIX "_next_thread_id";// NOLINT(*)
25 const std::string thread_id = CPROVER_PREFIX "_thread_id";// NOLINT(*)
26 
37  symbol_table_baset &symbol_table,
38  const irep_idt &name,
39  const irep_idt &base_name,
40  const typet &type,
41  const exprt &value,
42  const bool is_thread_local,
43  const bool is_static_lifetime)
44 {
45  const symbolt *psymbol = nullptr;
46  namespacet ns(symbol_table);
47  ns.lookup(name, psymbol);
48  if(psymbol != nullptr)
49  return *psymbol;
50  symbolt new_symbol{name, type, ID_java};
51  new_symbol.pretty_name = name;
52  new_symbol.base_name = base_name;
53  new_symbol.value = value;
54  new_symbol.is_lvalue = true;
55  new_symbol.is_state_var = true;
56  new_symbol.is_static_lifetime = is_static_lifetime;
57  new_symbol.is_thread_local = is_thread_local;
58  symbol_table.add(new_symbol);
59  return new_symbol;
60 }
61 
66 static const std::string get_first_label_id(const std::string &id)
67 {
68  return CPROVER_PREFIX "_THREAD_ENTRY_" + id;
69 }
70 
75 static const std::string get_second_label_id(const std::string &id)
76 {
77  return CPROVER_PREFIX "_THREAD_EXIT_" + id;
78 }
79 
84 static const std::string get_thread_block_identifier(
85  const code_function_callt &f_code)
86 {
87  PRECONDITION(f_code.arguments().size() == 1);
88  const exprt &expr = f_code.arguments()[0];
89  const mp_integer lbl_id =
90  numeric_cast_v<mp_integer>(to_constant_expr(to_multi_ary_expr(expr).op0()));
91  return integer2string(lbl_id);
92 }
93 
102  const symbol_table_baset &symbol_table,
103  bool is_enter,
104  const exprt &object)
105 {
106  const irep_idt symbol =
107  is_enter ? "java::java.lang.Object.monitorenter:(Ljava/lang/Object;)V"
108  : "java::java.lang.Object.monitorexit:(Ljava/lang/Object;)V";
109 
110  auto it = symbol_table.symbols.find(symbol);
111 
112  // If the 'java::java.lang.Object.monitorenter:(Ljava/lang/Object;)V' or
113  // 'java::java.lang.Object.monitorexit:(Ljava/lang/Object;)V' method is not
114  // found symex will rightfully complain as it cannot find the symbol
115  // associated with the method in question. To avoid this and thereby ensuring
116  // JBMC works both with and without the models, we replace the aforementioned
117  // methods with skips when we cannot find them.
118  //
119  // Note: this can only happen if the java-core-models library is not loaded.
120  //
121  // Note': we explicitly prevent JBMC from stubbing these methods.
122  if(it == symbol_table.symbols.end())
123  return code_skipt();
124 
125  // Otherwise we create a function call
126  return code_function_callt(symbol_exprt(symbol, it->second.type), {object});
127 }
128 
133 static void monitor_exits(codet &code, const codet &monitorexit)
134 {
135  const irep_idt &statement = code.get_statement();
136  if(statement == ID_return)
137  {
138  // Replace the return with a monitor exit plus return block
139  code = code_blockt({monitorexit, code});
140  }
141  else if(
142  statement == ID_label || statement == ID_block ||
143  statement == ID_decl_block)
144  {
145  // If label or block found, explore the code inside the block
146  Forall_operands(it, code)
147  {
148  codet &sub_code = to_code(*it);
149  monitor_exits(sub_code, monitorexit);
150  }
151  }
152 }
153 
207  symbol_table_baset &symbol_table,
208  symbolt &symbol,
209  const exprt &sync_object)
210 {
211  PRECONDITION(!symbol.type.get_bool(ID_is_static));
212 
213  codet &code = to_code(symbol.value);
214 
215  // Get the calls to the functions that implement the critical section
216  codet monitorenter = get_monitor_call(symbol_table, true, sync_object);
217  codet monitorexit = get_monitor_call(symbol_table, false, sync_object);
218 
219  // Create a unique catch label and empty throw type (i.e. any)
220  // and catch-push them at the beginning of the code (i.e. begin try)
221  code_push_catcht catch_push;
222  irep_idt handler("pc-synchronized-catch");
223  irep_idt exception_id;
224  code_push_catcht::exception_listt &exception_list =
225  catch_push.exception_list();
226  exception_list.push_back(
227  code_push_catcht::exception_list_entryt(exception_id, handler));
228 
229  // Create a catch-pop to indicate the end of the try block
230  code_pop_catcht catch_pop;
231 
232  // Create the finally block with the same label targeted in the catch-push
233  const symbolt &tmp_symbol = fresh_java_symbol(
234  java_reference_type(struct_tag_typet("java::java.lang.Throwable")),
235  "caught-synchronized-exception",
236  code.source_location(),
237  id2string(symbol.name),
238  symbol_table);
239  symbol_exprt catch_var(tmp_symbol.name, tmp_symbol.type);
240  catch_var.set(ID_C_base_name, tmp_symbol.base_name);
241  code_landingpadt catch_statement(catch_var);
242  codet catch_instruction = catch_statement;
243  code_labelt catch_label(handler, code_blockt());
244  code_blockt &catch_block = to_code_block(catch_label.code());
245  catch_block.add(catch_instruction);
246  catch_block.add(monitorexit);
247 
248  // Re-throw exception
249  side_effect_expr_throwt throw_expr(irept(), typet(), code.source_location());
250  throw_expr.copy_to_operands(catch_var);
251  catch_block.add(code_expressiont(throw_expr));
252 
253  // Write a monitorexit before every return
254  monitor_exits(code, monitorexit);
255 
256  // Wrap the code into a try finally
257  code = code_blockt({monitorenter, catch_push, code, catch_pop, catch_label});
258 }
259 
270  const code_function_callt &f_code,
271  codet &code,
272  symbol_table_baset &symbol_table)
273 {
274  PRECONDITION(f_code.arguments().size() == 1);
275 
276  // Create global variable __CPROVER__next_thread_id. Used as a counter
277  // in-order to to assign ids to new threads.
278  const symbol_exprt next_symbol_expr = add_or_get_symbol(
279  symbol_table,
282  java_int_type(),
284  false,
285  true)
286  .symbol_expr();
287 
288  // Create thread-local variable __CPROVER__thread_id. Holds the id of
289  // the thread.
290  const symbolt &current_symbol =
292  symbol_table, thread_id, thread_id, java_int_type(),
293  from_integer(0, java_int_type()), true, true);
294 
295  // Construct appropriate labels.
296  // Note: java does not have labels so this should be safe.
297  const std::string &thread_block_id = get_thread_block_identifier(f_code);
298  const std::string &lbl1 = get_first_label_id(thread_block_id);
299  const std::string &lbl2 = get_second_label_id(thread_block_id);
300 
301  // Instrument the following codet's:
302  //
303  // A: codet(id=ID_start_thread, destination=__CPROVER_THREAD_ENTRY_<ID>)
304  // B: codet(id=ID_goto, destination=__CPROVER_THREAD_EXIT_<ID>)
305  // C: codet(id=ID_label, label=__CPROVER_THREAD_ENTRY_<ID>)
306  // C.1: codet(id=ID_atomic_begin)
307  // D: CPROVER__next_thread_id += 1;
308  // E: __CPROVER__thread_id = __CPROVER__next_thread_id;
309  // F: codet(id=ID_atomic_end)
310 
311  codet tmp_a(ID_start_thread);
312  tmp_a.set(ID_destination, lbl1);
313  code_gotot tmp_b(lbl2);
314  const code_labelt tmp_c(lbl1, code_skipt());
315 
316  const plus_exprt plus(next_symbol_expr, from_integer(1, java_int_type()));
317  const code_assignt tmp_d(next_symbol_expr, plus);
318  const code_assignt tmp_e(current_symbol.symbol_expr(), next_symbol_expr);
319 
320  code = code_blockt({tmp_a,
321  tmp_b,
322  tmp_c,
323  codet(ID_atomic_begin),
324  tmp_d,
325  tmp_e,
326  codet(ID_atomic_end)});
327  code.add_source_location() = f_code.source_location();
328 }
329 
340  const code_function_callt &f_code,
341  codet &code,
342  const symbol_table_baset &symbol_table)
343 {
344  PRECONDITION(f_code.arguments().size() == 1);
345  (void)symbol_table; // unused parameter
346 
347  // Build id, used to construct appropriate labels.
348  // Note: java does not have labels so this should be safe
349  const std::string &thread_block_id = get_thread_block_identifier(f_code);
350  const std::string &lbl2 = get_second_label_id(thread_block_id);
351 
352  // Instrument the following code:
353  //
354  // A: codet(id=ID_end_thread)
355  // B: codet(id=ID_label,label= __CPROVER_THREAD_EXIT_<ID>)
356  codet tmp_a(ID_end_thread);
357  const code_labelt tmp_b(lbl2, code_skipt());
358 
359  const auto location = code.source_location();
360  code = code_blockt({tmp_a, tmp_b});
361  code.add_source_location() = location;
362 }
363 
374  const code_function_callt &f_code,
375  codet &code,
376  symbol_table_baset &symbol_table)
377 {
378  PRECONDITION(f_code.arguments().size() == 0);
379 
380  const symbolt& current_symbol =
381  add_or_get_symbol(symbol_table,
382  thread_id,
383  thread_id,
384  java_int_type(),
386  true, true);
387 
388  code_assignt code_assign(f_code.lhs(), current_symbol.symbol_expr());
389  code_assign.add_source_location() = f_code.source_location();
390 
391  code = code_assign;
392 }
393 
405  const code_function_callt &f_code,
406  codet &code,
407  symbol_table_baset &symbol_table)
408 {
409  PRECONDITION(f_code.arguments().size() == 1);
410 
411  const namespacet ns(symbol_table);
412  const auto &object_type = ns.follow_tag(to_struct_tag_type(
413  to_pointer_type(f_code.arguments()[0].type()).base_type()));
414  code_assignt code_assign(
415  f_code.lhs(),
416  member_exprt(
417  dereference_exprt(f_code.arguments()[0]),
418  object_type.get_component("cproverMonitorCount")));
419  code_assign.add_source_location() = f_code.source_location();
420 
421  code = code_assign;
422 }
423 
485 {
486  using instrument_callbackt = std::function<void(
488  using expr_replacement_mapt =
489  std::unordered_map<const exprt, instrument_callbackt, irep_hash>;
490 
491  namespacet ns(symbol_table);
492 
493  for(const auto &entry : symbol_table)
494  {
495  expr_replacement_mapt expr_replacement_map;
496  const symbolt &symbol = entry.second;
497 
498  // Look for code_function_callt's (without breaking sharing)
499  // and insert each one into a replacement map along with an associated
500  // callback that will handle their instrumentation.
501  for(auto it = symbol.value.depth_begin(), itend = symbol.value.depth_end();
502  it != itend; ++it)
503  {
504  instrument_callbackt cb;
505 
506  const exprt &expr = *it;
507  if(expr.id() != ID_code)
508  continue;
509 
510  const codet &code = to_code(expr);
511  if(code.get_statement() != ID_function_call)
512  continue;
513 
514  const code_function_callt &f_code = to_code_function_call(code);
515  const std::string &f_name = expr2java(f_code.function(), ns);
516  if(f_name == "org.cprover.CProver.startThread:(I)V")
517  cb = std::bind(
519  std::placeholders::_1,
520  std::placeholders::_2,
521  std::placeholders::_3);
522  else if(f_name == "org.cprover.CProver.endThread:(I)V")
523  cb = std::bind(
525  std::placeholders::_1,
526  std::placeholders::_2,
527  std::placeholders::_3);
528  else if(f_name == "org.cprover.CProver.getCurrentThreadId:()I")
529  cb = std::bind(
531  std::placeholders::_1,
532  std::placeholders::_2,
533  std::placeholders::_3);
534  else if(
535  f_name == "org.cprover.CProver.getMonitorCount:(Ljava/lang/Object;)I")
536  cb = std::bind(
538  std::placeholders::_1,
539  std::placeholders::_2,
540  std::placeholders::_3);
541 
542  if(cb)
543  expr_replacement_map.insert({expr, cb});
544  }
545 
546  if(!expr_replacement_map.empty())
547  {
548  symbolt &w_symbol = symbol_table.get_writeable_ref(entry.first);
549  // Use expr_replacment_map to look for exprt's that need to be replaced.
550  // Once found, get a non-const exprt (breaking sharing in the process) and
551  // call it's associated instrumentation function.
552  for(auto it = w_symbol.value.depth_begin(),
553  itend = w_symbol.value.depth_end(); it != itend;)
554  {
555  expr_replacement_mapt::iterator m_it = expr_replacement_map.find(*it);
556  if(m_it != expr_replacement_map.end())
557  {
558  codet &code = to_code(it.mutate());
559  const code_function_callt &f_code = to_code_function_call(code);
560  m_it->second(f_code, code, symbol_table);
561  it.next_sibling_or_parent();
562 
563  expr_replacement_map.erase(m_it);
564  if(expr_replacement_map.empty())
565  break;
566  }
567  else
568  ++it;
569  }
570  }
571  }
572 }
573 
603  symbol_table_baset &symbol_table,
604  message_handlert &message_handler)
605 {
606  namespacet ns(symbol_table);
607  for(auto s_it = symbol_table.begin(); s_it != symbol_table.end(); ++s_it)
608  {
609  const symbolt &symbol = s_it->second;
610 
611  if(symbol.type.id() != ID_code)
612  continue;
613  if(symbol.value.is_nil())
614  continue;
615  if(!symbol.type.get_bool(ID_is_synchronized))
616  continue;
617 
618  if(symbol.type.get_bool(ID_is_static))
619  {
620  messaget message(message_handler);
621  message.warning() << "Java method '" << s_it->first
622  << "' is static and synchronized."
623  << " This is unsupported, the synchronized keyword"
624  << " will be ignored." << messaget::eom;
625  continue;
626  }
627 
628  // find the object to synchronize on
629  const irep_idt this_id(id2string(symbol.name) + "::this");
630  exprt this_expr(this_id);
631 
632  auto it = symbol_table.symbols.find(this_id);
633 
634  if(it == symbol_table.symbols.end())
635  // failed to find object to synchronize on
636  continue;
637 
638  // get writeable reference and instrument the method
639  symbolt &w_symbol = s_it.get_writeable_symbol();
641  symbol_table, w_symbol, it->second.symbol_expr());
642  }
643 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
A goto_instruction_codet representing an assignment in the program.
A codet representing sequential composition of program statements.
Definition: std_code.h:130
void add(const codet &code)
Definition: std_code.h:168
codet representation of an expression statement.
Definition: std_code.h:1394
goto_instruction_codet representation of a function call statement.
codet representation of a goto statement.
Definition: std_code.h:841
codet representation of a label for branch targets.
Definition: std_code.h:959
codet & code()
Definition: std_code.h:977
A statement that catches an exception, assigning the exception in flight to an expression (e....
Definition: std_code.h:1936
Pops an exception handler from the stack of active handlers (i.e.
Definition: std_code.h:1899
Pushes an exception handler, of the form: exception_tag1 -> label1 exception_tag2 -> label2 ....
Definition: std_code.h:1805
std::vector< exception_list_entryt > exception_listt
Definition: std_code.h:1849
exception_listt & exception_list()
Definition: std_code.h:1860
A codet representing a skip statement.
Definition: std_code.h:322
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
const irep_idt & get_statement() const
Definition: std_code_base.h:65
Operator to dereference a pointer.
Definition: pointer_expr.h:834
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Base class for all expressions.
Definition: expr.h:56
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition: expr.h:163
depth_iteratort depth_end()
Definition: expr.cpp:249
source_locationt & add_source_location()
Definition: expr.h:236
depth_iteratort depth_begin()
Definition: expr.cpp:247
const source_locationt & source_location() const
Definition: expr.h:231
There are a large number of kinds of tree structured or tree-like data in CPROVER.
Definition: irep.h:364
bool get_bool(const irep_idt &name) const
Definition: irep.cpp:57
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:412
const irep_idt & id() const
Definition: irep.h:388
bool is_nil() const
Definition: irep.h:368
Extract member of struct or union.
Definition: std_expr.h:2854
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:154
mstreamt & warning() const
Definition: message.h:396
static eomt eom
Definition: message.h:289
const union_typet & follow_tag(const union_tag_typet &) const
Follow type tag of union type.
Definition: namespace.cpp:63
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:148
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
const typet & base_type() const
The type of the data what we point to.
Definition: pointer_expr.h:35
A side_effect_exprt representation of a side effect that throws an exception.
Definition: std_code.h:1757
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:493
Expression to hold a symbol (variable)
Definition: std_expr.h:131
The symbol table base class interface.
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
virtual iteratort begin()=0
const symbolst & symbols
Read-only field, used to look up symbols given their names.
virtual iteratort end()=0
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Symbol table entry.
Definition: symbol.h:28
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:46
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
exprt value
Initial value of symbol.
Definition: symbol.h:34
The type of an expression, extends irept.
Definition: type.h:29
#define CPROVER_PREFIX
std::string expr2java(const exprt &expr, const namespacet &ns)
Definition: expr2java.cpp:454
#define Forall_operands(it, expr)
Definition: expr.h:27
Forward depth-first search iterators These iterators' copy operations are expensive,...
const code_function_callt & to_code_function_call(const goto_instruction_codet &code)
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
const std::string next_thread_id
const std::string thread_id
static void instrument_get_monitor_count(const code_function_callt &f_code, codet &code, symbol_table_baset &symbol_table)
Transforms the codet stored in in f_code, which is a call to function CProver.getMonitorCount:(Ljava/...
static void instrument_start_thread(const code_function_callt &f_code, codet &code, symbol_table_baset &symbol_table)
Transforms the codet stored in in f_code, which is a call to function CProver.startThread:(I)V into a...
static const std::string get_thread_block_identifier(const code_function_callt &f_code)
Retrieves a thread block identifier from a function call to CProver.startThread:(I)V or CProver....
static symbolt add_or_get_symbol(symbol_table_baset &symbol_table, const irep_idt &name, const irep_idt &base_name, const typet &type, const exprt &value, const bool is_thread_local, const bool is_static_lifetime)
Adds a new symbol to the symbol table if it doesn't exist.
static void instrument_end_thread(const code_function_callt &f_code, codet &code, const symbol_table_baset &symbol_table)
Transforms the codet stored in in f_code, which is a call to function CProver.endThread:(I)V into a b...
static void monitor_exits(codet &code, const codet &monitorexit)
Introduces a monitorexit before every return recursively.
void convert_synchronized_methods(symbol_table_baset &symbol_table, message_handlert &message_handler)
Iterate through the symbol table to find and instrument synchronized methods.
static const std::string get_first_label_id(const std::string &id)
Retrieve the first label identifier.
static const std::string get_second_label_id(const std::string &id)
Retrieve the second label identifier.
static codet get_monitor_call(const symbol_table_baset &symbol_table, bool is_enter, const exprt &object)
Creates a monitorenter/monitorexit code_function_callt for the given synchronization object.
static void instrument_synchronized_code(symbol_table_baset &symbol_table, symbolt &symbol, const exprt &sync_object)
Transforms the codet stored in symbol.value.
static void instrument_get_current_thread_id(const code_function_callt &f_code, codet &code, symbol_table_baset &symbol_table)
Transforms the codet stored in in f_code, which is a call to function CProver.getCurrentThreadId:()I ...
void convert_threadblock(symbol_table_baset &symbol_table)
Iterate through the symbol table to find and appropriately instrument thread-blocks.
signedbv_typet java_int_type()
Definition: java_types.cpp:31
reference_typet java_reference_type(const typet &subtype)
Definition: java_types.cpp:88
symbolt & fresh_java_symbol(const typet &type, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &function_name, symbol_table_baset &symbol_table)
Definition: java_utils.cpp:555
const std::string integer2string(const mp_integer &n, unsigned base)
Definition: mp_arith.cpp:103
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
BigInt mp_integer
Definition: smt_terms.h:17
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
const code_blockt & to_code_block(const codet &code)
Definition: std_code.h:203
const codet & to_code(const exprt &expr)
const constant_exprt & to_constant_expr(const exprt &expr)
Cast an exprt to a constant_exprt.
Definition: std_expr.h:3055
const multi_ary_exprt & to_multi_ary_expr(const exprt &expr)
Cast an exprt to a multi_ary_exprt.
Definition: std_expr.h:987
Pre-defined types.
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:518
Author: Diffblue Ltd.