CBMC
remove_asm.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Remove 'asm' statements by compiling them into suitable
4  standard goto program instructions
5 
6 Author: Daniel Kroening
7 
8 Date: December 2014
9 
10 \*******************************************************************/
11 
15 
16 #include "remove_asm.h"
17 
18 #include <util/c_types.h>
19 #include <util/pointer_expr.h>
20 #include <util/prefix.h>
21 #include <util/range.h>
22 #include <util/std_code.h>
23 #include <util/string_constant.h>
24 
27 
28 #include "assembler_parser.h"
29 
31 {
32 public:
34  symbol_tablet &_symbol_table,
35  goto_functionst &_goto_functions,
37  : symbol_table(_symbol_table),
38  goto_functions(_goto_functions),
40  {
41  }
42 
43  void operator()()
44  {
45  for(auto &f : goto_functions.function_map)
46  process_function(f.first, f.second);
47  }
48 
49 protected:
53 
55 
57  const irep_idt &function_id,
58  goto_programt::instructiont &instruction,
59  goto_programt &dest);
60 
62 
64  const irep_idt &,
65  const code_asmt &,
66  goto_programt &dest);
67 
69  const irep_idt &function_base_name,
70  const code_asm_gcct &code,
71  std::size_t n_args,
72  goto_programt &dest);
73 
75  const irep_idt &function_base_name,
76  const exprt::operandst &operands,
77  const code_asmt &code,
78  goto_programt &dest);
79 };
80 
90  const irep_idt &function_base_name,
91  const code_asm_gcct &code,
92  std::size_t n_args,
93  goto_programt &dest)
94 {
95  irep_idt function_identifier = function_base_name;
96 
98 
99  const typet void_pointer = pointer_type(empty_typet());
100 
101  // outputs
102  forall_operands(it, code.outputs())
103  {
104  if(it->operands().size() == 2)
105  {
106  arguments.push_back(typecast_exprt(
107  address_of_exprt(to_binary_expr(*it).op1()), void_pointer));
108  }
109  }
110 
111  // inputs
112  forall_operands(it, code.inputs())
113  {
114  if(it->operands().size() == 2)
115  {
116  arguments.push_back(typecast_exprt(
117  address_of_exprt(to_binary_expr(*it).op1()), void_pointer));
118  }
119  }
120 
121  // An inline asm statement may consist of multiple commands, not all of which
122  // use all of the inputs/outputs of the inline asm statement.
124  arguments.size() >= n_args,
125  "insufficient number of arguments for calling " +
126  id2string(function_identifier),
127  "required arguments: " + std::to_string(n_args),
128  code.pretty());
129  arguments.resize(n_args);
130 
131  code_typet fkt_type{
133  arguments.size(), code_typet::parametert{void_pointer}},
134  empty_typet()};
135 
136  auto fkt = symbol_exprt{function_identifier, fkt_type}.with_source_location(
137  code.source_location());
138 
139  code_function_callt function_call(std::move(fkt), std::move(arguments));
140 
141  dest.add(
142  goto_programt::make_function_call(function_call, code.source_location()));
143 
144  // do we have it?
145  if(!symbol_table.has_symbol(function_identifier))
146  {
147  symbolt symbol{function_identifier, fkt_type, ID_C};
148  symbol.base_name = function_base_name;
149 
150  symbol_table.add(symbol);
151 
152  goto_functions.function_map.emplace(function_identifier, goto_functiont());
153  }
154  else
155  {
157  symbol_table.lookup_ref(function_identifier).type == fkt_type,
158  "types of function " + id2string(function_identifier) + " should match",
159  code.pretty(),
160  symbol_table.lookup_ref(function_identifier).type.pretty(),
161  fkt_type.pretty());
162  }
163 }
164 
174  const irep_idt &function_base_name,
175  const exprt::operandst &operands,
176  const code_asmt &code,
177  goto_programt &dest)
178 {
179  irep_idt function_identifier = function_base_name;
180 
182 
183  const typet void_pointer = pointer_type(empty_typet());
184 
185  for(const auto &op : operands)
186  arguments.push_back(typecast_exprt::conditional_cast(op, void_pointer));
187 
188  code_typet fkt_type{
190  arguments.size(), code_typet::parametert{void_pointer}},
191  empty_typet()};
192 
193  auto fkt = symbol_exprt{function_identifier, fkt_type}.with_source_location(
194  code.source_location());
195  code_function_callt function_call(std::move(fkt), std::move(arguments));
196 
197  dest.add(
198  goto_programt::make_function_call(function_call, code.source_location()));
199 
200  // do we have it?
201  if(!symbol_table.has_symbol(function_identifier))
202  {
203  symbolt symbol{function_identifier, fkt_type, ID_C};
204  symbol.base_name = function_base_name;
205 
206  symbol_table.add(symbol);
207 
208  goto_functions.function_map.emplace(function_identifier, goto_functiont());
209  }
210  else
211  {
213  symbol_table.lookup_ref(function_identifier).type == fkt_type,
214  "function types should match");
215  }
216 }
217 
226  const irep_idt &function_id,
227  goto_programt::instructiont &instruction,
228  goto_programt &dest)
229 {
230  const code_asmt &code = to_code_asm(instruction.get_other());
231 
232  const irep_idt &flavor = code.get_flavor();
233 
234  if(flavor == ID_gcc)
236  else if(flavor == ID_msc)
237  process_instruction_msc(function_id, code, dest);
238  else
239  DATA_INVARIANT(false, "unexpected assembler flavor");
240 }
241 
248  const code_asm_gcct &code,
249  goto_programt &dest)
250 {
251  const irep_idt &i_str = to_string_constant(code.asm_text()).value();
252 
253  std::istringstream str(id2string(i_str));
254  assembler_parsert assembler_parser{message_handler};
255  assembler_parser.in = &str;
256  assembler_parser.parse();
257 
258  goto_programt tmp_dest;
259  bool unknown = false;
260  bool x86_32_locked_atomic = false;
261 
262  for(const auto &instruction : assembler_parser.instructions)
263  {
264  if(instruction.empty())
265  continue;
266 
267 #if 0
268  std::cout << "A ********************\n";
269  for(const auto &ins : instruction)
270  {
271  std::cout << "XX: " << ins.pretty() << '\n';
272  }
273 
274  std::cout << "B ********************\n";
275 #endif
276 
277  // deal with prefixes
278  irep_idt command;
279  unsigned pos = 0;
280 
281  if(
282  instruction.front().id() == ID_symbol &&
283  instruction.front().get(ID_identifier) == "lock")
284  {
285  x86_32_locked_atomic = true;
286  pos++;
287  }
288 
289  // done?
290  if(pos == instruction.size())
291  continue;
292 
293  if(instruction[pos].id() == ID_symbol)
294  {
295  command = instruction[pos].get(ID_identifier);
296  pos++;
297  }
298 
299  if(command == "xchg" || command == "xchgl")
300  x86_32_locked_atomic = true;
301 
302  if(x86_32_locked_atomic)
303  {
305 
306  codet code_fence(ID_fence);
307  code_fence.add_source_location() = code.source_location();
308  code_fence.set(ID_WWfence, true);
309  code_fence.set(ID_RRfence, true);
310  code_fence.set(ID_RWfence, true);
311  code_fence.set(ID_WRfence, true);
312 
313  tmp_dest.add(
314  goto_programt::make_other(code_fence, code.source_location()));
315  }
316 
317  if(command == "fstcw" || command == "fnstcw" || command == "fldcw") // x86
318  {
319  gcc_asm_function_call("__asm_" + id2string(command), code, 1, tmp_dest);
320  }
321  else if(
322  command == "mfence" || command == "lfence" || command == "sfence") // x86
323  {
324  gcc_asm_function_call("__asm_" + id2string(command), code, 0, tmp_dest);
325  }
326  else if(command == ID_sync) // Power
327  {
328  codet code_fence(ID_fence);
329  code_fence.add_source_location() = code.source_location();
330  code_fence.set(ID_WWfence, true);
331  code_fence.set(ID_RRfence, true);
332  code_fence.set(ID_RWfence, true);
333  code_fence.set(ID_WRfence, true);
334  code_fence.set(ID_WWcumul, true);
335  code_fence.set(ID_RWcumul, true);
336  code_fence.set(ID_RRcumul, true);
337  code_fence.set(ID_WRcumul, true);
338 
339  tmp_dest.add(
340  goto_programt::make_other(code_fence, code.source_location()));
341  }
342  else if(command == ID_lwsync) // Power
343  {
344  codet code_fence(ID_fence);
345  code_fence.add_source_location() = code.source_location();
346  code_fence.set(ID_WWfence, true);
347  code_fence.set(ID_RRfence, true);
348  code_fence.set(ID_RWfence, true);
349  code_fence.set(ID_WWcumul, true);
350  code_fence.set(ID_RWcumul, true);
351  code_fence.set(ID_RRcumul, true);
352 
353  tmp_dest.add(
354  goto_programt::make_other(code_fence, code.source_location()));
355  }
356  else if(command == ID_isync) // Power
357  {
358  codet code_fence(ID_fence);
359  code_fence.add_source_location() = code.source_location();
360 
361  tmp_dest.add(
362  goto_programt::make_other(code_fence, code.source_location()));
363  // doesn't do anything by itself,
364  // needs to be combined with branch
365  }
366  else if(command == "dmb" || command == "dsb") // ARM
367  {
368  codet code_fence(ID_fence);
369  code_fence.add_source_location() = code.source_location();
370  code_fence.set(ID_WWfence, true);
371  code_fence.set(ID_RRfence, true);
372  code_fence.set(ID_RWfence, true);
373  code_fence.set(ID_WRfence, true);
374  code_fence.set(ID_WWcumul, true);
375  code_fence.set(ID_RWcumul, true);
376  code_fence.set(ID_RRcumul, true);
377  code_fence.set(ID_WRcumul, true);
378 
379  tmp_dest.add(
380  goto_programt::make_other(code_fence, code.source_location()));
381  }
382  else if(command == "isb") // ARM
383  {
384  codet code_fence(ID_fence);
385  code_fence.add_source_location() = code.source_location();
386 
387  tmp_dest.add(
388  goto_programt::make_other(code_fence, code.source_location()));
389  // doesn't do anything by itself,
390  // needs to be combined with branch
391  }
392  else
393  unknown = true; // give up
394 
395  if(x86_32_locked_atomic)
396  {
398 
399  x86_32_locked_atomic = false;
400  }
401  }
402 
403  if(unknown)
404  {
405  // we give up; we should perhaps print a warning
406  }
407  else
408  dest.destructive_append(tmp_dest);
409 }
410 
418  const irep_idt &function_id,
419  const code_asmt &code,
420  goto_programt &dest)
421 {
422  const irep_idt &i_str = to_string_constant(code.op0()).value();
423 
424  std::istringstream str(id2string(i_str));
425  assembler_parsert assembler_parser{message_handler};
426  assembler_parser.in = &str;
427  assembler_parser.parse();
428 
429  goto_programt tmp_dest;
430  bool unknown = false;
431  bool x86_32_locked_atomic = false;
432 
433  for(const auto &instruction : assembler_parser.instructions)
434  {
435  if(instruction.empty())
436  continue;
437 
438 #if 0
439  std::cout << "A ********************\n";
440  for(const auto &ins : instruction)
441  {
442  std::cout << "XX: " << ins.pretty() << '\n';
443  }
444 
445  std::cout << "B ********************\n";
446 #endif
447 
448  // deal with prefixes
449  irep_idt command;
450  unsigned pos = 0;
451 
452  if(
453  instruction.front().id() == ID_symbol &&
454  instruction.front().get(ID_identifier) == "lock")
455  {
456  x86_32_locked_atomic = true;
457  pos++;
458  }
459 
460  // done?
461  if(pos == instruction.size())
462  continue;
463 
464  if(instruction[pos].id() == ID_symbol)
465  {
466  command = instruction[pos].get(ID_identifier);
467  pos++;
468  }
469 
470  if(command == "xchg" || command == "xchgl")
471  x86_32_locked_atomic = true;
472 
473  if(x86_32_locked_atomic)
474  {
476 
477  codet code_fence(ID_fence);
478  code_fence.add_source_location() = code.source_location();
479  code_fence.set(ID_WWfence, true);
480  code_fence.set(ID_RRfence, true);
481  code_fence.set(ID_RWfence, true);
482  code_fence.set(ID_WRfence, true);
483 
484  tmp_dest.add(
485  goto_programt::make_other(code_fence, code.source_location()));
486  }
487 
488  if(command == "fstcw" || command == "fnstcw" || command == "fldcw") // x86
489  {
491  // try to typecheck the argument
492  if(pos != instruction.size() && instruction[pos].id() == ID_symbol)
493  {
494  const irep_idt &name = instruction[pos].get(ID_identifier);
495  for(const auto &entry : equal_range(symbol_table.symbol_base_map, name))
496  {
497  // global scope symbol, don't replace a local one
498  if(entry.second == name && args[0].id() != ID_address_of)
499  {
500  args[0] =
502  }
503  // parameter or symbol in local scope
504  else if(has_prefix(
505  id2string(entry.second), id2string(function_id) + "::"))
506  {
507  args[0] = address_of_exprt{
508  symbol_table.lookup_ref(entry.second).symbol_expr()};
509  }
510  }
511  }
513  "__asm_" + id2string(command), args, code, tmp_dest);
514  }
515  else if(
516  command == "mfence" || command == "lfence" || command == "sfence") // x86
517  {
518  msc_asm_function_call("__asm_" + id2string(command), {}, code, tmp_dest);
519  }
520  else
521  unknown = true; // give up
522 
523  if(x86_32_locked_atomic)
524  {
526 
527  x86_32_locked_atomic = false;
528  }
529  }
530 
531  if(unknown)
532  {
533  // we give up; we should perhaps print a warning
534  }
535  else
536  dest.destructive_append(tmp_dest);
537 }
538 
545  const irep_idt &function_id,
546  goto_functionst::goto_functiont &goto_function)
547 {
548  bool did_something = false;
549 
550  Forall_goto_program_instructions(it, goto_function.body)
551  {
552  if(it->is_other() && it->get_other().get_statement() == ID_asm)
553  {
554  goto_programt tmp_dest;
555  process_instruction(function_id, *it, tmp_dest);
556  it->turn_into_skip();
557  did_something = true;
558 
559  goto_programt::targett next = it;
560  next++;
561 
562  goto_function.body.destructive_insert(next, tmp_dest);
563  }
564  }
565 
566  if(did_something)
567  remove_skip(goto_function.body);
568 }
569 
576  goto_functionst &goto_functions,
577  symbol_tablet &symbol_table,
578  message_handlert &message_handler)
579 {
580  remove_asmt rem(symbol_table, goto_functions, message_handler);
581  rem();
582 }
583 
592 void remove_asm(goto_modelt &goto_model, message_handlert &message_handler)
593 {
594  remove_asm(
595  goto_model.goto_functions, goto_model.symbol_table, message_handler);
596 }
597 
598 bool has_asm(const goto_functionst &goto_functions)
599 {
600  for(auto &function_it : goto_functions.function_map)
601  for(auto &instruction : function_it.second.body.instructions)
602  if(
603  instruction.is_other() &&
604  instruction.get_other().get_statement() == ID_asm)
605  {
606  return true;
607  }
608 
609  return false;
610 }
611 
612 bool has_asm(const goto_modelt &goto_model)
613 {
614  return has_asm(goto_model.goto_functions);
615 }
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
Operator to return the address of an object.
Definition: pointer_expr.h:540
exprt & op1()
Definition: expr.h:136
codet representation of an inline assembler statement, for the gcc flavor.
Definition: std_code.h:1297
exprt & outputs()
Definition: std_code.h:1315
exprt & asm_text()
Definition: std_code.h:1305
exprt & inputs()
Definition: std_code.h:1325
codet representation of an inline assembler statement.
Definition: std_code.h:1253
const irep_idt & get_flavor() const
Definition: std_code.h:1263
goto_instruction_codet representation of a function call statement.
exprt::operandst argumentst
Base type of functions.
Definition: std_types.h:583
std::vector< parametert > parameterst
Definition: std_types.h:585
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
exprt & op0()
Definition: expr.h:133
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
The empty type.
Definition: std_types.h:51
std::vector< exprt > operandst
Definition: expr.h:58
source_locationt & add_source_location()
Definition: expr.h:236
const source_locationt & source_location() const
Definition: expr.h:231
A collection of goto functions.
function_mapt function_map
::goto_functiont goto_functiont
A goto function, consisting of function body (see body) and parameter identifiers (see parameter_iden...
Definition: goto_function.h:24
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:31
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:34
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:181
const goto_instruction_codet & get_other() const
Get the statement for OTHER.
Definition: goto_program.h:314
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst::iterator targett
Definition: goto_program.h:614
void destructive_append(goto_programt &p)
Appends the given program p to *this. p is destroyed.
Definition: goto_program.h:722
static instructiont make_atomic_end(const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:990
static instructiont make_function_call(const code_function_callt &_code, const source_locationt &l=source_locationt::nil())
Create a function call instruction.
static instructiont make_other(const goto_instruction_codet &_code, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:957
static instructiont make_atomic_begin(const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:979
targett add(instructiont &&instruction)
Adds a given instruction at the end.
Definition: goto_program.h:739
std::string pretty(unsigned indent=0, unsigned max_indent=0) const
Definition: irep.cpp:482
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:412
The null pointer constant.
Definition: pointer_expr.h:909
remove_asmt(symbol_tablet &_symbol_table, goto_functionst &_goto_functions, message_handlert &message_handler)
Definition: remove_asm.cpp:33
goto_functionst & goto_functions
Definition: remove_asm.cpp:51
symbol_tablet & symbol_table
Definition: remove_asm.cpp:50
void process_function(const irep_idt &, goto_functionst::goto_functiont &)
Replaces inline assembly instructions in the goto function by non-assembly goto program instructions.
Definition: remove_asm.cpp:544
void operator()()
Definition: remove_asm.cpp:43
void process_instruction_msc(const irep_idt &, const code_asmt &, goto_programt &dest)
Translates the given inline assembly code (in msc style) to non-assembly goto program instructions.
Definition: remove_asm.cpp:417
void msc_asm_function_call(const irep_idt &function_base_name, const exprt::operandst &operands, const code_asmt &code, goto_programt &dest)
Adds a call to a library function that implements the given msc-style inline assembly statement.
Definition: remove_asm.cpp:173
void process_instruction_gcc(const code_asm_gcct &, goto_programt &dest)
Translates the given inline assembly code (in gcc style) to non-assembly goto program instructions.
Definition: remove_asm.cpp:247
message_handlert & message_handler
Definition: remove_asm.cpp:52
void gcc_asm_function_call(const irep_idt &function_base_name, const code_asm_gcct &code, std::size_t n_args, goto_programt &dest)
Adds a call to a library function that implements the given gcc-style inline assembly statement.
Definition: remove_asm.cpp:89
void process_instruction(const irep_idt &function_id, goto_programt::instructiont &instruction, goto_programt &dest)
Translates the given inline assembly code (which must be in either gcc or msc style) to non-assembly ...
Definition: remove_asm.cpp:225
void value(const irep_idt &)
Expression to hold a symbol (variable)
Definition: std_expr.h:131
const symbol_base_mapt & symbol_base_map
Read-only field, used to look up symbol names given their base names.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
The symbol table.
Definition: symbol_table.h:14
Symbol table entry.
Definition: symbol.h:28
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
Semantic type conversion.
Definition: std_expr.h:2068
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2076
The type of an expression, extends irept.
Definition: type.h:29
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
#define forall_operands(it, expr)
Definition: expr.h:20
Symbol Table + CFG.
#define Forall_goto_program_instructions(it, program)
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
literalt pos(literalt a)
Definition: literal.h:194
API to expression classes for Pointers.
Ranges: pair of begin and end iterators, which can be initialized from containers,...
ranget< typename multimapt::const_iterator > equal_range(const multimapt &multimap, const typename multimapt::key_type &key)
Utility function to make equal_range method of multimap easier to use by returning a ranget object.
Definition: range.h:539
void remove_asm(goto_functionst &goto_functions, symbol_tablet &symbol_table, message_handlert &message_handler)
Replaces inline assembly instructions in the goto program (i.e., instructions of kind OTHER with a co...
Definition: remove_asm.cpp:575
bool has_asm(const goto_functionst &goto_functions)
returns true iff the given goto functions use asm instructions
Definition: remove_asm.cpp:598
Remove 'asm' statements by compiling them into suitable standard goto program instructions.
void remove_skip(goto_programt &goto_program, goto_programt::targett begin, goto_programt::targett end)
remove unnecessary skip statements
Definition: remove_skip.cpp:87
Program Transformation.
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
#define DATA_INVARIANT_WITH_DIAGNOSTICS(CONDITION, REASON,...)
Definition: invariant.h:535
code_asm_gcct & to_code_asm_gcc(codet &code)
Definition: std_code.h:1373
code_asmt & to_code_asm(codet &code)
Definition: std_code.h:1282
const binary_exprt & to_binary_expr(const exprt &expr)
Cast an exprt to a binary_exprt.
Definition: std_expr.h:715
const string_constantt & to_string_constant(const exprt &expr)
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.