CBMC
local_may_alias.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Field-insensitive, location-sensitive may-alias analysis
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "local_may_alias.h"
13 
14 #include <iterator>
15 #include <algorithm>
16 
17 #include <util/arith_tools.h>
18 #include <util/pointer_expr.h>
19 #include <util/std_code.h>
20 
21 #include <util/c_types.h>
22 #include <langapi/language_util.h>
23 
26 {
27  bool changed=false;
28 
29  // do union; this should be amortized linear
30  for(std::size_t i=0; i<src.aliases.size(); i++)
31  {
32  std::size_t root=src.aliases.find(i);
33 
34  if(!aliases.same_set(i, root))
35  {
36  aliases.make_union(i, root);
37  changed=true;
38  }
39  }
40 
41  return changed;
42 }
43 
45  const exprt &lhs,
46  const exprt &rhs,
47  const loc_infot &loc_info_src,
48  loc_infot &loc_info_dest)
49 {
50  if(lhs.id()==ID_symbol)
51  {
52  if(lhs.type().id()==ID_pointer)
53  {
54  unsigned dest_pointer=objects.number(lhs);
55 
56  // isolate the lhs pointer
57  loc_info_dest.aliases.isolate(dest_pointer);
58 
59  object_sett rhs_set;
60  get_rec(rhs_set, rhs, loc_info_src);
61 
62  // make these all aliases
63  for(object_sett::const_iterator
64  p_it=rhs_set.begin();
65  p_it!=rhs_set.end();
66  p_it++)
67  {
68  loc_info_dest.aliases.make_union(dest_pointer, *p_it);
69  }
70  }
71  }
72  else if(lhs.id()==ID_dereference)
73  {
74  // this might invalidate all pointers that are
75  // a) local and dirty
76  // b) global
77  if(lhs.type().id()==ID_pointer)
78  {
79  for(std::size_t i=0; i<objects.size(); i++)
80  {
81  if(objects[i].id()==ID_symbol)
82  {
83  const irep_idt &identifier=
85 
86  if(dirty(identifier) || !locals.is_local(identifier))
87  {
88  loc_info_dest.aliases.isolate(i);
89  loc_info_dest.aliases.make_union(i, unknown_object);
90  }
91  }
92  }
93  }
94  }
95  else if(lhs.id()==ID_index)
96  {
97  assign_lhs(to_index_expr(lhs).array(), rhs, loc_info_src, loc_info_dest);
98  }
99  else if(lhs.id()==ID_member)
100  {
101  assign_lhs(
102  to_member_expr(lhs).struct_op(), rhs, loc_info_src, loc_info_dest);
103  }
104  else if(lhs.id()==ID_typecast)
105  {
106  assign_lhs(to_typecast_expr(lhs).op(), rhs, loc_info_src, loc_info_dest);
107  }
108  else if(lhs.id()==ID_if)
109  {
110  assign_lhs(to_if_expr(lhs).true_case(), rhs, loc_info_src, loc_info_dest);
111  assign_lhs(to_if_expr(lhs).false_case(), rhs, loc_info_src, loc_info_dest);
112  }
113 }
114 
115 std::set<exprt> local_may_aliast::get(
117  const exprt &rhs) const
118 {
119  local_cfgt::loc_mapt::const_iterator loc_it=cfg.loc_map.find(t);
120  CHECK_RETURN(loc_it != cfg.loc_map.end());
121 
122  const loc_infot &loc_info_src=loc_infos[loc_it->second];
123 
124  object_sett result_tmp;
125  get_rec(result_tmp, rhs, loc_info_src);
126 
127  std::set<exprt> result;
128 
129  for(object_sett::const_iterator
130  it=result_tmp.begin();
131  it!=result_tmp.end();
132  it++)
133  {
134  result.insert(objects[*it]);
135  }
136 
137  return result;
138 }
139 
142  const exprt &src1, const exprt &src2) const
143 {
144  local_cfgt::loc_mapt::const_iterator loc_it=cfg.loc_map.find(t);
145  CHECK_RETURN(loc_it != cfg.loc_map.end());
146 
147  const loc_infot &loc_info_src=loc_infos[loc_it->second];
148 
149  object_sett tmp1, tmp2;
150  get_rec(tmp1, src1, loc_info_src);
151  get_rec(tmp2, src2, loc_info_src);
152 
153  if(tmp1.find(unknown_object)!=tmp1.end() ||
154  tmp2.find(unknown_object)!=tmp2.end())
155  return true;
156 
157  std::list<unsigned> result;
158 
159  std::set_intersection(
160  tmp1.begin(), tmp1.end(),
161  tmp2.begin(), tmp2.end(),
162  std::back_inserter(result));
163 
164  return !result.empty();
165 }
166 
168  object_sett &dest,
169  const exprt &rhs,
170  const loc_infot &loc_info_src) const
171 {
172  if(rhs.is_constant())
173  {
174  if(rhs.is_zero())
175  dest.insert(objects.number(exprt(ID_null_object)));
176  else
177  dest.insert(objects.number(exprt(ID_integer_address_object)));
178  }
179  else if(rhs.id()==ID_symbol)
180  {
181  if(rhs.type().id()==ID_pointer)
182  {
183  unsigned src_pointer=objects.number(rhs);
184 
185  dest.insert(src_pointer);
186 
187  for(std::size_t i=0; i<loc_info_src.aliases.size(); i++)
188  if(loc_info_src.aliases.same_set(src_pointer, i))
189  dest.insert(i);
190  }
191  else
192  dest.insert(unknown_object);
193  }
194  else if(rhs.id()==ID_if)
195  {
196  get_rec(dest, to_if_expr(rhs).false_case(), loc_info_src);
197  get_rec(dest, to_if_expr(rhs).true_case(), loc_info_src);
198  }
199  else if(rhs.id()==ID_address_of)
200  {
201  const exprt &object=to_address_of_expr(rhs).object();
202 
203  if(object.id()==ID_symbol)
204  {
205  unsigned object_nr=objects.number(rhs);
206  dest.insert(object_nr);
207 
208  for(std::size_t i=0; i<loc_info_src.aliases.size(); i++)
209  if(loc_info_src.aliases.same_set(object_nr, i))
210  dest.insert(i);
211  }
212  else if(object.id()==ID_index)
213  {
214  const index_exprt &index_expr=to_index_expr(object);
215  if(index_expr.array().id()==ID_symbol)
216  {
217  index_exprt tmp1=index_expr;
218  tmp1.index() = from_integer(0, c_index_type());
219  address_of_exprt tmp2(tmp1);
220  unsigned object_nr=objects.number(tmp2);
221  dest.insert(object_nr);
222 
223  for(std::size_t i=0; i<loc_info_src.aliases.size(); i++)
224  if(loc_info_src.aliases.same_set(object_nr, i))
225  dest.insert(i);
226  }
227  else if(index_expr.array().id()==ID_string_constant)
228  {
229  index_exprt tmp1=index_expr;
230  tmp1.index() = from_integer(0, c_index_type());
231  address_of_exprt tmp2(tmp1);
232  unsigned object_nr=objects.number(tmp2);
233  dest.insert(object_nr);
234 
235  for(std::size_t i=0; i<loc_info_src.aliases.size(); i++)
236  if(loc_info_src.aliases.same_set(object_nr, i))
237  dest.insert(i);
238  }
239  else
240  dest.insert(unknown_object);
241  }
242  else
243  dest.insert(unknown_object);
244  }
245  else if(rhs.id()==ID_typecast)
246  {
247  get_rec(dest, to_typecast_expr(rhs).op(), loc_info_src);
248  }
249  else if(rhs.id()==ID_plus)
250  {
251  const auto &plus_expr = to_plus_expr(rhs);
252 
253  if(plus_expr.operands().size() >= 3)
254  {
256  plus_expr.op0().type().id() == ID_pointer,
257  "pointer in pointer-typed sum must be op0");
258  get_rec(dest, plus_expr.op0(), loc_info_src);
259  }
260  else if(plus_expr.operands().size() == 2)
261  {
262  // one must be pointer, one an integer
263  if(plus_expr.op0().type().id() == ID_pointer)
264  {
265  get_rec(dest, plus_expr.op0(), loc_info_src);
266  }
267  else if(plus_expr.op1().type().id() == ID_pointer)
268  {
269  get_rec(dest, plus_expr.op1(), loc_info_src);
270  }
271  else
272  dest.insert(unknown_object);
273  }
274  else
275  dest.insert(unknown_object);
276  }
277  else if(rhs.id()==ID_minus)
278  {
279  const auto &op0 = to_minus_expr(rhs).op0();
280 
281  if(op0.type().id() == ID_pointer)
282  {
283  get_rec(dest, op0, loc_info_src);
284  }
285  else
286  dest.insert(unknown_object);
287  }
288  else if(rhs.id()==ID_member)
289  {
290  dest.insert(unknown_object);
291  }
292  else if(rhs.id()==ID_index)
293  {
294  dest.insert(unknown_object);
295  }
296  else if(rhs.id()==ID_dereference)
297  {
298  dest.insert(unknown_object);
299  }
300  else if(rhs.id()==ID_side_effect)
301  {
302  const side_effect_exprt &side_effect_expr=to_side_effect_expr(rhs);
303  const irep_idt &statement=side_effect_expr.get_statement();
304 
305  if(statement==ID_allocate)
306  {
307  dest.insert(objects.number(exprt(ID_dynamic_object)));
308  }
309  else
310  dest.insert(unknown_object);
311  }
312  else if(rhs.is_nil())
313  {
314  // this means 'empty'
315  }
316  else
317  dest.insert(unknown_object);
318 }
319 
320 void local_may_aliast::build(const goto_functiont &goto_function)
321 {
322  if(cfg.nodes.empty())
323  return;
324 
325  work_queuet work_queue;
326 
327  // put all nodes into work queue
328  for(local_cfgt::node_nrt n=0; n<cfg.nodes.size(); n++)
329  work_queue.push(n);
330 
331  unknown_object=objects.number(exprt(ID_unknown));
332 
333  loc_infos.resize(cfg.nodes.size());
334 
335  (void)goto_function; // unused parameter
336 #if 0
337  // feed in sufficiently bad defaults
338  for(code_typet::parameterst::const_iterator
339  it=goto_function.type.parameters().begin();
340  it!=goto_function.type.parameters().end();
341  it++)
342  {
343  const irep_idt &identifier=it->get_identifier();
344  if(is_tracked(identifier))
345  loc_infos[0].points_to[objects.number(identifier)].objects.insert(
347  }
348 #endif
349 
350 #if 0
351  for(localst::locals_mapt::const_iterator
352  l_it=locals.locals_map.begin();
353  l_it!=locals.locals_map.end();
354  l_it++)
355  {
356  if(is_tracked(l_it->first))
357  loc_infos[0].aliases.make_union(
358  objects.number(l_it->second), unknown_object);
359  }
360 #endif
361 
362  while(!work_queue.empty())
363  {
364  local_cfgt::node_nrt loc_nr=work_queue.top();
365  const local_cfgt::nodet &node=cfg.nodes[loc_nr];
366  const goto_programt::instructiont &instruction=*node.t;
367  work_queue.pop();
368 
369  const loc_infot &loc_info_src=loc_infos[loc_nr];
370  loc_infot loc_info_dest=loc_infos[loc_nr];
371 
372  switch(instruction.type())
373  {
374  case ASSIGN:
375  {
376  assign_lhs(
377  instruction.assign_lhs(),
378  instruction.assign_rhs(),
379  loc_info_src,
380  loc_info_dest);
381  break;
382  }
383 
384  case DECL:
385  assign_lhs(
386  instruction.decl_symbol(), nil_exprt(), loc_info_src, loc_info_dest);
387  break;
388 
389  case DEAD:
390  assign_lhs(
391  instruction.dead_symbol(), nil_exprt(), loc_info_src, loc_info_dest);
392  break;
393 
394  case FUNCTION_CALL:
395  {
396  const auto &lhs = instruction.call_lhs();
397  if(lhs.is_not_nil())
398  assign_lhs(lhs, nil_exprt(), loc_info_src, loc_info_dest);
399 
400  // this might invalidate all pointers that are
401  // a) local and dirty
402  // b) global
403  for(std::size_t i = 0; i < objects.size(); i++)
404  {
405  if(objects[i].id() == ID_symbol)
406  {
407  const irep_idt &identifier =
409 
410  if(dirty(identifier) || !locals.is_local(identifier))
411  {
412  loc_info_dest.aliases.isolate(i);
413  loc_info_dest.aliases.make_union(i, unknown_object);
414  }
415  }
416  }
417  break;
418  }
419 
420  case CATCH:
421  case THROW:
422  DATA_INVARIANT(false, "Exceptions must be removed before analysis");
423  break;
424  case SET_RETURN_VALUE:
425 #if 0
426  DATA_INVARIANT(false, "SET_RETURN_VALUE must be removed before analysis");
427 #endif
428  break;
429  case GOTO: // Ignoring the guard is a valid over-approximation
430  case START_THREAD: // Require a concurrent analysis at higher level
431  case END_THREAD: // Require a concurrent analysis at higher level
432  case ATOMIC_BEGIN: // Ignoring is a valid over-approximation
433  case ATOMIC_END: // Ignoring is a valid over-approximation
434  case LOCATION: // No action required
435  case SKIP: // No action required
436  case END_FUNCTION: // No action required
437  case ASSERT: // No action required
438  case ASSUME: // Ignoring is a valid over-approximation
439  break;
440  case OTHER:
441 #if 0
443  false, "Unclear what is a safe over-approximation of OTHER");
444 #endif
445  break;
446  case INCOMPLETE_GOTO:
447  case NO_INSTRUCTION_TYPE:
448  DATA_INVARIANT(false, "Only complete instructions can be analyzed");
449  break;
450  }
451 
452  for(local_cfgt::successorst::const_iterator
453  it=node.successors.begin();
454  it!=node.successors.end();
455  it++)
456  {
457  if(loc_infos[*it].merge(loc_info_dest))
458  work_queue.push(*it);
459  }
460  }
461 }
462 
464  std::ostream &out,
465  const goto_functiont &goto_function,
466  const namespacet &ns) const
467 {
468  unsigned l=0;
469 
470  for(const auto &instruction : goto_function.body.instructions)
471  {
472  out << "**** " << instruction.source_location() << "\n";
473 
474  const loc_infot &loc_info=loc_infos[l];
475 
476  for(std::size_t i=0; i<loc_info.aliases.size(); i++)
477  {
478  if(loc_info.aliases.count(i)!=1 &&
479  loc_info.aliases.find(i)==i) // root?
480  {
481  out << '{';
482  for(std::size_t j=0; j<loc_info.aliases.size(); j++)
483  if(loc_info.aliases.find(j)==i)
484  {
485  INVARIANT(j < objects.size(), "invalid object index");
486  irep_idt identifier;
487  if(objects[j].id() == ID_symbol)
488  identifier = to_symbol_expr(objects[j]).get_identifier();
489  out << ' ' << from_expr(ns, identifier, objects[j]);
490  }
491 
492  out << " }";
493  out << "\n";
494  }
495  }
496 
497  out << "\n";
498  instruction.output(out);
499  out << "\n";
500 
501  l++;
502  }
503 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
bitvector_typet c_index_type()
Definition: c_types.cpp:16
Operator to return the address of an object.
Definition: pointer_expr.h:540
exprt & object()
Definition: pointer_expr.h:549
exprt & op0()
Definition: expr.h:133
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Base class for all expressions.
Definition: expr.h:56
bool is_zero() const
Return whether the expression is a constant representing 0.
Definition: expr.cpp:47
typet & type()
Return the type of the expression.
Definition: expr.h:84
bool is_constant() const
Return whether the expression is a constant.
Definition: expr.h:212
A goto function, consisting of function body (see body) and parameter identifiers (see parameter_iden...
Definition: goto_function.h:24
goto_programt body
Definition: goto_function.h:26
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:181
const exprt & call_lhs() const
Get the lhs of a FUNCTION_CALL (may be nil)
Definition: goto_program.h:286
const exprt & assign_rhs() const
Get the rhs of the assignment for ASSIGN.
Definition: goto_program.h:214
const symbol_exprt & dead_symbol() const
Get the symbol for DEAD.
Definition: goto_program.h:244
const symbol_exprt & decl_symbol() const
Get the declared symbol for DECL.
Definition: goto_program.h:228
const exprt & assign_lhs() const
Get the lhs of the assignment for ASSIGN.
Definition: goto_program.h:200
goto_program_instruction_typet type() const
What kind of instruction?
Definition: goto_program.h:344
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
instructionst::const_iterator const_targett
Definition: goto_program.h:615
Array index operator.
Definition: std_expr.h:1470
exprt & array()
Definition: std_expr.h:1500
exprt & index()
Definition: std_expr.h:1510
const irep_idt & id() const
Definition: irep.h:388
bool is_nil() const
Definition: irep.h:368
goto_programt::const_targett t
Definition: local_cfg.h:28
successorst successors
Definition: local_cfg.h:29
nodest nodes
Definition: local_cfg.h:38
std::size_t node_nrt
Definition: local_cfg.h:22
loc_mapt loc_map
Definition: local_cfg.h:35
bool merge(const loc_infot &src)
void output(std::ostream &out, const goto_functiont &goto_function, const namespacet &ns) const
void assign_lhs(const exprt &lhs, const exprt &rhs, const loc_infot &loc_info_src, loc_infot &loc_info_dest)
void build(const goto_functiont &goto_function)
unsigned unknown_object
numberingt< exprt, irep_hash > objects
std::set< exprt > get(const goto_programt::const_targett t, const exprt &src) const
void get_rec(object_sett &dest, const exprt &rhs, const loc_infot &loc_info_src) const
std::stack< local_cfgt::node_nrt > work_queuet
loc_infost loc_infos
bool aliases(const goto_programt::const_targett t, const exprt &src1, const exprt &src2) const
std::set< unsigned > object_sett
bool is_local(const irep_idt &identifier) const
Definition: locals.h:37
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
The NIL expression.
Definition: std_expr.h:3091
number_type number(const key_type &a)
Definition: numbering.h:37
size_type size() const
Definition: numbering.h:66
An expression containing a side effect.
Definition: std_code.h:1450
const irep_idt & get_statement() const
Definition: std_code.h:1472
const irep_idt & get_identifier() const
Definition: std_expr.h:160
size_type size() const
Definition: union_find.h:97
size_type find(size_type a) const
Definition: union_find.cpp:141
size_type count(size_type a) const
Definition: union_find.h:103
void make_union(size_type a, size_type b)
Definition: union_find.cpp:13
bool same_set(size_type a, size_type b) const
Definition: union_find.h:91
void isolate(size_type a)
Definition: union_find.cpp:41
@ FUNCTION_CALL
Definition: goto_program.h:49
@ ATOMIC_END
Definition: goto_program.h:44
@ DEAD
Definition: goto_program.h:48
@ END_FUNCTION
Definition: goto_program.h:42
@ ASSIGN
Definition: goto_program.h:46
@ ASSERT
Definition: goto_program.h:36
@ SET_RETURN_VALUE
Definition: goto_program.h:45
@ ATOMIC_BEGIN
Definition: goto_program.h:43
@ CATCH
Definition: goto_program.h:51
@ END_THREAD
Definition: goto_program.h:40
@ SKIP
Definition: goto_program.h:38
@ NO_INSTRUCTION_TYPE
Definition: goto_program.h:33
@ START_THREAD
Definition: goto_program.h:39
@ THROW
Definition: goto_program.h:50
@ DECL
Definition: goto_program.h:47
@ OTHER
Definition: goto_program.h:37
@ GOTO
Definition: goto_program.h:34
@ INCOMPLETE_GOTO
Definition: goto_program.h:52
std::string from_expr(const namespacet &ns, const irep_idt &identifier, const exprt &expr)
Field-insensitive, location-sensitive may-alias analysis.
API to expression classes for Pointers.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: pointer_expr.h:577
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
side_effect_exprt & to_side_effect_expr(exprt &expr)
Definition: std_code.h:1506
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:2460
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition: std_expr.h:2107
const minus_exprt & to_minus_expr(const exprt &expr)
Cast an exprt to a minus_exprt.
Definition: std_expr.h:1086
const plus_exprt & to_plus_expr(const exprt &expr)
Cast an exprt to a plus_exprt.
Definition: std_expr.h:1041
const member_exprt & to_member_expr(const exprt &expr)
Cast an exprt to a member_exprt.
Definition: std_expr.h:2946
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1538
void merge(string_constraintst &result, string_constraintst other)
Merge two sets of constraints by appending to the first one.