CBMC
unified_diff.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Unified diff (using LCSS) of goto functions
4 
5 Author: Michael Tautschnig
6 
7 Date: April 2016
8 
9 \*******************************************************************/
10 
13 
14 #include "unified_diff.h"
15 
16 #include <algorithm>
17 
19 
21  const goto_modelt &model_old,
22  const goto_modelt &model_new)
23  : old_goto_functions(model_old.goto_functions),
24  ns_old(model_old.symbol_table),
25  new_goto_functions(model_new.goto_functions),
26  ns_new(model_new.symbol_table)
27 {
28 }
29 
31 unified_difft::get_diff(const irep_idt &function) const
32 {
33  differences_mapt::const_iterator entry = differences_map_.find(function);
34  if(entry == differences_map_.end())
35  return {};
36 
37  goto_functionst::function_mapt::const_iterator old_fit =
38  old_goto_functions.function_map.find(function);
39  goto_functionst::function_mapt::const_iterator new_fit =
40  new_goto_functions.function_map.find(function);
41 
42  goto_programt empty;
43 
44  const goto_programt &old_goto_program =
45  old_fit == old_goto_functions.function_map.end() ? empty
46  : old_fit->second.body;
47  const goto_programt &new_goto_program =
48  new_fit == new_goto_functions.function_map.end() ? empty
49  : new_fit->second.body;
50 
51  return get_diff(old_goto_program, new_goto_program, entry->second);
52 }
53 
55  const goto_programt &old_goto_program,
56  const goto_programt &new_goto_program,
57  const differencest &differences)
58 {
59  goto_programt::instructionst::const_iterator old_it =
60  old_goto_program.instructions.begin();
61  goto_programt::instructionst::const_iterator new_it =
62  new_goto_program.instructions.begin();
63 
64  goto_program_difft dest;
65 
66  for(differencest::const_reverse_iterator rit = differences.rbegin();
67  rit != differences.rend();
68  ++rit)
69  {
70  switch(*rit)
71  {
72  case differencet::SAME:
73  dest.push_back(std::make_pair(new_it, differencet::SAME));
74  INVARIANT(
75  old_it != old_goto_program.instructions.end(),
76  "old iterator reached the final goto instruction");
77  ++old_it;
78  INVARIANT(
79  new_it != new_goto_program.instructions.end(),
80  "new iterator reached the final goto instruction");
81  ++new_it;
82  break;
84  dest.push_back(std::make_pair(old_it, differencet::DELETED));
85  INVARIANT(
86  old_it != old_goto_program.instructions.end(),
87  "old iterator reached the final goto instruction");
88  ++old_it;
89  break;
90  case differencet::NEW:
91  dest.push_back(std::make_pair(new_it, differencet::NEW));
92  INVARIANT(
93  new_it != new_goto_program.instructions.end(),
94  "new iterator reached the final goto instruction");
95  ++new_it;
96  break;
97  }
98  }
99 
100  return dest;
101 }
102 
104  const irep_idt &identifier,
105  const goto_programt &old_goto_program,
106  const goto_programt &new_goto_program,
107  const differencest &differences,
108  std::ostream &os) const
109 {
110  goto_program_difft diff =
111  get_diff(old_goto_program, new_goto_program, differences);
112 
113  bool has_diff = false;
114  for(const auto &d : diff)
115  {
116  if(d.second != differencet::SAME)
117  {
118  has_diff = true;
119  break;
120  }
121  }
122  if(!has_diff)
123  return;
124 
125  os << "/** " << identifier << " **/\n";
126 
127  for(const auto &d : diff)
128  {
129  switch(d.second)
130  {
131  case differencet::SAME:
132  os << ' ';
133  d.first->output(os);
134  break;
136  os << '-';
137  d.first->output(os);
138  break;
139  case differencet::NEW:
140  os << '+';
141  d.first->output(os);
142  break;
143  }
144  }
145 }
146 
148  const goto_programt &old_goto_program,
149  const goto_programt &new_goto_program)
150 {
151  std::size_t old_count = old_goto_program.instructions.size();
152  std::size_t new_count = new_goto_program.instructions.size();
153 
154  differencest differences;
155  differences.reserve(old_count + new_count);
156 
157  // skip common prefix
158  goto_programt::instructionst::const_iterator old_it =
159  old_goto_program.instructions.begin();
160  goto_programt::instructionst::const_iterator new_it =
161  new_goto_program.instructions.begin();
162 
163  for(; old_it != old_goto_program.instructions.end() &&
164  new_it != new_goto_program.instructions.end();
165  ++old_it, ++new_it)
166  {
167  if(!instructions_equal(*old_it, *new_it))
168  break;
169 
170  --old_count;
171  --new_count;
172  }
173  // old_it, new_it are now iterators to the first instructions that
174  // differ
175 
176  // skip common suffix
177  goto_programt::instructionst::const_iterator old_rit =
178  old_goto_program.instructions.end();
179  goto_programt::instructionst::const_iterator new_rit =
180  new_goto_program.instructions.end();
181 
182  while(old_rit != old_it && new_rit != new_it)
183  {
184  --old_rit;
185  --new_rit;
186 
187  if(!instructions_equal(*old_rit, *new_rit))
188  {
189  ++old_rit;
190  ++new_rit;
191  break;
192  }
193 
194  --old_count;
195  --new_count;
196  differences.push_back(differencet::SAME);
197  }
198  // old_rit, new_rit are now iterators to the first instructions of
199  // the common tail
200 
201  if(old_count == 0 && new_count == 0)
202  return differences;
203 
204  // apply longest common subsequence (LCSS)
205  typedef std::vector<std::vector<std::size_t>> lcss_matrixt;
206  lcss_matrixt lcss_matrix(
207  old_count + 1, std::vector<size_t>(new_count + 1, 0));
208 
209  // fill the matrix
210  std::size_t i = 1, j = 1;
211  for(goto_programt::instructionst::const_iterator old_it2 = old_it;
212  old_it2 != old_rit;
213  ++old_it2)
214  {
215  j = 1;
216  for(goto_programt::instructionst::const_iterator new_it2 = new_it;
217  new_it2 != new_rit;
218  ++new_it2)
219  {
220  if(instructions_equal(*old_it2, *new_it2))
221  lcss_matrix[i][j] += lcss_matrix[i - 1][j - 1] + 1;
222  else
223  lcss_matrix[i][j] =
224  std::max(lcss_matrix[i][j - 1], lcss_matrix[i - 1][j]);
225 
226  ++j;
227  }
228  ++i;
229  }
230 
231 #if 0
232  std::cerr << "old_count=" << old_count << '\n';
233  std::cerr << "new_count=" << new_count << '\n';
234  for(i=0; i<=old_count; ++i)
235  {
236  for(j=0; j<=new_count; ++j)
237  {
238  std::cerr << " ";
239  if(lcss_matrix[i][j]<10)
240  std::cerr << " ";
241  std::cerr << lcss_matrix[i][j];
242  }
243  std::cerr << '\n';
244  }
245 #endif
246 
247  // backtracking
248  // note that the order in case of multiple possible matches of
249  // the if-clauses is important to provide a convenient ordering
250  // of - and + lines (- goes before +)
251  i = old_count;
252  j = new_count;
253  --old_rit;
254  --new_rit;
255 
256  while(i > 0 || j > 0)
257  {
258  if(i == 0)
259  {
260  differences.push_back(differencet::NEW);
261  --j;
262  if(new_goto_program.instructions.begin()!=new_rit)
263  --new_rit;
264  }
265  else if(j == 0)
266  {
267  differences.push_back(differencet::DELETED);
268  --i;
269  if(old_goto_program.instructions.begin()!=old_rit)
270  --old_rit;
271  }
272  else if(instructions_equal(*old_rit, *new_rit))
273  {
274  differences.push_back(differencet::SAME);
275  --i;
276  if(old_goto_program.instructions.begin()!=old_rit)
277  --old_rit;
278  --j;
279  if(new_goto_program.instructions.begin()!=new_rit)
280  --new_rit;
281  }
282  else if(lcss_matrix[i][j - 1] < lcss_matrix[i][j])
283  {
284  differences.push_back(differencet::DELETED);
285  --i;
286  if(old_goto_program.instructions.begin()!=old_rit)
287  --old_rit;
288  }
289  else
290  {
291  differences.push_back(differencet::NEW);
292  --j;
293  if(new_goto_program.instructions.begin()!=new_rit)
294  --new_rit;
295  }
296  }
297 
298  // add common prefix (if any)
299  for(; old_it != old_goto_program.instructions.begin(); --old_it)
300  differences.push_back(differencet::SAME);
301 
302  return differences;
303 }
304 
306  const irep_idt &identifier,
307  const goto_programt &old_goto_program,
308  const goto_programt &new_goto_program)
309 {
310  differencest &differences = differences_map_[identifier];
311  differences.clear();
312 
313  if(
314  old_goto_program.instructions.empty() ||
315  new_goto_program.instructions.empty())
316  {
317  if(new_goto_program.instructions.empty())
318  differences.resize(
319  old_goto_program.instructions.size(), differencet::DELETED);
320  else
321  differences.resize(
322  new_goto_program.instructions.size(), differencet::NEW);
323  }
324  else
325  differences=lcss(old_goto_program, new_goto_program);
326 }
327 
329 {
330  typedef std::map<irep_idt, goto_functionst::function_mapt::const_iterator>
331  function_mapt;
332 
333  function_mapt old_funcs, new_funcs;
334 
335  for(auto it = old_goto_functions.function_map.begin();
336  it != old_goto_functions.function_map.end();
337  ++it)
338  {
339  old_funcs.insert(std::make_pair(it->first, it));
340  }
341  for(auto it = new_goto_functions.function_map.begin();
342  it != new_goto_functions.function_map.end();
343  ++it)
344  {
345  new_funcs.insert(std::make_pair(it->first, it));
346  }
347 
348  goto_programt empty;
349 
350  function_mapt::const_iterator ito = old_funcs.begin();
351  for(function_mapt::const_iterator itn = new_funcs.begin();
352  itn != new_funcs.end();
353  ++itn)
354  {
355  for(; ito != old_funcs.end() && ito->first < itn->first; ++ito)
356  unified_diff(ito->first, ito->second->second.body, empty);
357 
358  if(ito == old_funcs.end() || itn->first < ito->first)
359  unified_diff(itn->first, empty, itn->second->second.body);
360  else
361  {
362  INVARIANT(
363  ito->first == itn->first, "old and new function names do not match");
364  unified_diff(
365  itn->first, ito->second->second.body, itn->second->second.body);
366  ++ito;
367  }
368  }
369  for(; ito != old_funcs.end(); ++ito)
370  unified_diff(ito->first, ito->second->second.body, empty);
371 
372  return !differences_map_.empty();
373 }
374 
375 void unified_difft::output(std::ostream &os) const
376 {
377  goto_programt empty;
378 
379  for(auto const &p : differences_map_)
380  {
381  const irep_idt &function = p.first;
382 
383  goto_functionst::function_mapt::const_iterator old_fit =
384  old_goto_functions.function_map.find(function);
385  goto_functionst::function_mapt::const_iterator new_fit =
386  new_goto_functions.function_map.find(function);
387 
388  const goto_programt &old_goto_program =
389  old_fit == old_goto_functions.function_map.end() ? empty
390  : old_fit->second.body;
391  const goto_programt &new_goto_program =
392  new_fit == new_goto_functions.function_map.end() ? empty
393  : new_fit->second.body;
394 
395  output_diff(function, old_goto_program, new_goto_program, p.second, os);
396  }
397 }
398 
400  const goto_programt::instructiont &ins1,
401  const goto_programt::instructiont &ins2)
402 {
403  return ins1.equals(ins2) &&
404  (ins1.targets.empty() ||
405  instructions_equal(*ins1.get_target(), *ins2.get_target()));
406 }
407 
409 {
410  return differences_map_;
411 }
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
function_mapt function_map
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:181
targetst targets
The list of successor instructions.
Definition: goto_program.h:414
bool equals(const instructiont &other) const
Syntactic equality: two instructiont are equal if they have the same type, code, guard,...
const_targett get_target() const
Returns the first (and only) successor for the usual case of a single target.
Definition: goto_program.h:418
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
static bool instructions_equal(const goto_programt::instructiont &ins1, const goto_programt::instructiont &ins2)
std::map< irep_idt, differencest > differences_mapt
Definition: unified_diff.h:57
std::vector< differencet > differencest
Definition: unified_diff.h:56
const goto_functionst & old_goto_functions
Definition: unified_diff.h:51
std::list< std::pair< goto_programt::const_targett, differencet > > goto_program_difft
Definition: unified_diff.h:46
const goto_functionst & new_goto_functions
Definition: unified_diff.h:53
goto_program_difft get_diff(const irep_idt &function) const
void unified_diff(const irep_idt &identifier, const goto_programt &old_goto_program, const goto_programt &new_goto_program)
unified_difft(const goto_modelt &model_old, const goto_modelt &model_new)
const differences_mapt & differences_map() const
static differencest lcss(const goto_programt &old_goto_program, const goto_programt &new_goto_program)
void output_diff(const irep_idt &identifier, const goto_programt &old_goto_program, const goto_programt &new_goto_program, const differencest &differences, std::ostream &os) const
differences_mapt differences_map_
Definition: unified_diff.h:86
void output(std::ostream &os) const
Symbol Table + CFG.
Unified diff (using LCSS) of goto functions.