CBMC
struct_encoding.cpp
Go to the documentation of this file.
1 // Author: Diffblue Ltd.
2 
3 #include "struct_encoding.h"
4 
5 #include <util/arith_tools.h>
6 #include <util/bitvector_expr.h>
7 #include <util/bitvector_types.h>
8 #include <util/c_types.h>
9 #include <util/namespace.h>
10 #include <util/range.h>
11 #include <util/simplify_expr.h>
12 
15 
16 #include <algorithm>
17 #include <numeric>
18 #include <optional>
19 #include <queue>
20 
22  : boolbv_width{std::make_unique<boolbv_widtht>(ns)}, ns{ns}
23 {
24 }
25 
27  : boolbv_width{std::make_unique<boolbv_widtht>(*other.boolbv_width)},
28  ns{other.ns}
29 {
30 }
31 
33 
37 static std::optional<std::size_t>
38 needs_width(const typet &type, const boolbv_widtht &boolbv_width)
39 {
40  if(const auto struct_tag = type_try_dynamic_cast<struct_tag_typet>(type))
41  return boolbv_width(*struct_tag);
42  if(const auto union_tag = type_try_dynamic_cast<union_tag_typet>(type))
43  return boolbv_width(*union_tag);
44  return {};
45 }
46 
48 {
49  std::queue<typet *> work_queue;
50  work_queue.push(&type);
51  while(!work_queue.empty())
52  {
53  typet &current = *work_queue.front();
54  work_queue.pop();
55  if(auto assigned_bit_width = needs_width(current, *boolbv_width))
56  {
57  // The bit vector theory of SMT disallows zero bit length length bit
58  // vectors. C++ gives a minimum size for a struct (even an empty struct)
59  // as being one byte; in order to ensure that structs have unique memory
60  // locations. Therefore encoding empty structs as having 8 bits / 1 byte
61  // is a reasonable solution in this case.
62  if(*assigned_bit_width == 0)
63  assigned_bit_width = 8;
64  current = bv_typet{*assigned_bit_width};
65  }
66  if(const auto array = type_try_dynamic_cast<array_typet>(current))
67  {
68  work_queue.push(&array->element_type());
69  }
70  }
71  return type;
72 }
73 
81 static std::unordered_map<irep_idt, exprt>
82 extricate_updates(const with_exprt &struct_expr)
83 {
84  std::unordered_map<irep_idt, exprt> pairs;
85  auto current_operand = struct_expr.operands().begin();
86  // Skip the struct being updated in order to begin with the pairs.
87  current_operand++;
88  while(current_operand != struct_expr.operands().end())
89  {
90  INVARIANT(
91  current_operand->id() == ID_member_name,
92  "operand is expected to be the name of a member");
93  auto name = current_operand->find(ID_component_name).id();
94  ++current_operand;
95  INVARIANT(
96  current_operand != struct_expr.operands().end(),
97  "every name is expected to be followed by a paired value");
98  pairs[name] = *current_operand;
99  ++current_operand;
100  }
101  POSTCONDITION(!pairs.empty());
102  return pairs;
103 }
104 
105 static exprt encode(const with_exprt &with, const namespacet &ns)
106 {
107  const auto tag_type = type_checked_cast<struct_tag_typet>(with.type());
108  const auto struct_type =
109  type_checked_cast<struct_typet>(ns.follow(with.type()));
110  const auto updates = extricate_updates(with);
111  const auto components =
112  make_range(struct_type.components())
113  .map([&](const struct_union_typet::componentt &component) -> exprt {
114  const auto &update = updates.find(component.get_name());
115  if(update != updates.end())
116  return update->second;
117  else
118  return member_exprt{
119  with.old(), component.get_name(), component.type()};
120  })
121  .collect<exprt::operandst>();
122  return struct_exprt{components, tag_type};
123 }
124 
131 {
132  static auto empty_byte = from_integer(0, bv_typet{8});
133  return empty_byte;
134 }
135 
138 static exprt encode(const struct_exprt &struct_expr)
139 {
140  if(struct_expr.operands().empty())
141  return empty_encoding();
142  if(struct_expr.operands().size() == 1)
143  return struct_expr.operands().front();
144  return concatenation_exprt{struct_expr.operands(), struct_expr.type()};
145 }
146 
147 static exprt
148 encode(const union_exprt &union_expr, const boolbv_widtht &bit_width)
149 {
150  const std::size_t union_width = bit_width(union_expr.type());
151  const exprt &component = union_expr.op();
152  const std::size_t component_width = bit_width(union_expr.op().type());
153  if(union_width == component_width)
154  return typecast_exprt(component, bv_typet{union_width});
155  INVARIANT(
156  union_width >= component_width,
157  "Union must be at least as wide as its component.");
158  return concatenation_exprt{
159  {nondet_padding_exprt{bv_typet{union_width - component_width}}, component},
160  bv_typet{union_width}};
161 }
162 
163 static std::size_t count_trailing_bit_width(
164  const struct_typet &type,
165  const irep_idt name,
166  const boolbv_widtht &boolbv_width)
167 {
168  auto member_component_rit = std::find_if(
169  type.components().rbegin(),
170  type.components().rend(),
172  return component.get_name() == name;
173  });
174  INVARIANT(
175  member_component_rit != type.components().rend(),
176  "Definition of struct type should include named component.");
177  const auto trailing_widths =
178  make_range(type.components().rbegin(), member_component_rit)
179  .map([&](const struct_union_typet::componentt &component) -> std::size_t {
180  return boolbv_width(component.type());
181  });
182  return std::accumulate(
183  trailing_widths.begin(), trailing_widths.end(), std::size_t{0});
184 }
185 
196 {
197  const auto &type = ns.get().follow(member_expr.compound().type());
198  const auto member_bits_width = (*boolbv_width)(member_expr.type());
199  const auto offset_bits = [&]() -> std::size_t {
201  return 0;
202  const auto &struct_type = type_checked_cast<struct_typet>(type);
204  struct_type, member_expr.get_component_name(), *boolbv_width);
205  }();
206  return extractbits_exprt{
207  member_expr.compound(),
208  offset_bits + member_bits_width - 1,
209  offset_bits,
210  member_expr.type()};
211 }
212 
214 {
215  std::queue<exprt *> work_queue;
216  work_queue.push(&expr);
217  while(!work_queue.empty())
218  {
219  exprt &current = *work_queue.front();
220  // Note that "with" expressions are handled before type encoding in order to
221  // facilitate checking that they are applied to structs rather than arrays.
222  if(const auto with_expr = expr_try_dynamic_cast<with_exprt>(current))
224  current = ::encode(*with_expr, ns);
225  current.type() = encode(current.type());
226  std::optional<exprt> update;
227  if(const auto struct_expr = expr_try_dynamic_cast<struct_exprt>(current))
228  update = ::encode(*struct_expr);
229  if(const auto union_expr = expr_try_dynamic_cast<union_exprt>(current))
230  update = ::encode(*union_expr, *boolbv_width);
232  update = ::empty_encoding();
233  if(const auto member_expr = expr_try_dynamic_cast<member_exprt>(current))
234  update = encode_member(*member_expr);
235  if(update)
236  {
237  INVARIANT(
238  current != *update,
239  "Updates in struct encoding are expected to be a change of state.");
240  current = std::move(*update);
241  // Repeat on the updated node until we reach a fixed point. This is needed
242  // because the encoding of an outer struct/union may be initially
243  // expressed in terms of an inner struct/union which it contains.
244  continue;
245  }
246  work_queue.pop();
247  for(auto &operand : current.operands())
248  work_queue.push(&operand);
249  }
250  return expr;
251 }
252 
254  const exprt &encoded,
255  const struct_tag_typet &original_type) const
256 {
257  // The algorithm below works by extracting each of the separate fields from
258  // the combined encoded value using a `member_exprt` which is itself encoded
259  // into a `bit_extract_exprt`. These separated fields can then be combined
260  // using a `struct_exprt`. This is expected to duplicate the input encoded
261  // expression for each of the fields. However for the case where the input
262  // encoded expression is a `constant_exprt`, expression simplification will be
263  // able simplify away the duplicated portions of the constant and the bit
264  // extraction expressions. This yields a clean struct expression where each
265  // field is a separate constant containing the data solely relevant to that
266  // field.
267  INVARIANT(
268  can_cast_type<bv_typet>(encoded.type()),
269  "Structs are expected to be encoded into bit vectors.");
270  const struct_typet definition = ns.get().follow_tag(original_type);
271  exprt::operandst encoded_fields;
272  for(const auto &component : definition.components())
273  {
274  encoded_fields.push_back(typecast_exprt::conditional_cast(
275  encode(member_exprt{typecast_exprt{encoded, original_type}, component}),
276  component.type()));
277  }
278  return simplify_expr(struct_exprt{encoded_fields, original_type}, ns);
279 }
280 
282  const exprt &encoded,
283  const union_tag_typet &original_type) const
284 {
285  INVARIANT(
286  can_cast_type<bv_typet>(encoded.type()),
287  "Unions are expected to be encoded into bit vectors.");
288  const union_typet definition = ns.get().follow_tag(original_type);
289  const auto &components = definition.components();
290  if(components.empty())
291  return empty_union_exprt{original_type};
292  // A union expression is built here using the first component in the
293  // declaration of the union. There may be better alternatives but this matches
294  // the SAT backend. See the case for `type.id() == ID_union` in
295  // `boolbvt::bv_get_rec`.
296  const auto &component_for_definition = components[0];
297  return simplify_expr(
298  union_exprt{
299  component_for_definition.get_name(),
302  typecast_exprt{encoded, original_type}, component_for_definition}),
303  component_for_definition.type()},
304  original_type},
305  ns);
306 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
API to expression classes for bitvectors.
Pre-defined bitvector types.
bool can_cast_type< bv_typet >(const typet &type)
Check whether a reference to a typet is a bv_typet.
bool can_cast_type< union_typet >(const typet &type)
Check whether a reference to a typet is a union_typet.
Definition: c_types.h:171
Fixed-width bit-vector without numerical interpretation.
Concatenation of bit-vector operands.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Union constructor to support unions without any member (a GCC/Clang feature).
Definition: std_expr.h:1829
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
Extracts a sub-range of a bit-vector operand.
const irept & find(const irep_idt &name) const
Definition: irep.cpp:93
Extract member of struct or union.
Definition: std_expr.h:2841
const exprt & compound() const
Definition: std_expr.h:2890
irep_idt get_component_name() const
Definition: std_expr.h:2863
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:49
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
This expression serves as a placeholder for the bits which have non deterministic value in a larger b...
Encodes struct types/values into non-struct expressions/types.
std::unique_ptr< boolbv_widtht > boolbv_width
std::reference_wrapper< const namespacet > ns
typet encode(typet type) const
exprt decode(const exprt &encoded, const struct_tag_typet &original_type) const
Reconstructs a struct expression of the original_type using the data from the bit vector encoded expr...
exprt encode_member(const member_exprt &member_expr) const
The member expression selects the value of a field from a struct or union.
struct_encodingt(const namespacet &ns)
Struct constructor from list of elements.
Definition: std_expr.h:1872
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:493
Structure type, corresponds to C style structs.
Definition: std_types.h:231
const componentst & components() const
Definition: std_types.h:147
Semantic type conversion.
Definition: std_expr.h:2068
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2076
The type of an expression, extends irept.
Definition: type.h:29
const exprt & op() const
Definition: std_expr.h:391
Union constructor from single element.
Definition: std_expr.h:1765
A union tag type, i.e., union_typet with an identifier.
Definition: c_types.h:199
The union type.
Definition: c_types.h:147
Operator to update elements in structs and arrays.
Definition: std_expr.h:2471
Expressions for use in incremental SMT2 decision procedure.
Ranges: pair of begin and end iterators, which can be initialized from containers,...
ranget< iteratort > make_range(iteratort begin, iteratort end)
Definition: range.h:522
exprt simplify_expr(exprt src, const namespacet &ns)
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
#define POSTCONDITION(CONDITION)
Definition: invariant.h:479
auto component(T &struct_expr, const irep_idt &name, const namespacet &ns) -> decltype(struct_expr.op0())
Definition: std_expr.cpp:80
bool can_cast_expr< empty_union_exprt >(const exprt &base)
Definition: std_expr.h:1838
bool can_cast_type< struct_tag_typet >(const typet &type)
Check whether a reference to a typet is a struct_tag_typet.
Definition: std_types.h:505
static exprt empty_encoding()
Empty structs and unions are encoded as a zero byte.
static exprt encode(const with_exprt &with, const namespacet &ns)
static std::size_t count_trailing_bit_width(const struct_typet &type, const irep_idt name, const boolbv_widtht &boolbv_width)
static std::optional< std::size_t > needs_width(const typet &type, const boolbv_widtht &boolbv_width)
If the given type needs re-encoding as a bit-vector then this function.
static std::unordered_map< irep_idt, exprt > extricate_updates(const with_exprt &struct_expr)
Extracts the component/field names and new values from a with_exprt which expresses a new struct valu...