CBMC
struct_encoding.cpp
Go to the documentation of this file.
1 // Author: Diffblue Ltd.
2 
3 #include "struct_encoding.h"
4 
5 #include <util/arith_tools.h>
6 #include <util/bitvector_expr.h>
7 #include <util/bitvector_types.h>
8 #include <util/c_types.h>
9 #include <util/namespace.h>
10 #include <util/range.h>
11 #include <util/simplify_expr.h>
12 
15 
16 #include <algorithm>
17 #include <numeric>
18 #include <optional>
19 #include <queue>
20 
22  : boolbv_width{std::make_unique<boolbv_widtht>(ns)}, ns{ns}
23 {
24 }
25 
27  : boolbv_width{std::make_unique<boolbv_widtht>(*other.boolbv_width)},
28  ns{other.ns}
29 {
30 }
31 
33 
37 static std::optional<std::size_t>
38 needs_width(const typet &type, const boolbv_widtht &boolbv_width)
39 {
40  if(const auto struct_tag = type_try_dynamic_cast<struct_tag_typet>(type))
41  return boolbv_width(*struct_tag);
42  if(const auto union_tag = type_try_dynamic_cast<union_tag_typet>(type))
43  return boolbv_width(*union_tag);
44  return {};
45 }
46 
48 {
49  std::queue<typet *> work_queue;
50  work_queue.push(&type);
51  while(!work_queue.empty())
52  {
53  typet &current = *work_queue.front();
54  work_queue.pop();
55  if(auto assigned_bit_width = needs_width(current, *boolbv_width))
56  {
57  // The bit vector theory of SMT disallows zero bit length length bit
58  // vectors. C++ gives a minimum size for a struct (even an empty struct)
59  // as being one byte; in order to ensure that structs have unique memory
60  // locations. Therefore encoding empty structs as having 8 bits / 1 byte
61  // is a reasonable solution in this case.
62  if(*assigned_bit_width == 0)
63  assigned_bit_width = 8;
64  current = bv_typet{*assigned_bit_width};
65  }
66  if(const auto array = type_try_dynamic_cast<array_typet>(current))
67  {
68  work_queue.push(&array->element_type());
69  }
70  }
71  return type;
72 }
73 
81 static std::unordered_map<irep_idt, exprt>
82 extricate_updates(const with_exprt &struct_expr)
83 {
84  std::unordered_map<irep_idt, exprt> pairs;
85  auto current_operand = struct_expr.operands().begin();
86  // Skip the struct being updated in order to begin with the pairs.
87  current_operand++;
88  while(current_operand != struct_expr.operands().end())
89  {
90  INVARIANT(
91  current_operand->id() == ID_member_name,
92  "operand is expected to be the name of a member");
93  auto name = current_operand->find(ID_component_name).id();
94  ++current_operand;
95  INVARIANT(
96  current_operand != struct_expr.operands().end(),
97  "every name is expected to be followed by a paired value");
98  pairs[name] = *current_operand;
99  ++current_operand;
100  }
101  POSTCONDITION(!pairs.empty());
102  return pairs;
103 }
104 
105 static exprt encode(const with_exprt &with, const namespacet &ns)
106 {
107  const auto tag_type = type_checked_cast<struct_tag_typet>(with.type());
108  const auto struct_type = ns.follow_tag(tag_type);
109  const auto updates = extricate_updates(with);
110  const auto components =
111  make_range(struct_type.components())
112  .map([&](const struct_union_typet::componentt &component) -> exprt {
113  const auto &update = updates.find(component.get_name());
114  if(update != updates.end())
115  return update->second;
116  else
117  return member_exprt{
118  with.old(), component.get_name(), component.type()};
119  })
120  .collect<exprt::operandst>();
121  return struct_exprt{components, tag_type};
122 }
123 
130 {
131  static auto empty_byte = from_integer(0, bv_typet{8});
132  return empty_byte;
133 }
134 
137 static exprt encode(const struct_exprt &struct_expr)
138 {
139  if(struct_expr.operands().empty())
140  return empty_encoding();
141  if(struct_expr.operands().size() == 1)
142  return struct_expr.operands().front();
143  return concatenation_exprt{struct_expr.operands(), struct_expr.type()};
144 }
145 
146 static exprt
147 encode(const union_exprt &union_expr, const boolbv_widtht &bit_width)
148 {
149  const std::size_t union_width = bit_width(union_expr.type());
150  const exprt &component = union_expr.op();
151  const std::size_t component_width = bit_width(union_expr.op().type());
152  if(union_width == component_width)
153  return typecast_exprt(component, bv_typet{union_width});
154  INVARIANT(
155  union_width >= component_width,
156  "Union must be at least as wide as its component.");
157  return concatenation_exprt{
158  {nondet_padding_exprt{bv_typet{union_width - component_width}}, component},
159  bv_typet{union_width}};
160 }
161 
162 static std::size_t count_trailing_bit_width(
163  const struct_typet &type,
164  const irep_idt name,
165  const boolbv_widtht &boolbv_width)
166 {
167  auto member_component_rit = std::find_if(
168  type.components().rbegin(),
169  type.components().rend(),
171  return component.get_name() == name;
172  });
173  INVARIANT(
174  member_component_rit != type.components().rend(),
175  "Definition of struct type should include named component.");
176  const auto trailing_widths =
177  make_range(type.components().rbegin(), member_component_rit)
178  .map([&](const struct_union_typet::componentt &component) -> std::size_t {
179  return boolbv_width(component.type());
180  });
181  return std::accumulate(
182  trailing_widths.begin(), trailing_widths.end(), std::size_t{0});
183 }
184 
195 {
196  const auto &compound_type = member_expr.compound().type();
197  const auto offset_bits = [&]() -> std::size_t {
198  if(
199  can_cast_type<union_typet>(compound_type) ||
200  can_cast_type<union_tag_typet>(compound_type))
201  {
202  return 0;
203  }
204  const auto &struct_type =
205  compound_type.id() == ID_struct_tag
206  ? ns.get().follow_tag(
207  type_checked_cast<struct_tag_typet>(compound_type))
208  : type_checked_cast<struct_typet>(compound_type);
210  struct_type, member_expr.get_component_name(), *boolbv_width);
211  }();
212  return extractbits_exprt{
213  member_expr.compound(), offset_bits, member_expr.type()};
214 }
215 
217 {
218  std::queue<exprt *> work_queue;
219  work_queue.push(&expr);
220  while(!work_queue.empty())
221  {
222  exprt &current = *work_queue.front();
223  // Note that "with" expressions are handled before type encoding in order to
224  // facilitate checking that they are applied to structs rather than arrays.
225  if(const auto with_expr = expr_try_dynamic_cast<with_exprt>(current))
227  current = ::encode(*with_expr, ns);
228  current.type() = encode(current.type());
229  std::optional<exprt> update;
230  if(const auto struct_expr = expr_try_dynamic_cast<struct_exprt>(current))
231  update = ::encode(*struct_expr);
232  if(const auto union_expr = expr_try_dynamic_cast<union_exprt>(current))
233  update = ::encode(*union_expr, *boolbv_width);
235  update = ::empty_encoding();
236  if(const auto member_expr = expr_try_dynamic_cast<member_exprt>(current))
237  update = encode_member(*member_expr);
238  if(update)
239  {
240  INVARIANT(
241  current != *update,
242  "Updates in struct encoding are expected to be a change of state.");
243  current = std::move(*update);
244  // Repeat on the updated node until we reach a fixed point. This is needed
245  // because the encoding of an outer struct/union may be initially
246  // expressed in terms of an inner struct/union which it contains.
247  continue;
248  }
249  work_queue.pop();
250  for(auto &operand : current.operands())
251  work_queue.push(&operand);
252  }
253  return expr;
254 }
255 
257  const exprt &encoded,
258  const struct_tag_typet &original_type) const
259 {
260  // The algorithm below works by extracting each of the separate fields from
261  // the combined encoded value using a `member_exprt` which is itself encoded
262  // into a `bit_extract_exprt`. These separated fields can then be combined
263  // using a `struct_exprt`. This is expected to duplicate the input encoded
264  // expression for each of the fields. However for the case where the input
265  // encoded expression is a `constant_exprt`, expression simplification will be
266  // able simplify away the duplicated portions of the constant and the bit
267  // extraction expressions. This yields a clean struct expression where each
268  // field is a separate constant containing the data solely relevant to that
269  // field.
270  INVARIANT(
271  can_cast_type<bv_typet>(encoded.type()),
272  "Structs are expected to be encoded into bit vectors.");
273  const struct_typet definition = ns.get().follow_tag(original_type);
274  exprt::operandst encoded_fields;
275  for(const auto &component : definition.components())
276  {
277  encoded_fields.push_back(typecast_exprt::conditional_cast(
278  encode(member_exprt{typecast_exprt{encoded, original_type}, component}),
279  component.type()));
280  }
281  return simplify_expr(struct_exprt{encoded_fields, original_type}, ns);
282 }
283 
285  const exprt &encoded,
286  const union_tag_typet &original_type) const
287 {
288  INVARIANT(
289  can_cast_type<bv_typet>(encoded.type()),
290  "Unions are expected to be encoded into bit vectors.");
291  const union_typet definition = ns.get().follow_tag(original_type);
292  const auto &components = definition.components();
293  if(components.empty())
294  return empty_union_exprt{original_type};
295  // A union expression is built here using the first component in the
296  // declaration of the union. There may be better alternatives but this matches
297  // the SAT backend. See the case for `type.id() == ID_union` in
298  // `boolbvt::bv_get_rec`.
299  const auto &component_for_definition = components[0];
300  return simplify_expr(
301  union_exprt{
302  component_for_definition.get_name(),
305  typecast_exprt{encoded, original_type}, component_for_definition}),
306  component_for_definition.type()},
307  original_type},
308  ns);
309 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
API to expression classes for bitvectors.
Pre-defined bitvector types.
bool can_cast_type< bv_typet >(const typet &type)
Check whether a reference to a typet is a bv_typet.
bool can_cast_type< union_typet >(const typet &type)
Check whether a reference to a typet is a union_typet.
Definition: c_types.h:171
bool can_cast_type< union_tag_typet >(const typet &type)
Check whether a reference to a typet is a union_tag_typet.
Definition: c_types.h:211
Fixed-width bit-vector without numerical interpretation.
Concatenation of bit-vector operands.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Union constructor to support unions without any member (a GCC/Clang feature).
Definition: std_expr.h:1834
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
Extracts a sub-range of a bit-vector operand.
const irept & find(const irep_idt &name) const
Definition: irep.cpp:93
const irep_idt & id() const
Definition: irep.h:388
Extract member of struct or union.
Definition: std_expr.h:2854
const exprt & compound() const
Definition: std_expr.h:2903
irep_idt get_component_name() const
Definition: std_expr.h:2876
const union_typet & follow_tag(const union_tag_typet &) const
Follow type tag of union type.
Definition: namespace.cpp:63
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
This expression serves as a placeholder for the bits which have non deterministic value in a larger b...
Encodes struct types/values into non-struct expressions/types.
std::unique_ptr< boolbv_widtht > boolbv_width
std::reference_wrapper< const namespacet > ns
typet encode(typet type) const
exprt decode(const exprt &encoded, const struct_tag_typet &original_type) const
Reconstructs a struct expression of the original_type using the data from the bit vector encoded expr...
exprt encode_member(const member_exprt &member_expr) const
The member expression selects the value of a field from a struct or union.
struct_encodingt(const namespacet &ns)
Struct constructor from list of elements.
Definition: std_expr.h:1877
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:493
Structure type, corresponds to C style structs.
Definition: std_types.h:231
const componentst & components() const
Definition: std_types.h:147
Semantic type conversion.
Definition: std_expr.h:2073
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2081
The type of an expression, extends irept.
Definition: type.h:29
const exprt & op() const
Definition: std_expr.h:391
Union constructor from single element.
Definition: std_expr.h:1770
A union tag type, i.e., union_typet with an identifier.
Definition: c_types.h:199
The union type.
Definition: c_types.h:147
Operator to update elements in structs and arrays.
Definition: std_expr.h:2481
Expressions for use in incremental SMT2 decision procedure.
Ranges: pair of begin and end iterators, which can be initialized from containers,...
ranget< iteratort > make_range(iteratort begin, iteratort end)
Definition: range.h:522
exprt simplify_expr(exprt src, const namespacet &ns)
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
#define POSTCONDITION(CONDITION)
Definition: invariant.h:479
auto component(T &struct_expr, const irep_idt &name, const namespacet &ns) -> decltype(struct_expr.op0())
Definition: std_expr.cpp:97
bool can_cast_expr< empty_union_exprt >(const exprt &base)
Definition: std_expr.h:1843
bool can_cast_type< struct_tag_typet >(const typet &type)
Check whether a reference to a typet is a struct_tag_typet.
Definition: std_types.h:505
static exprt empty_encoding()
Empty structs and unions are encoded as a zero byte.
static exprt encode(const with_exprt &with, const namespacet &ns)
static std::size_t count_trailing_bit_width(const struct_typet &type, const irep_idt name, const boolbv_widtht &boolbv_width)
static std::optional< std::size_t > needs_width(const typet &type, const boolbv_widtht &boolbv_width)
If the given type needs re-encoding as a bit-vector then this function.
static std::unordered_map< irep_idt, exprt > extricate_updates(const with_exprt &struct_expr)
Extracts the component/field names and new values from a with_exprt which expresses a new struct valu...