CBMC
Loading...
Searching...
No Matches
float_utils.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
9#include "float_utils.h"
10
11#include <algorithm>
12
13#include <util/arith_tools.h>
14
16{
17 bvt round_to_even=
19 bvt round_to_plus_inf=
21 bvt round_to_minus_inf=
23 bvt round_to_zero=
25 bvt round_to_away =
27
29 rounding_mode_bits.round_to_plus_inf=bv_utils.equal(src, round_to_plus_inf);
30 rounding_mode_bits.round_to_minus_inf=bv_utils.equal(src, round_to_minus_inf);
32 rounding_mode_bits.round_to_away = bv_utils.equal(src, round_to_away);
33}
34
36{
37 unbiased_floatt result;
38
39 // we need to convert negative integers
40 result.sign=sign_bit(src);
41
43
44 // build an exponent (unbiased) -- this is signed!
45 result.exponent=
47 src.size()-1,
48 address_bits(src.size() - 1) + 1);
49
50 return round_and_pack(result);
51}
52
54{
55 unbiased_floatt result;
56
57 result.fraction=src;
58
59 // build an exponent (unbiased) -- this is signed!
60 result.exponent=
62 src.size()-1,
63 address_bits(src.size() - 1) + 1);
64
65 result.sign=const_literal(false);
66
67 return round_and_pack(result);
68}
69
71 const bvt &src,
72 std::size_t dest_width)
73{
74 return to_integer(src, dest_width, true);
75}
76
78 const bvt &src,
79 std::size_t dest_width)
80{
81 return to_integer(src, dest_width, false);
82}
83
85 const bvt &src,
86 std::size_t dest_width,
87 bool is_signed)
88{
89 PRECONDITION(src.size() == spec.width());
90
91 // The following is the usual case in ANSI-C, and we optimize for that.
93
94 // Keep in sync with float_bvt::to_integer — the body below is a
95 // line-for-line translation of that function between the
96 // literalt/bvt and exprt APIs.
97
98 const unbiased_floatt unpacked = unpack(src);
99
100 bvt fraction = unpacked.fraction;
101
102 if(dest_width > fraction.size())
103 {
105 bv_utils.build_constant(0U, dest_width - fraction.size());
106 fraction.insert(
107 fraction.begin(), lsb_extension.begin(), lsb_extension.end());
108 }
109
110 // if the exponent is positive, shift right by (fraction.size() - 1) minus
111 // the exponent. The shift distance is built in a bit-vector wide enough to
112 // hold both `fraction.size() - 1` and the (sign-extended) exponent, so it
113 // does not wrap for wide destination types -- e.g. a narrow source such as
114 // _Float16 (spec.e = 5) converted to a 64-bit integer. Keep in sync with
115 // float_bvt::to_integer.
116 const std::size_t distance_width =
117 std::max(unpacked.exponent.size(), address_bits(fraction.size()) + 1);
118 bvt offset = bv_utils.build_constant(fraction.size() - 1, distance_width);
119 bvt distance = bv_utils.sub(
123
124 // if the exponent is negative, we have zero anyways
125 bvt result = shift_result;
126 literalt exponent_sign = unpacked.exponent[unpacked.exponent.size() - 1];
127
128 for(std::size_t i = 0; i < result.size(); i++)
129 result[i] = prop.land(result[i], !exponent_sign);
130
131 // chop out the right number of bits from the result
132 if(result.size() > dest_width)
133 {
134 result.resize(dest_width);
135 }
136
137 INVARIANT(
138 result.size() == dest_width,
139 "result bitvector width should equal the destination bitvector width");
140
141 // if signed, apply sign.
142 if(is_signed)
143 result = bv_utils.cond_negate(result, unpacked.sign);
144 else
145 {
146 // It's unclear what the behaviour for negative floats
147 // to integer shall be.
148 }
149
150 return result;
151}
152
154{
155 unbiased_floatt result;
156
157 result.sign=const_literal(src.get_sign());
158 result.NaN=const_literal(src.is_NaN());
159 result.infinity=const_literal(src.is_infinity());
162
163 return pack(bias(result));
164}
165
167{
168 PRECONDITION(src.size() == spec.width());
169
170 // Zero? NaN? Infinity?
171 auto unpacked = unpack(src);
172 auto is_special = prop.lor({unpacked.zero, unpacked.NaN, unpacked.infinity});
173
174 // add 2^f, where f is the number of fraction bits,
175 // by adding f to the exponent
178
180
181 // abs(x) >= magic_number? If so, then there is no fractional part.
183
184 magic_number_bv.back() = src.back(); // copy sign bit
185
186 auto tmp1 = add_sub(src, magic_number_bv, false);
187
188 auto tmp2 = add_sub(tmp1, magic_number_bv, true);
189
190 // restore the original sign bit
191 tmp2.back() = src.back();
192
194}
195
197 const bvt &src,
199{
200 PRECONDITION(src.size() == spec.width());
201
202 #if 1
203 // Catch the special case in which we extend,
204 // e.g. single to double.
205 // In this case, rounding can be avoided,
206 // but a denormal number may be come normal.
207 // Be careful to exclude the difficult case
208 // when denormalised numbers in the old format
209 // can be converted to denormalised numbers in the
210 // new format. Note that this is rare and will only
211 // happen with very non-standard formats.
212
213 int sourceSmallestNormalExponent=-((1 << (spec.e - 1)) - 1);
216
217 // Using the fact that f doesn't include the hidden bit
218
219 int destSmallestNormalExponent=-((1 << (dest_spec.e - 1)) - 1);
220
221 if(dest_spec.e>=spec.e &&
222 dest_spec.f>=spec.f &&
224 {
226 unbiased_floatt result;
227
228 // the fraction gets zero-padded
229 std::size_t padding=dest_spec.f-spec.f;
230 result.fraction=
232
233 // the exponent gets sign-extended
234 result.exponent=
236
237 // if the number was denormal and is normal in the new format,
238 // normalise it!
239 if(dest_spec.e > spec.e)
240 {
241 normalization_shift(result.fraction, result.exponent);
242 // normalization_shift unconditionally extends the exponent size to avoid
243 // arithmetic overflow, but this cannot have happened here as the exponent
244 // had already been extended to dest_spec's size
245 result.exponent.resize(dest_spec.e);
246 }
247
248 // the flags get copied
249 result.sign=unpacked_src.sign;
250 result.NaN=unpacked_src.NaN;
251 result.infinity=unpacked_src.infinity;
252
253 // no rounding needed!
255 return pack(bias(result));
256 }
257 else // NOLINT(readability/braces)
258 #endif
259 {
260 // we actually need to round
261 unbiased_floatt result=unpack(src);
263 return round_and_pack(result);
264 }
265}
266
268{
269 return prop.land(
270 !exponent_all_zeros(src),
271 !exponent_all_ones(src));
272}
273
276 const unbiased_floatt &src1,
277 const unbiased_floatt &src2)
278{
279 // extend both
281 bv_utils.sign_extension(src1.exponent, src1.exponent.size()+1);
283 bv_utils.sign_extension(src2.exponent, src2.exponent.size()+1);
284
286
287 // compute shift distance (here is the subtraction)
289}
290
292 const bvt &src1,
293 const bvt &src2,
294 bool subtract)
295{
298
299 // subtract?
300 if(subtract)
301 unpacked2.sign=!unpacked2.sign;
302
303 // figure out which operand has the bigger exponent
306
307 const bvt bigger_exponent=
308 bv_utils.select(src2_bigger, unpacked2.exponent, unpacked1.exponent);
309
310 // swap fractions as needed
311 const bvt new_fraction1=
312 bv_utils.select(src2_bigger, unpacked2.fraction, unpacked1.fraction);
313
314 const bvt new_fraction2=
315 bv_utils.select(src2_bigger, unpacked1.fraction, unpacked2.fraction);
316
317 // compute distance
319
320 // limit the distance: shifting more than f+3 bits is unnecessary
321 const bvt limited_dist=limit_distance(distance, spec.f+3);
322
323 // pad fractions with 2 zeros from below
324 const bvt fraction1_padded=
326 const bvt fraction2_padded=
328
329 // shift new_fraction2
334
335 // sticky bit: or of the bits lost by the right-shift
338
339 // need to have two extra fraction bits for addition and rounding
340 const bvt fraction1_ext=
342 const bvt fraction2_ext=
344
345 unbiased_floatt result;
346
347 // now add/sub them
349 result.fraction=
351
352 // sign of result
353 literalt fraction_sign=result.fraction.back();
355
357
358 // adjust the exponent for the fact that we added two bits to the fraction
359 result.exponent=
361 bv_utils.sign_extension(result.exponent, result.exponent.size()+1),
362 bv_utils.build_constant(2, result.exponent.size()+1));
363
364 // NaN?
365 result.NaN=prop.lor(
366 prop.land(prop.land(unpacked1.infinity, unpacked2.infinity),
367 prop.lxor(unpacked1.sign, unpacked2.sign)),
368 prop.lor(unpacked1.NaN, unpacked2.NaN));
369
370 // infinity?
371 result.infinity=prop.land(
372 !result.NaN,
373 prop.lor(unpacked1.infinity, unpacked2.infinity));
374
375 // zero?
376 // Note that:
377 // 1. The zero flag isn't used apart from in divide and
378 // is only set on unpack
379 // 2. Subnormals mean that addition or subtraction can't round to 0,
380 // thus we can perform this test now
381 // 3. The rules for sign are different for zero
382 result.zero=prop.land(
383 !prop.lor(result.infinity, result.NaN),
384 !prop.lor(result.fraction));
385
386
387 // sign
391
393 prop.lselect(unpacked1.infinity, unpacked1.sign, unpacked2.sign);
394
395 #if 1
398 prop.lor(unpacked1.sign, unpacked2.sign),
399 prop.land(unpacked1.sign, unpacked2.sign));
400
401 result.sign=prop.lselect(
402 result.infinity,
404 prop.lselect(result.zero,
405 zero_sign,
406 add_sub_sign));
407 #else
408 result.sign=prop.lselect(
409 result.infinity,
412 #endif
413
414 #if 0
415 result.sign=const_literal(false);
416 result.fraction.resize(spec.f+1, const_literal(true));
417 result.exponent.resize(spec.e, const_literal(false));
418 result.NaN=const_literal(false);
419 result.infinity=const_literal(false);
420 // for(std::size_t i=0; i<result.fraction.size(); i++)
421 // result.fraction[i]=const_literal(true);
422
423 for(std::size_t i=0; i<result.fraction.size(); i++)
424 result.fraction[i]=new_fraction2[i];
425
426 return pack(bias(result));
427 #endif
428
429 return round_and_pack(result);
430}
431
434 const bvt &dist,
436{
437 std::size_t nb_bits = address_bits(limit);
438
440 upper_bits.erase(upper_bits.begin(), upper_bits.begin()+nb_bits);
442
444 lower_bits.resize(nb_bits);
445
446 bvt result;
447 result.resize(lower_bits.size());
448
449 // bitwise or with or_upper_bits
450 for(std::size_t i=0; i<result.size(); i++)
451 result[i]=prop.lor(lower_bits[i], or_upper_bits);
452
453 return result;
454}
455
457{
458 // unpack
461
462 // zero-extend the fractions
463 const bvt fraction1=
464 bv_utils.zero_extension(unpacked1.fraction, unpacked1.fraction.size()*2);
465 const bvt fraction2=
466 bv_utils.zero_extension(unpacked2.fraction, unpacked2.fraction.size()*2);
467
468 // multiply fractions
469 unbiased_floatt result;
471
472 // extend exponents to account for overflow
473 // add two bits, as we do extra arithmetic on it later
474 const bvt exponent1=
475 bv_utils.sign_extension(unpacked1.exponent, unpacked1.exponent.size()+2);
476 const bvt exponent2=
477 bv_utils.sign_extension(unpacked2.exponent, unpacked2.exponent.size()+2);
478
480
481 // adjust, we are thowing in an extra fraction bit
482 // it has been extended above
484
485 // new sign
486 result.sign=prop.lxor(unpacked1.sign, unpacked2.sign);
487
488 // infinity?
489 result.infinity=prop.lor(unpacked1.infinity, unpacked2.infinity);
490
491 // NaN?
492 {
494
495 NaN_cond.push_back(is_NaN(src1));
496 NaN_cond.push_back(is_NaN(src2));
497
498 // infinity * 0 is NaN!
499 NaN_cond.push_back(prop.land(unpacked1.zero, unpacked2.infinity));
500 NaN_cond.push_back(prop.land(unpacked2.zero, unpacked1.infinity));
501
502 result.NaN=prop.lor(NaN_cond);
503 }
504
505 return round_and_pack(result);
506}
507
509 const bvt &multiply_lhs,
510 const bvt &multiply_rhs,
511 const bvt &addend)
512{
513 // Fused multiply-add: round(src1 * src2 + src3) with a single rounding.
514 // The product src1 * src2 is computed exactly (double-width fraction),
515 // then src3 is added, and the result is rounded once.
516
520
521 // --- Exact product a*b ---
522 const std::size_t frac_size = unpacked_lhs.fraction.size(); // f+1
523
527 // Product fraction has width 2*(f+1) bits (double-width fraction w.r.t.
528 // inputs).
529 // The value is prod_fraction * 2^(prod_exponent - (prod_fraction.size()-1)).
530 // Keep full width for exact intermediate result.
531
534 unpacked_lhs.exponent, unpacked_lhs.exponent.size() + 2),
536 unpacked_rhs.exponent, unpacked_rhs.exponent.size() + 2));
538
540
541 // --- Align c's fraction to the product's wider format ---
542 // Product fraction: prod_width bits, binary point after MSB.
543 // c fraction: (f+1) bits. Pad on the right to match width, then
544 // adjust exponent to compensate.
545 const std::size_t prod_width = prod_fraction.size();
546 const std::size_t c_pad = prod_width - frac_size;
551
552 // --- Add product + c (same logic as add_sub) ---
554 literalt c_bigger = exp_diff.back();
555
559
562
565
570
572 bvt small_ext =
574
577
578 literalt fraction_sign = sum.back();
580
581 unbiased_floatt result;
582 result.fraction = sum;
583 result.exponent = bv_utils.add(
585 bv_utils.build_constant(2, bigger_exp.size() + 1));
586
587 // Sign
590
591 // NaN: any input NaN, inf*0, or inf+(-inf) in the addition
592 literalt prod_inf = prop.lor(unpacked_lhs.infinity, unpacked_rhs.infinity);
593 result.NaN = prop.lor(
596 is_NaN(addend),
597 prop.land(unpacked_lhs.zero, unpacked_rhs.infinity),
598 prop.land(unpacked_rhs.zero, unpacked_lhs.infinity),
599 prop.land(
600 prop.land(prod_inf, unpacked_add.infinity),
602
603 result.infinity =
604 prop.land(!result.NaN, prop.lor(prod_inf, unpacked_add.infinity));
605
606 result.zero = prop.land(
607 !prop.lor(result.infinity, result.NaN), !prop.lor(result.fraction));
608
614
615 result.sign = prop.lselect(
616 result.infinity,
619
620 return round_and_pack(result);
621}
622
624{
625 // unpack
628
629 std::size_t div_width=unpacked1.fraction.size()*2+1;
630
631 // pad fraction1 with zeros
632 bvt fraction1=unpacked1.fraction;
633 fraction1.reserve(div_width);
634 while(fraction1.size()<div_width)
635 fraction1.insert(fraction1.begin(), const_literal(false));
636
637 // zero-extend fraction2
638 const bvt fraction2=
640
641 // divide fractions
642 unbiased_floatt result;
643 bvt rem;
645
646 // is there a remainder?
648
649 // we throw this into the result, as one additional bit,
650 // to get the right rounding decision
651 result.fraction.insert(
652 result.fraction.begin(), have_remainder);
653
654 // We will subtract the exponents;
655 // to account for overflow, we add a bit.
656 // we add a second bit for the adjust by extra fraction bits
657 const bvt exponent1=
658 bv_utils.sign_extension(unpacked1.exponent, unpacked1.exponent.size()+2);
659 const bvt exponent2=
660 bv_utils.sign_extension(unpacked2.exponent, unpacked2.exponent.size()+2);
661
662 // subtract exponents
664
665 // adjust, as we have thown in extra fraction bits
666 result.exponent=bv_utils.add(
669
670 // new sign
671 result.sign=prop.lxor(unpacked1.sign, unpacked2.sign);
672
673 // Infinity? This happens when
674 // 1) dividing a non-nan/non-zero by zero, or
675 // 2) first operand is inf and second is non-nan and non-zero
676 // In particular, inf/0=inf.
677 result.infinity=
678 prop.lor(
679 prop.land(!unpacked1.zero,
680 prop.land(!unpacked1.NaN,
681 unpacked2.zero)),
682 prop.land(unpacked1.infinity,
683 prop.land(!unpacked2.NaN,
684 !unpacked2.zero)));
685
686 // NaN?
687 result.NaN=prop.lor(unpacked1.NaN,
688 prop.lor(unpacked2.NaN,
689 prop.lor(prop.land(unpacked1.zero, unpacked2.zero),
690 prop.land(unpacked1.infinity, unpacked2.infinity))));
691
692 // Division by infinity produces zero, unless we have NaN
694 prop.land(!unpacked1.NaN, unpacked2.infinity);
695
697 bv_utils.zeros(result.fraction.size()), result.fraction);
698
699 return round_and_pack(result);
700}
701
703{
704 /* The semantics of floating-point remainder implemented as below
705 is the sensible one. Unfortunately this is not the one required
706 by IEEE-754 or fmod / remainder. Martin has discussed the
707 'correct' semantics with Christoph and Alberto at length as
708 well as talking to various hardware designers and we still
709 hasn't found a good way to implement them in a solver.
710 We have some approaches that are correct but they really
711 don't scale. */
712
714
715 // stub: do (src2.infinity ? src1 : (src1/src2)*src2))
716 return bv_utils.select(
717 unpacked2.infinity, src1, sub(src1, mul(div(src1, src2), src2)));
718}
719
721{
722 PRECONDITION(!src.empty());
723 bvt result=src;
724 literalt &sign_bit=result[result.size()-1];
726 return result;
727}
728
730{
731 PRECONDITION(!src.empty());
732 bvt result=src;
733 result[result.size()-1]=const_literal(false);
734 return result;
735}
736
738 const bvt &src1,
739 relt rel,
740 const bvt &src2)
741{
742 if(rel==relt::GT)
743 return relation(src2, relt::LT, src1); // swapped
744 else if(rel==relt::GE)
745 return relation(src2, relt::LE, src1); // swapped
746
747 PRECONDITION(rel == relt::EQ || rel == relt::LT || rel == relt::LE);
748
749 // special cases: -0 and 0 are equal
753
754 // NaN compares to nothing
758
759 if(rel==relt::LT || rel==relt::LE)
760 {
762
763 // signs different? trivial! Unless Zero.
764
767
768 // as long as the signs match: compare like unsigned numbers
769
770 // this works due to the BIAS
772
773 // if both are negative (and not the same), need to turn around!
776
779 sign_bit(src1),
780 less_than2);
781
782 if(rel==relt::LT)
783 {
784 bvt and_bv;
785 and_bv.push_back(less_than3);
786 and_bv.push_back(!bitwise_equal); // for the case of two negative numbers
787 and_bv.push_back(!both_zero);
788 and_bv.push_back(!NaN);
789
790 return prop.land(and_bv);
791 }
792 else if(rel==relt::LE)
793 {
794 bvt or_bv;
795 or_bv.push_back(less_than3);
796 or_bv.push_back(both_zero);
797 or_bv.push_back(bitwise_equal);
798
799 return prop.land(prop.lor(or_bv), !NaN);
800 }
801 else
803 }
804 else if(rel==relt::EQ)
805 {
807
808 return prop.land(
810 !NaN);
811 }
812
813 // not reached
815 return const_literal(false);
816}
817
819{
820 PRECONDITION(!src.empty());
822 all_but_sign=src;
823 all_but_sign.resize(all_but_sign.size()-1);
825}
826
828{
829 bvt and_bv;
830 and_bv.push_back(!sign_bit(src));
831 and_bv.push_back(exponent_all_ones(src));
832 and_bv.push_back(fraction_all_zeros(src));
833 return prop.land(and_bv);
834}
835
837{
838 return prop.land(
840 fraction_all_zeros(src));
841}
842
845{
846 return bv_utils.extract(src, spec.f, spec.f+spec.e-1);
847}
848
851{
852 return bv_utils.extract(src, 0, spec.f-1);
853}
854
856{
857 bvt and_bv;
858 and_bv.push_back(sign_bit(src));
859 and_bv.push_back(exponent_all_ones(src));
860 and_bv.push_back(fraction_all_zeros(src));
861 return prop.land(and_bv);
862}
863
865{
866 return prop.land(exponent_all_ones(src),
867 !fraction_all_zeros(src));
868}
869
871{
872 bvt exponent=src;
873
874 // removes the fractional part
875 exponent.erase(exponent.begin(), exponent.begin()+spec.f);
876
877 // removes the sign
878 exponent.resize(spec.e);
879
880 return bv_utils.is_all_ones(exponent);
881}
882
884{
885 bvt exponent=src;
886
887 // removes the fractional part
888 exponent.erase(exponent.begin(), exponent.begin()+spec.f);
889
890 // removes the sign
891 exponent.resize(spec.e);
892
893 return bv_utils.is_zero(exponent);
894}
895
897{
898 PRECONDITION(src.size() == spec.width());
899 // does not include hidden bit
900 bvt tmp=src;
901 tmp.resize(spec.f);
902 return bv_utils.is_zero(tmp);
903}
904
906void float_utilst::normalization_shift(bvt &fraction, bvt &exponent)
907{
908 #if 0
909 // this thing is quadratic!
910
911 bvt new_fraction=prop.new_variables(fraction.size());
912 bvt new_exponent=prop.new_variables(exponent.size());
913
914 // i is the shift distance
915 for(std::size_t i=0; i<fraction.size(); i++)
916 {
917 bvt equal;
918
919 // the bits above need to be zero
920 for(std::size_t j=0; j<i; j++)
921 equal.push_back(
922 !fraction[fraction.size()-1-j]);
923
924 // this one needs to be one
925 equal.push_back(fraction[fraction.size()-1-i]);
926
927 // iff all of that holds, we shift here!
928 literalt shift=prop.land(equal);
929
930 // build shifted value
933
934 // build new exponent
935 bvt adjustment=bv_utils.build_constant(-i, exponent.size());
938 }
939
940 // Fraction all zero? It stays zero.
941 // The exponent is undefined in that case.
944 zero_fraction.resize(fraction.size(), const_literal(false));
946
947 fraction=new_fraction;
948 exponent=new_exponent;
949
950 #else
951
952 // n-log-n alignment shifter.
953 // The worst-case shift is the number of fraction
954 // bits minus one, in case the fraction is one exactly.
955 PRECONDITION(!fraction.empty());
956 std::size_t depth = address_bits(fraction.size() - 1);
957
958 // sign-extend to ensure the arithmetic below cannot result in overflow/underflow
959 exponent =
960 bv_utils.sign_extension(exponent, std::max(depth, exponent.size() + 1));
961
962 bvt exponent_delta=bv_utils.zeros(exponent.size());
963
964 // Fraction smaller than the max distance? Pad up with zeros.
965 std::size_t max_distance = 1 << (depth - 1);
966
967 if(fraction.size() < max_distance)
968 fraction = bv_utils.zero_extension(fraction, max_distance);
969
970 for(int d=depth-1; d>=0; d--)
971 {
972 std::size_t distance=(1<<d);
973
974 INVARIANT(
975 fraction.size() >= distance, "fraction must be larger or equal distance");
976
977 // check if first 'distance'-many bits are zeros
978 const bvt prefix=bv_utils.extract_msb(fraction, distance);
980
981 // If so, shift the zeros out left by 'distance'.
982 // Otherwise, leave as is.
983 const bvt shifted=
984 bv_utils.shift(fraction, bv_utilst::shiftt::SHIFT_LEFT, distance);
985
986 fraction=
988
989 // add corresponding weight to exponent
990 INVARIANT(
991 d < (signed)exponent_delta.size(),
992 "depth must be smaller than exponent size");
994 }
995
996 exponent=bv_utils.sub(exponent, exponent_delta);
997
998 #endif
999}
1000
1003{
1004 PRECONDITION(exponent.size() >= spec.e);
1005
1007
1008 // Is the exponent strictly less than -bias+1, i.e., exponent<-bias+1?
1009 // This is transformed to distance=(-bias+1)-exponent
1010 // i.e., distance>0
1011 // Note that 1-bias is the exponent represented by 0...01,
1012 // i.e. the exponent of the smallest normal number and thus the 'base'
1013 // exponent for subnormal numbers.
1014
1015#if 1
1016 // Need to sign extend to avoid overflow. Note that this is a
1017 // relatively rare problem as the value needs to be close to the top
1018 // of the exponent range and then range must not have been
1019 // previously extended as add, multiply, etc. do. This is primarily
1020 // to handle casting down from larger ranges.
1021 exponent=bv_utils.sign_extension(exponent, exponent.size() + 1);
1022#endif
1023
1024 bvt distance=bv_utils.sub(
1025 bv_utils.build_constant(-bias+1, exponent.size()), exponent);
1026
1027 // use sign bit
1029 !distance.back(),
1030 !bv_utils.is_zero(distance));
1031
1032#if 1
1033 // Care must be taken to not loose information required for the
1034 // guard and sticky bits. +3 is for the hidden, guard and sticky bits.
1035 if(fraction.size() < (spec.f + 3))
1036 {
1037 // Add zeros at the LSB end for the guard bit to shift into
1038 fraction=
1039 bv_utils.concatenate(bv_utils.zeros((spec.f + 3) - fraction.size()),
1040 fraction);
1041 }
1042
1043 bvt denormalisedFraction=fraction;
1044
1047 sticky_right_shift(fraction, distance, sticky_bit);
1049
1050 fraction=
1052 denormal,
1054 fraction);
1055
1056#else
1057 fraction=
1059 denormal,
1060 bv_utils.shift(fraction, bv_utilst::LRIGHT, distance),
1061 fraction);
1062#endif
1063
1064 exponent=
1066 bv_utils.build_constant(-bias, exponent.size()),
1067 exponent);
1068}
1069
1071{
1072 // incoming: some fraction (with explicit 1),
1073 // some exponent without bias
1074 // outgoing: rounded, with right size, but still unpacked
1075
1078
1079 {
1080 std::size_t exponent_bits = std::max(address_bits(spec.f), spec.e) + 1;
1081
1082 // before normalization, make sure exponent is large enough
1084 {
1085 // sign extend
1088 }
1089 }
1090
1091 // align it!
1094
1095 unbiased_floatt result;
1098 result.sign=src.sign;
1099 result.NaN=src.NaN;
1100 result.infinity=src.infinity;
1101
1102 round_fraction(result);
1103 round_exponent(result);
1104
1105 return result;
1106}
1107
1109{
1110 return pack(bias(rounder(src)));
1111}
1112
1115 const std::size_t dest_bits,
1116 const literalt sign,
1117 const bvt &fraction)
1118{
1119 PRECONDITION(dest_bits < fraction.size());
1120
1121 // we have too many fraction bits
1122 std::size_t extra_bits=fraction.size()-dest_bits;
1123
1124 // more than two extra bits are superflus, and are
1125 // turned into a sticky bit
1126
1128
1129 if(extra_bits>=2)
1130 {
1131 // We keep most-significant bits, and thus the tail is made
1132 // of least-significant bits.
1133 bvt tail=bv_utils.extract(fraction, 0, extra_bits-2);
1134 sticky_bit=prop.lor(tail);
1135 }
1136
1137 // the rounding bit is the last extra bit
1138 INVARIANT(
1139 extra_bits >= 1, "the extra bits include at least the rounding bit");
1140 literalt rounding_bit=fraction[extra_bits-1];
1141
1142 // we get one bit of the fraction for some rounding decisions
1144
1145 // round-to-nearest (ties to even)
1146 literalt round_to_even=
1149
1150 // round up
1151 literalt round_to_plus_inf=
1152 prop.land(!sign,
1154
1155 // round down
1156 literalt round_to_minus_inf=
1157 prop.land(sign,
1159
1160 // round to zero
1161 literalt round_to_zero=
1162 const_literal(false);
1163
1164 // round-to-nearest (ties to away)
1165 literalt round_to_away = rounding_bit;
1166
1167 // now select appropriate one
1168 // clang-format off
1169 return prop.lselect(rounding_mode_bits.round_to_even, round_to_even,
1174 prop.new_variable()))))); // otherwise non-det
1175 // clang-format on
1176}
1177
1179{
1180 std::size_t fraction_size=spec.f+1;
1181
1182 // do we need to enlarge the fraction?
1183 if(result.fraction.size()<fraction_size)
1184 {
1185 // pad with zeros at bottom
1186 std::size_t padding=fraction_size-result.fraction.size();
1187
1190 result.fraction);
1191
1192 INVARIANT(
1193 result.fraction.size() == fraction_size,
1194 "sizes should be equal as result.fraction was zero-padded");
1195 }
1196 else if(result.fraction.size()==fraction_size) // it stays
1197 {
1198 // do nothing
1199 }
1200 else // fraction gets smaller -- rounding
1201 {
1202 std::size_t extra_bits=result.fraction.size()-fraction_size;
1203 INVARIANT(
1204 extra_bits >= 1,
1205 "the extra bits should at least include the rounding bit");
1206
1207 // this computes the rounding decision
1209 fraction_size, result.sign, result.fraction);
1210
1211 // chop off all the extra bits
1212 result.fraction=bv_utils.extract(
1213 result.fraction, extra_bits, result.fraction.size()-1);
1214
1215 INVARIANT(
1216 result.fraction.size() == fraction_size,
1217 "sizes should be equal as extra bits were chopped off from "
1218 "result.fraction");
1219
1220#if 0
1221 // *** does not catch when the overflow goes subnormal -> normal ***
1222 // incrementing the fraction might result in an overflow
1223 result.fraction=
1224 bv_utils.zero_extension(result.fraction, result.fraction.size()+1);
1225
1226 result.fraction=bv_utils.incrementer(result.fraction, increment);
1227
1228 literalt overflow=result.fraction.back();
1229
1230 // In case of an overflow, the exponent has to be incremented.
1231 // "Post normalization" is then required.
1232 result.exponent=
1234
1235 // post normalization of the fraction
1236 literalt integer_part1=result.fraction.back();
1237 literalt integer_part0=result.fraction[result.fraction.size()-2];
1239
1240 result.fraction.resize(result.fraction.size()-1);
1241 result.fraction.back()=new_integer_part;
1242
1243#else
1244 // When incrementing due to rounding there are two edge
1245 // cases we need to be aware of:
1246 // 1. If the number is normal, the increment can overflow.
1247 // In this case we need to increment the exponent and
1248 // set the MSB of the fraction to 1.
1249 // 2. If the number is the largest subnormal, the increment
1250 // can change the MSB making it normal. Thus the exponent
1251 // must be incremented but the fraction will be OK.
1252 literalt oldMSB=result.fraction.back();
1253
1254 result.fraction=bv_utils.incrementer(result.fraction, increment);
1255
1256 // Normal overflow when old MSB == 1 and new MSB == 0
1257 literalt overflow=prop.land(oldMSB, neg(result.fraction.back()));
1258
1259 // Subnormal to normal transition when old MSB == 0 and new MSB == 1
1261 prop.land(neg(oldMSB), result.fraction.back());
1262
1263 // In case of an overflow or subnormal to normal conversion,
1264 // the exponent has to be incremented.
1265 result.exponent=
1268
1269 // post normalization of the fraction
1270 // In the case of overflow, set the MSB to 1
1271 // The subnormal case will have (only) the MSB set to 1
1272 result.fraction.back()=prop.lor(result.fraction.back(), overflow);
1273#endif
1274 }
1275}
1276
1278{
1279 PRECONDITION(result.exponent.size() >= spec.e);
1280
1281 // do we need to enlarge the exponent?
1282 if(result.exponent.size() == spec.e) // it stays
1283 {
1284 // do nothing
1285 }
1286 else // exponent gets smaller -- chop off top bits
1287 {
1288 bvt old_exponent=result.exponent;
1289 result.exponent.resize(spec.e);
1290
1291 // max_exponent is the maximum representable
1292 // i.e. 1 higher than the maximum possible for a normal number
1293 bvt max_exponent=
1295 spec.max_exponent()-spec.bias(), old_exponent.size());
1296
1297 // the exponent is garbage if the fractional is zero
1298
1300 prop.land(
1301 !bv_utils.signed_less_than(old_exponent, max_exponent),
1302 !bv_utils.is_zero(result.fraction));
1303
1304#if 1
1305 // Directed rounding modes round overflow to the maximum normal
1306 // depending on the particular mode and the sign
1310 !result.sign),
1312 result.sign)));
1313
1316
1317
1320 spec.max_exponent()-(spec.bias() + 1), result.exponent.size());
1321
1322 result.exponent=
1324
1325 result.fraction=
1327 bv_utils.inverted(bv_utils.zeros(result.fraction.size())),
1328 result.fraction);
1329
1330 result.infinity=prop.lor(result.infinity,
1333#else
1335#endif
1336 }
1337}
1338
1341{
1342 PRECONDITION(src.fraction.size() == spec.f + 1);
1343
1344 biased_floatt result;
1345
1346 result.sign=src.sign;
1347 result.NaN=src.NaN;
1348 result.infinity=src.infinity;
1349
1350 // we need to bias the new exponent
1351 result.exponent=add_bias(src.exponent);
1352
1353 // strip off hidden bit
1354
1355 literalt hidden_bit=src.fraction[src.fraction.size()-1];
1357
1358 result.fraction=src.fraction;
1359 result.fraction.resize(spec.f);
1360
1361 // make exponent zero if its denormal
1362 // (includes zero)
1363 for(std::size_t i=0; i<result.exponent.size(); i++)
1364 result.exponent[i]=
1365 prop.land(result.exponent[i], !denormal);
1366
1367 return result;
1368}
1369
1371{
1372 PRECONDITION(src.size() == spec.e);
1373
1374 return bv_utils.add(
1375 src,
1377}
1378
1380{
1381 PRECONDITION(src.size() == spec.e);
1382
1383 return bv_utils.sub(
1384 src,
1386}
1387
1389{
1390 PRECONDITION(src.size() == spec.width());
1391
1392 unbiased_floatt result;
1393
1394 result.sign=sign_bit(src);
1395
1396 result.fraction=get_fraction(src);
1397 result.fraction.push_back(is_normal(src)); // add hidden bit
1398
1399 result.exponent=get_exponent(src);
1400 CHECK_RETURN(result.exponent.size() == spec.e);
1401
1402 // unbias the exponent
1404
1405 result.exponent=
1408 sub_bias(result.exponent));
1409
1410 result.infinity=is_infinity(src);
1411 result.zero=is_zero(src);
1412 result.NaN=is_NaN(src);
1413
1414 return result;
1415}
1416
1418{
1419 PRECONDITION(src.fraction.size() == spec.f);
1420 PRECONDITION(src.exponent.size() == spec.e);
1421
1422 bvt result;
1423 result.resize(spec.width());
1424
1425 // do sign
1426 // we make this 'false' for NaN
1427 result[result.size()-1]=
1428 prop.lselect(src.NaN, const_literal(false), src.sign);
1429
1431 prop.lor(src.NaN, src.infinity);
1432
1433 // just copy fraction
1434 for(std::size_t i=0; i<spec.f; i++)
1435 result[i]=prop.land(src.fraction[i], !infinity_or_NaN);
1436
1437 result[0]=prop.lor(result[0], src.NaN);
1438
1439 // do exponent
1440 for(std::size_t i=0; i<spec.e; i++)
1441 result[i+spec.f]=prop.lor(
1442 src.exponent[i],
1444
1445 return result;
1446}
1447
1449{
1451
1452 for(std::size_t i=0; i<src.size(); i++)
1453 int_value+=power(2, i)*prop.l_get(src[i]).is_true();
1454
1455 ieee_float_valuet result;
1456 result.spec=spec;
1457 result.unpack(int_value);
1458
1459 return result;
1460}
1461
1463 const bvt &op,
1464 const bvt &dist,
1466{
1467 std::size_t d=1;
1468 bvt result=op;
1469 sticky=const_literal(false);
1470
1471 for(std::size_t stage=0; stage<dist.size(); stage++)
1472 {
1473 if(dist[stage]!=const_literal(false))
1474 {
1476
1477 bvt lost_bits;
1478
1479 if(d<=result.size())
1480 lost_bits=bv_utils.extract(result, 0, d-1);
1481 else
1482 lost_bits=result;
1483
1484 sticky=prop.lor(
1486 sticky);
1487
1488 result=bv_utils.select(dist[stage], tmp, result);
1489 }
1490
1491 d=d<<1;
1492 }
1493
1494 return result;
1495}
1496
1498 const bvt &src1,
1499 const bvt &)
1500{
1501 return src1;
1502}
1503
1505 const bvt &op0,
1506 const bvt &)
1507{
1508 return op0;
1509}
std::size_t address_bits(const mp_integer &size)
ceil(log2(size))
mp_integer power(const mp_integer &base, const mp_integer &exponent)
A multi-precision implementation of the power operator.
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:566
static bvt inverted(const bvt &op)
Definition bv_utils.cpp:648
literalt signed_less_than(const bvt &bv0, const bvt &bv1)
literalt is_all_ones(const bvt &op)
Definition bv_utils.h:163
literalt is_not_zero(const bvt &op)
Definition bv_utils.h:151
static bvt extract_msb(const bvt &a, std::size_t n)
Definition bv_utils.cpp:69
bvt add(const bvt &op0, const bvt &op1)
Definition bv_utils.h:71
static bvt zero_extension(const bvt &bv, std::size_t new_size)
Definition bv_utils.h:192
bvt absolute_value(const bvt &op)
bvt select(literalt s, const bvt &a, const bvt &b)
If s is true, selects a otherwise selects b.
Definition bv_utils.cpp:107
static bvt build_constant(const mp_integer &i, std::size_t width)
Definition bv_utils.cpp:16
literalt is_zero(const bvt &op)
Definition bv_utils.h:148
literalt equal(const bvt &op0, const bvt &op1)
Bit-blasting ID_equal and use in other encodings.
void cond_implies_equal(literalt cond, const bvt &a, const bvt &b)
literalt unsigned_less_than(const bvt &bv0, const bvt &bv1)
bvt incrementer(const bvt &op, literalt carry_in)
Definition bv_utils.cpp:640
bvt add_sub(const bvt &op0, const bvt &op1, bool subtract)
Definition bv_utils.cpp:349
static bvt shift(const bvt &op, const shiftt shift, std::size_t distance)
Definition bv_utils.cpp:548
static bvt concatenate(const bvt &a, const bvt &b)
Definition bv_utils.cpp:91
bvt sub(const bvt &op0, const bvt &op1)
Definition bv_utils.h:72
static bvt extract(const bvt &a, std::size_t first, std::size_t last)
Definition bv_utils.cpp:53
bvt inc(const bvt &op)
Definition bv_utils.h:38
bvt unsigned_multiplier(const bvt &op0, const bvt &op1)
Definition bv_utils.cpp:930
void unsigned_divider(const bvt &op0, const bvt &op1, bvt &res, bvt &rem)
bvt cond_negate(const bvt &bv, const literalt cond)
static bvt zeros(std::size_t new_size)
Definition bv_utils.h:197
static bvt sign_extension(const bvt &bv, std::size_t new_size)
Definition bv_utils.h:187
unbiased_floatt rounder(const unbiased_floatt &)
bvt to_integer(const bvt &src, std::size_t int_width, bool is_signed)
literalt is_NaN(const bvt &)
virtual void normalization_shift(bvt &fraction, bvt &exponent)
normalize fraction/exponent pair returns 'zero' if fraction is zero
bv_utilst bv_utils
bvt debug2(const bvt &op0, const bvt &op1)
virtual bvt rem(const bvt &src1, const bvt &src2)
bvt round_to_integral(const bvt &)
literalt is_plus_inf(const bvt &)
ieee_float_valuet get(const bvt &) const
literalt is_infinity(const bvt &)
void set_rounding_mode(const bvt &)
void round_exponent(unbiased_floatt &result)
void round_fraction(unbiased_floatt &result)
bvt sticky_right_shift(const bvt &op, const bvt &dist, literalt &sticky)
unbiased_floatt unpack(const bvt &)
bvt from_unsigned_integer(const bvt &)
virtual bvt mul(const bvt &src1, const bvt &src2)
bvt debug1(const bvt &op0, const bvt &op1)
bvt add_bias(const bvt &exponent)
bvt round_and_pack(const unbiased_floatt &)
bvt subtract_exponents(const unbiased_floatt &src1, const unbiased_floatt &src2)
Subtracts the exponents.
bvt get_fraction(const bvt &)
Gets the fraction without hidden bit in a floating-point bit-vector src.
literalt is_minus_inf(const bvt &)
literalt fraction_rounding_decision(const std::size_t dest_bits, const literalt sign, const bvt &fraction)
rounding decision for fraction using sticky bit
bvt get_exponent(const bvt &)
Gets the unbiased exponent in a floating-point bit-vector.
void denormalization_shift(bvt &fraction, bvt &exponent)
make sure exponent is not too small; the exponent is unbiased
bvt to_unsigned_integer(const bvt &src, std::size_t int_width)
bvt build_constant(const ieee_float_valuet &)
virtual bvt div(const bvt &src1, const bvt &src2)
bvt negate(const bvt &)
literalt exponent_all_zeros(const bvt &)
literalt fraction_all_zeros(const bvt &)
bvt fma(const bvt &multiply_lhs, const bvt &multiply_rhs, const bvt &addend)
Fused multiply-add: round(multiply_lhs * multiply_rhs + addend) with a single rounding step.
bvt from_signed_integer(const bvt &)
literalt is_zero(const bvt &)
bvt sub(const bvt &src1, const bvt &src2)
bvt sub_bias(const bvt &exponent)
bvt limit_distance(const bvt &dist, mp_integer limit)
Limits the shift distance.
bvt conversion(const bvt &src, const ieee_float_spect &dest_spec)
bvt pack(const biased_floatt &)
virtual bvt add_sub(const bvt &src1, const bvt &src2, bool subtract)
bvt abs(const bvt &)
static literalt sign_bit(const bvt &src)
Definition float_utils.h:98
ieee_float_spect spec
Definition float_utils.h:94
literalt exponent_all_ones(const bvt &)
bvt to_signed_integer(const bvt &src, std::size_t int_width)
literalt is_normal(const bvt &)
literalt relation(const bvt &src1, relt rel, const bvt &src2)
rounding_mode_bitst rounding_mode_bits
Definition float_utils.h:73
biased_floatt bias(const unbiased_floatt &)
takes an unbiased float, and applies the bias
mp_integer bias() const
mp_integer max_exponent() const
std::size_t f
Definition ieee_float.h:26
std::size_t width() const
Definition ieee_float.h:50
std::size_t e
Definition ieee_float.h:26
An IEEE 754 floating-point value, including specificiation.
Definition ieee_float.h:117
bool is_NaN() const
Definition ieee_float.h:259
ieee_float_spect spec
Definition ieee_float.h:119
bool get_sign() const
Definition ieee_float.h:254
const mp_integer & get_fraction() const
Definition ieee_float.h:264
void unpack(const mp_integer &)
bool is_infinity() const
Definition ieee_float.h:260
const mp_integer & get_exponent() const
Definition ieee_float.h:263
An IEEE 754 value plus a rounding mode, enabling operations with rounding on values.
Definition ieee_float.h:338
bool is_true() const
Definition literal.h:156
virtual literalt land(literalt a, literalt b)=0
virtual literalt lselect(literalt a, literalt b, literalt c)=0
virtual literalt lxor(literalt a, literalt b)=0
virtual bvt new_variables(std::size_t width)
generates a bitvector of given width with new variables
Definition prop.cpp:30
virtual literalt new_variable()=0
virtual literalt lor(literalt a, literalt b)=0
virtual tvt l_get(literalt a) const =0
literalt neg(literalt a)
Definition literal.h:193
std::vector< literalt > bvt
Definition literal.h:201
literalt const_literal(bool value)
Definition literal.h:188
BigInt mp_integer
Definition smt_terms.h:17
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
#define UNREACHABLE
This should be used to mark dead code.
Definition invariant.h:525
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
bool is_signed(const typet &t)
Convenience function – is the type signed?
Definition util.cpp:45