CBMC
Loading...
Searching...
No Matches
float_utils.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
9#include "float_utils.h"
10
11#include <algorithm>
12
13#include <util/arith_tools.h>
14
16{
17 bvt round_to_even=
19 bvt round_to_plus_inf=
21 bvt round_to_minus_inf=
23 bvt round_to_zero=
25 bvt round_to_away =
27
29 rounding_mode_bits.round_to_plus_inf=bv_utils.equal(src, round_to_plus_inf);
30 rounding_mode_bits.round_to_minus_inf=bv_utils.equal(src, round_to_minus_inf);
32 rounding_mode_bits.round_to_away = bv_utils.equal(src, round_to_away);
33}
34
36{
37 unbiased_floatt result;
38
39 // we need to convert negative integers
40 result.sign=sign_bit(src);
41
43
44 // build an exponent (unbiased) -- this is signed!
45 result.exponent=
47 src.size()-1,
48 address_bits(src.size() - 1) + 1);
49
50 return round_and_pack(result);
51}
52
54{
55 unbiased_floatt result;
56
57 result.fraction=src;
58
59 // build an exponent (unbiased) -- this is signed!
60 result.exponent=
62 src.size()-1,
63 address_bits(src.size() - 1) + 1);
64
65 result.sign=const_literal(false);
66
67 return round_and_pack(result);
68}
69
71 const bvt &src,
72 std::size_t dest_width)
73{
74 return to_integer(src, dest_width, true);
75}
76
78 const bvt &src,
79 std::size_t dest_width)
80{
81 return to_integer(src, dest_width, false);
82}
83
85 const bvt &src,
86 std::size_t dest_width,
87 bool is_signed)
88{
89 PRECONDITION(src.size() == spec.width());
90
91 // The following is the usual case in ANSI-C, and we optimize for that.
93
94 const unbiased_floatt unpacked = unpack(src);
95
96 bvt fraction = unpacked.fraction;
97
98 if(dest_width > fraction.size())
99 {
101 bv_utils.build_constant(0U, dest_width - fraction.size());
102 fraction.insert(
103 fraction.begin(), lsb_extension.begin(), lsb_extension.end());
104 }
105
106 // if the exponent is positive, shift right
107 bvt offset =
108 bv_utils.build_constant(fraction.size() - 1, unpacked.exponent.size());
109 bvt distance = bv_utils.sub(offset, unpacked.exponent);
112
113 // if the exponent is negative, we have zero anyways
114 bvt result = shift_result;
115 literalt exponent_sign = unpacked.exponent[unpacked.exponent.size() - 1];
116
117 for(std::size_t i = 0; i < result.size(); i++)
118 result[i] = prop.land(result[i], !exponent_sign);
119
120 // chop out the right number of bits from the result
121 if(result.size() > dest_width)
122 {
123 result.resize(dest_width);
124 }
125
126 INVARIANT(
127 result.size() == dest_width,
128 "result bitvector width should equal the destination bitvector width");
129
130 // if signed, apply sign.
131 if(is_signed)
132 result = bv_utils.cond_negate(result, unpacked.sign);
133 else
134 {
135 // It's unclear what the behaviour for negative floats
136 // to integer shall be.
137 }
138
139 return result;
140}
141
143{
144 unbiased_floatt result;
145
146 result.sign=const_literal(src.get_sign());
147 result.NaN=const_literal(src.is_NaN());
148 result.infinity=const_literal(src.is_infinity());
151
152 return pack(bias(result));
153}
154
156{
157 PRECONDITION(src.size() == spec.width());
158
159 // Zero? NaN? Infinity?
160 auto unpacked = unpack(src);
161 auto is_special = prop.lor({unpacked.zero, unpacked.NaN, unpacked.infinity});
162
163 // add 2^f, where f is the number of fraction bits,
164 // by adding f to the exponent
167
169
170 // abs(x) >= magic_number? If so, then there is no fractional part.
172
173 magic_number_bv.back() = src.back(); // copy sign bit
174
175 auto tmp1 = add_sub(src, magic_number_bv, false);
176
177 auto tmp2 = add_sub(tmp1, magic_number_bv, true);
178
179 // restore the original sign bit
180 tmp2.back() = src.back();
181
183}
184
186 const bvt &src,
188{
189 PRECONDITION(src.size() == spec.width());
190
191 #if 1
192 // Catch the special case in which we extend,
193 // e.g. single to double.
194 // In this case, rounding can be avoided,
195 // but a denormal number may become normal.
196 // Be careful to exclude the difficult case
197 // when denormalised numbers in the old format
198 // can be converted to denormalised numbers in the
199 // new format. Note that this is rare and will only
200 // happen with very non-standard formats.
201
202 int sourceSmallestNormalExponent=-((1 << (spec.e - 1)) - 1);
205
206 // Using the fact that f doesn't include the hidden bit
207
208 int destSmallestNormalExponent=-((1 << (dest_spec.e - 1)) - 1);
209
210 if(dest_spec.e>=spec.e &&
211 dest_spec.f>=spec.f &&
213 {
215 unbiased_floatt result;
216
217 // the fraction gets zero-padded
218 std::size_t padding=dest_spec.f-spec.f;
219 result.fraction=
221
222 // the exponent gets sign-extended
223 result.exponent=
225
226 // if the number was denormal and is normal in the new format,
227 // normalise it!
228 if(dest_spec.e > spec.e)
229 {
230 normalization_shift(result.fraction, result.exponent);
231 // normalization_shift unconditionally extends the exponent size to avoid
232 // arithmetic overflow, but this cannot have happened here as the exponent
233 // had already been extended to dest_spec's size
234 result.exponent.resize(dest_spec.e);
235 }
236
237 // the flags get copied
238 result.sign=unpacked_src.sign;
239 result.NaN=unpacked_src.NaN;
240 result.infinity=unpacked_src.infinity;
241
242 // no rounding needed!
244 return pack(bias(result));
245 }
246 else // NOLINT(readability/braces)
247 #endif
248 {
249 // we actually need to round
250 unbiased_floatt result=unpack(src);
252 return round_and_pack(result);
253 }
254}
255
257{
258 return prop.land(
259 !exponent_all_zeros(src),
260 !exponent_all_ones(src));
261}
262
265 const unbiased_floatt &src1,
266 const unbiased_floatt &src2)
267{
268 // extend both
270 bv_utils.sign_extension(src1.exponent, src1.exponent.size()+1);
272 bv_utils.sign_extension(src2.exponent, src2.exponent.size()+1);
273
275
276 // compute shift distance (here is the subtraction)
278}
279
281 const bvt &src1,
282 const bvt &src2,
283 bool subtract)
284{
287
288 // subtract?
289 if(subtract)
290 unpacked2.sign=!unpacked2.sign;
291
292 // figure out which operand has the bigger exponent
295
296 const bvt bigger_exponent=
297 bv_utils.select(src2_bigger, unpacked2.exponent, unpacked1.exponent);
298
299 // swap fractions as needed
300 const bvt new_fraction1=
301 bv_utils.select(src2_bigger, unpacked2.fraction, unpacked1.fraction);
302
303 const bvt new_fraction2=
304 bv_utils.select(src2_bigger, unpacked1.fraction, unpacked2.fraction);
305
306 // compute distance
308
309 // limit the distance: shifting more than f+3 bits is unnecessary
310 const bvt limited_dist=limit_distance(distance, spec.f+3);
311
312 // pad fractions with 2 zeros from below
313 const bvt fraction1_padded=
315 const bvt fraction2_padded=
317
318 // shift new_fraction2
323
324 // sticky bit: or of the bits lost by the right-shift
327
328 // need to have two extra fraction bits for addition and rounding
329 const bvt fraction1_ext=
331 const bvt fraction2_ext=
333
334 unbiased_floatt result;
335
336 // now add/sub them
338 result.fraction=
340
341 // sign of result
342 literalt fraction_sign=result.fraction.back();
344
346
347 // adjust the exponent for the fact that we added two bits to the fraction
348 result.exponent=
350 bv_utils.sign_extension(result.exponent, result.exponent.size()+1),
351 bv_utils.build_constant(2, result.exponent.size()+1));
352
353 // NaN?
354 result.NaN=prop.lor(
355 prop.land(prop.land(unpacked1.infinity, unpacked2.infinity),
356 prop.lxor(unpacked1.sign, unpacked2.sign)),
357 prop.lor(unpacked1.NaN, unpacked2.NaN));
358
359 // infinity?
360 result.infinity=prop.land(
361 !result.NaN,
362 prop.lor(unpacked1.infinity, unpacked2.infinity));
363
364 // zero?
365 // Note that:
366 // 1. The zero flag isn't used apart from in divide and
367 // is only set on unpack
368 // 2. Subnormals mean that addition or subtraction can't round to 0,
369 // thus we can perform this test now
370 // 3. The rules for sign are different for zero
371 result.zero=prop.land(
372 !prop.lor(result.infinity, result.NaN),
373 !prop.lor(result.fraction));
374
375
376 // sign
380
382 prop.lselect(unpacked1.infinity, unpacked1.sign, unpacked2.sign);
383
384 #if 1
387 prop.lor(unpacked1.sign, unpacked2.sign),
388 prop.land(unpacked1.sign, unpacked2.sign));
389
390 result.sign=prop.lselect(
391 result.infinity,
393 prop.lselect(result.zero,
394 zero_sign,
395 add_sub_sign));
396 #else
397 result.sign=prop.lselect(
398 result.infinity,
401 #endif
402
403 #if 0
404 result.sign=const_literal(false);
405 result.fraction.resize(spec.f+1, const_literal(true));
406 result.exponent.resize(spec.e, const_literal(false));
407 result.NaN=const_literal(false);
408 result.infinity=const_literal(false);
409 // for(std::size_t i=0; i<result.fraction.size(); i++)
410 // result.fraction[i]=const_literal(true);
411
412 for(std::size_t i=0; i<result.fraction.size(); i++)
413 result.fraction[i]=new_fraction2[i];
414
415 return pack(bias(result));
416 #endif
417
418 return round_and_pack(result);
419}
420
423 const bvt &dist,
425{
426 std::size_t nb_bits = address_bits(limit);
427
429 upper_bits.erase(upper_bits.begin(), upper_bits.begin()+nb_bits);
431
433 lower_bits.resize(nb_bits);
434
435 bvt result;
436 result.resize(lower_bits.size());
437
438 // bitwise or with or_upper_bits
439 for(std::size_t i=0; i<result.size(); i++)
440 result[i]=prop.lor(lower_bits[i], or_upper_bits);
441
442 return result;
443}
444
446{
447 // unpack
450
451 // zero-extend the fractions
452 const bvt fraction1=
453 bv_utils.zero_extension(unpacked1.fraction, unpacked1.fraction.size()*2);
454 const bvt fraction2=
455 bv_utils.zero_extension(unpacked2.fraction, unpacked2.fraction.size()*2);
456
457 // multiply fractions
458 unbiased_floatt result;
460
461 // extend exponents to account for overflow
462 // add two bits, as we do extra arithmetic on it later
463 const bvt exponent1=
464 bv_utils.sign_extension(unpacked1.exponent, unpacked1.exponent.size()+2);
465 const bvt exponent2=
466 bv_utils.sign_extension(unpacked2.exponent, unpacked2.exponent.size()+2);
467
469
470 // adjust, we are throwing in an extra fraction bit
471 // it has been extended above
473
474 // new sign
475 result.sign=prop.lxor(unpacked1.sign, unpacked2.sign);
476
477 // infinity?
478 result.infinity=prop.lor(unpacked1.infinity, unpacked2.infinity);
479
480 // NaN?
481 {
483
484 NaN_cond.push_back(is_NaN(src1));
485 NaN_cond.push_back(is_NaN(src2));
486
487 // infinity * 0 is NaN!
488 NaN_cond.push_back(prop.land(unpacked1.zero, unpacked2.infinity));
489 NaN_cond.push_back(prop.land(unpacked2.zero, unpacked1.infinity));
490
491 result.NaN=prop.lor(NaN_cond);
492 }
493
494 return round_and_pack(result);
495}
496
498 const bvt &multiply_lhs,
499 const bvt &multiply_rhs,
500 const bvt &addend)
501{
502 // Fused multiply-add: round(multiply_lhs * multiply_rhs + addend).
503 // The product multiply_lhs * multiply_rhs is computed exactly (double-width
504 // fraction), then addend is added, and the result is rounded once.
505
509
510 // --- Exact product a*b ---
511 const std::size_t frac_size = unpacked_lhs.fraction.size(); // f+1
512
516 // Product fraction has width 2*(f+1) bits (double-width fraction w.r.t.
517 // inputs).
518 // The value is prod_fraction * 2^(prod_exponent - (prod_fraction.size()-1)).
519 // Keep full width for exact intermediate result.
520
523 unpacked_lhs.exponent, unpacked_lhs.exponent.size() + 2),
525 unpacked_rhs.exponent, unpacked_rhs.exponent.size() + 2));
527
529
530 // --- Align c's fraction to the product's wider format ---
531 // Product fraction: prod_width bits, binary point after MSB.
532 // c fraction: (f+1) bits. Pad on the right to match width, then
533 // adjust exponent to compensate.
534 const std::size_t prod_width = prod_fraction.size();
535 const std::size_t c_pad = prod_width - frac_size;
540
541 // --- Add product + c (same logic as add_sub) ---
543 literalt c_bigger = exp_diff.back();
544
548
551
554
559
561 bvt small_ext =
563
566
567 literalt fraction_sign = sum.back();
569
570 unbiased_floatt result;
571 result.fraction = sum;
572 result.exponent = bv_utils.add(
574 bv_utils.build_constant(2, bigger_exp.size() + 1));
575
576 // Sign
579
580 // NaN: any input NaN, inf*0, or inf+(-inf) in the addition
581 literalt prod_inf = prop.lor(unpacked_lhs.infinity, unpacked_rhs.infinity);
582 result.NaN = prop.lor(
585 is_NaN(addend),
586 prop.land(unpacked_lhs.zero, unpacked_rhs.infinity),
587 prop.land(unpacked_rhs.zero, unpacked_lhs.infinity),
588 prop.land(
589 prop.land(prod_inf, unpacked_add.infinity),
591
592 result.infinity =
593 prop.land(!result.NaN, prop.lor(prod_inf, unpacked_add.infinity));
594
595 result.zero = prop.land(
596 !prop.lor(result.infinity, result.NaN), !prop.lor(result.fraction));
597
603
604 result.sign = prop.lselect(
605 result.infinity,
608
609 return round_and_pack(result);
610}
611
613{
614 // unpack
617
618 std::size_t div_width=unpacked1.fraction.size()*2+1;
619
620 // pad fraction1 with zeros
621 bvt fraction1=unpacked1.fraction;
622 fraction1.reserve(div_width);
623 while(fraction1.size()<div_width)
624 fraction1.insert(fraction1.begin(), const_literal(false));
625
626 // zero-extend fraction2
627 const bvt fraction2=
629
630 // divide fractions
631 unbiased_floatt result;
632 bvt rem;
634
635 // is there a remainder?
637
638 // we throw this into the result, as one additional bit,
639 // to get the right rounding decision
640 result.fraction.insert(
641 result.fraction.begin(), have_remainder);
642
643 // We will subtract the exponents;
644 // to account for overflow, we add a bit.
645 // we add a second bit for the adjust by extra fraction bits
646 const bvt exponent1=
647 bv_utils.sign_extension(unpacked1.exponent, unpacked1.exponent.size()+2);
648 const bvt exponent2=
649 bv_utils.sign_extension(unpacked2.exponent, unpacked2.exponent.size()+2);
650
651 // subtract exponents
653
654 // adjust, as we have thrown in extra fraction bits
655 result.exponent=bv_utils.add(
658
659 // new sign
660 result.sign=prop.lxor(unpacked1.sign, unpacked2.sign);
661
662 // Infinity? This happens when
663 // 1) dividing a non-nan/non-zero by zero, or
664 // 2) first operand is inf and second is non-nan and non-zero
665 // In particular, inf/0=inf.
666 result.infinity=
667 prop.lor(
668 prop.land(!unpacked1.zero,
669 prop.land(!unpacked1.NaN,
670 unpacked2.zero)),
671 prop.land(unpacked1.infinity,
672 prop.land(!unpacked2.NaN,
673 !unpacked2.zero)));
674
675 // NaN?
676 result.NaN=prop.lor(unpacked1.NaN,
677 prop.lor(unpacked2.NaN,
678 prop.lor(prop.land(unpacked1.zero, unpacked2.zero),
679 prop.land(unpacked1.infinity, unpacked2.infinity))));
680
681 // Division by infinity produces zero, unless we have NaN
683 prop.land(!unpacked1.NaN, unpacked2.infinity);
684
686 bv_utils.zeros(result.fraction.size()), result.fraction);
687
688 return round_and_pack(result);
689}
690
692{
693 /* The semantics of floating-point remainder implemented as below
694 is the sensible one. Unfortunately this is not the one required
695 by IEEE-754 or fmod / remainder. Martin has discussed the
696 'correct' semantics with Christoph and Alberto at length as
697 well as talking to various hardware designers and we still
698 haven't found a good way to implement them in a solver.
699 We have some approaches that are correct but they really
700 don't scale. */
701
703
704 // stub: do (src2.infinity ? src1 : src1 - (src1/src2)*src2)
705 return bv_utils.select(
706 unpacked2.infinity, src1, sub(src1, mul(div(src1, src2), src2)));
707}
708
710{
711 PRECONDITION(!src.empty());
712 bvt result=src;
713 literalt &sign_bit=result[result.size()-1];
715 return result;
716}
717
719{
720 PRECONDITION(!src.empty());
721 bvt result=src;
722 result[result.size()-1]=const_literal(false);
723 return result;
724}
725
727 const bvt &src1,
728 relt rel,
729 const bvt &src2)
730{
731 if(rel==relt::GT)
732 return relation(src2, relt::LT, src1); // swapped
733 else if(rel==relt::GE)
734 return relation(src2, relt::LE, src1); // swapped
735
736 PRECONDITION(rel == relt::EQ || rel == relt::LT || rel == relt::LE);
737
738 // special cases: -0 and 0 are equal
742
743 // NaN compares to nothing
747
748 if(rel==relt::LT || rel==relt::LE)
749 {
751
752 // signs different? trivial! Unless Zero.
753
756
757 // as long as the signs match: compare like unsigned numbers
758
759 // this works due to the BIAS
761
762 // if both are negative (and not the same), need to turn around!
765
768 sign_bit(src1),
769 less_than2);
770
771 if(rel==relt::LT)
772 {
773 bvt and_bv;
774 and_bv.push_back(less_than3);
775 and_bv.push_back(!bitwise_equal); // for the case of two negative numbers
776 and_bv.push_back(!both_zero);
777 and_bv.push_back(!NaN);
778
779 return prop.land(and_bv);
780 }
781 else if(rel==relt::LE)
782 {
783 bvt or_bv;
784 or_bv.push_back(less_than3);
785 or_bv.push_back(both_zero);
786 or_bv.push_back(bitwise_equal);
787
788 return prop.land(prop.lor(or_bv), !NaN);
789 }
790 else
792 }
793 else if(rel==relt::EQ)
794 {
796
797 return prop.land(
799 !NaN);
800 }
801
802 // not reached
804 return const_literal(false);
805}
806
808{
809 PRECONDITION(!src.empty());
811 all_but_sign=src;
812 all_but_sign.resize(all_but_sign.size()-1);
814}
815
817{
818 bvt and_bv;
819 and_bv.push_back(!sign_bit(src));
820 and_bv.push_back(exponent_all_ones(src));
821 and_bv.push_back(fraction_all_zeros(src));
822 return prop.land(and_bv);
823}
824
826{
827 return prop.land(
829 fraction_all_zeros(src));
830}
831
834{
835 return bv_utils.extract(src, spec.f, spec.f+spec.e-1);
836}
837
840{
841 return bv_utils.extract(src, 0, spec.f-1);
842}
843
845{
846 bvt and_bv;
847 and_bv.push_back(sign_bit(src));
848 and_bv.push_back(exponent_all_ones(src));
849 and_bv.push_back(fraction_all_zeros(src));
850 return prop.land(and_bv);
851}
852
854{
855 return prop.land(exponent_all_ones(src),
856 !fraction_all_zeros(src));
857}
858
860{
861 bvt exponent=src;
862
863 // removes the fractional part
864 exponent.erase(exponent.begin(), exponent.begin()+spec.f);
865
866 // removes the sign
867 exponent.resize(spec.e);
868
869 return bv_utils.is_all_ones(exponent);
870}
871
873{
874 bvt exponent=src;
875
876 // removes the fractional part
877 exponent.erase(exponent.begin(), exponent.begin()+spec.f);
878
879 // removes the sign
880 exponent.resize(spec.e);
881
882 return bv_utils.is_zero(exponent);
883}
884
886{
887 PRECONDITION(src.size() == spec.width());
888 // does not include hidden bit
889 bvt tmp=src;
890 tmp.resize(spec.f);
891 return bv_utils.is_zero(tmp);
892}
893
/// Shifts leading zero bits out of \p fraction (towards the most significant
/// end) and compensates by subtracting the total shift distance from
/// \p exponent. Both arguments are updated in place.
///
/// Only the `#else` branch is compiled; the `#if 0` branch keeps an older
/// encoding that is quadratic in the fraction width.
///
/// \param fraction: non-empty fraction bit-vector; it is zero-extended first
///   if it is narrower than the largest single shift distance
/// \param exponent: exponent bit-vector; it is sign-extended to
///   max(depth, exponent.size() + 1) bits, where
///   depth = address_bits(fraction.size() - 1), before the subtraction, so
///   the caller gets back a wider vector than it passed in
895void float_utilst::normalization_shift(bvt &fraction, bvt &exponent)
896{
897 #if 0
898 // this thing is quadratic!
899
900 bvt new_fraction=prop.new_variables(fraction.size());
901 bvt new_exponent=prop.new_variables(exponent.size());
902
903 // i is the shift distance
904 for(std::size_t i=0; i<fraction.size(); i++)
905 {
906 bvt equal;
907
908 // the bits above need to be zero
909 for(std::size_t j=0; j<i; j++)
910 equal.push_back(
911 !fraction[fraction.size()-1-j]);
912
913 // this one needs to be one
914 equal.push_back(fraction[fraction.size()-1-i]);
915
916 // iff all of that holds, we shift here!
917 literalt shift=prop.land(equal);
918
919 // build shifted value
922
923 // build new exponent
924 bvt adjustment=bv_utils.build_constant(-i, exponent.size());
927 }
928
929 // Fraction all zero? It stays zero.
930 // The exponent is undefined in that case.
933 zero_fraction.resize(fraction.size(), const_literal(false));
935
936 fraction=new_fraction;
937 exponent=new_exponent;
938
939 #else
940
941 // n-log-n alignment shifter.
942 // The worst-case shift is the number of fraction
943 // bits minus one, in case the fraction is one exactly.
944 PRECONDITION(!fraction.empty());
945 std::size_t depth = address_bits(fraction.size() - 1);
946
947 // sign-extend to ensure the arithmetic below cannot result in overflow/underflow
948 exponent =
949 bv_utils.sign_extension(exponent, std::max(depth, exponent.size() + 1));
950
951 bvt exponent_delta=bv_utils.zeros(exponent.size());
952
953 // Fraction smaller than the max distance? Pad up with zeros.
 // max_distance is the distance used by the first loop iteration below
 // (d == depth - 1).
 // NOTE(review): assuming address_bits(n) is ceil(log2(n)), depth is 0 when
 // fraction has one or two bits, and depth - 1 then wraps around in this
 // shift -- confirm that such narrow fractions cannot reach this point.
954 std::size_t max_distance = 1 << (depth - 1);
955
956 if(fraction.size() < max_distance)
957 fraction = bv_utils.zero_extension(fraction, max_distance);
958
 // One stage per power-of-two distance, from the largest down to 1.
959 for(int d=depth-1; d>=0; d--)
960 {
961 std::size_t distance=(1<<d);
962
963 INVARIANT(
964 fraction.size() >= distance, "fraction must be larger or equal distance");
965
966 // check if first 'distance'-many bits are zeros
967 const bvt prefix=bv_utils.extract_msb(fraction, distance);
969
970 // If so, shift the zeros out left by 'distance'.
971 // Otherwise, leave as is.
972 const bvt shifted=
973 bv_utils.shift(fraction, bv_utilst::shiftt::SHIFT_LEFT, distance);
974
975 fraction=
977
978 // add corresponding weight to exponent
979 INVARIANT(
980 d < (signed)exponent_delta.size(),
981 "depth must be smaller than exponent size");
983 }
984
 // exponent_delta holds the accumulated shift distance; remove it from the
 // (already widened) exponent.
985 exponent=bv_utils.sub(exponent, exponent_delta);
986
987 #endif
988}
989
992{
993 PRECONDITION(exponent.size() >= spec.e);
994
996
997 // Is the exponent strictly less than -bias+1, i.e., exponent<-bias+1?
998 // This is transformed to distance=(-bias+1)-exponent
999 // i.e., distance>0
1000 // Note that 1-bias is the exponent represented by 0...01,
1001 // i.e. the exponent of the smallest normal number and thus the 'base'
1002 // exponent for subnormal numbers.
1003
1004#if 1
1005 // Need to sign extend to avoid overflow. Note that this is a
1006 // relatively rare problem as the value needs to be close to the top
1007 // of the exponent range and the range must not have been
1008 // previously extended as add, multiply, etc. do. This is primarily
1009 // to handle casting down from larger ranges.
1010 exponent=bv_utils.sign_extension(exponent, exponent.size() + 1);
1011#endif
1012
1013 bvt distance=bv_utils.sub(
1014 bv_utils.build_constant(-bias+1, exponent.size()), exponent);
1015
1016 // use sign bit
1018 !distance.back(),
1019 !bv_utils.is_zero(distance));
1020
1021#if 1
1022 // Care must be taken to not lose information required for the
1023 // guard and sticky bits. +3 is for the hidden, guard and sticky bits.
1024 if(fraction.size() < (spec.f + 3))
1025 {
1026 // Add zeros at the LSB end for the guard bit to shift into
1027 fraction=
1028 bv_utils.concatenate(bv_utils.zeros((spec.f + 3) - fraction.size()),
1029 fraction);
1030 }
1031
1032 bvt denormalisedFraction=fraction;
1033
1036 sticky_right_shift(fraction, distance, sticky_bit);
1038
1039 fraction=
1041 denormal,
1043 fraction);
1044
1045#else
1046 fraction=
1048 denormal,
1049 bv_utils.shift(fraction, bv_utilst::LRIGHT, distance),
1050 fraction);
1051#endif
1052
1053 exponent=
1055 bv_utils.build_constant(-bias, exponent.size()),
1056 exponent);
1057}
1058
1060{
1061 // incoming: some fraction (with explicit 1),
1062 // some exponent without bias
1063 // outgoing: rounded, with right size, but still unpacked
1064
1067
1068 {
1069 std::size_t exponent_bits = std::max(address_bits(spec.f), spec.e) + 1;
1070
1071 // before normalization, make sure exponent is large enough
1073 {
1074 // sign extend
1077 }
1078 }
1079
1080 // align it!
1083
1084 unbiased_floatt result;
1087 result.sign=src.sign;
1088 result.NaN=src.NaN;
1089 result.infinity=src.infinity;
1090
1091 round_fraction(result);
1092 round_exponent(result);
1093
1094 return result;
1095}
1096
1098{
1099 return pack(bias(rounder(src)));
1100}
1101
1104 const std::size_t dest_bits,
1105 const literalt sign,
1106 const bvt &fraction)
1107{
1108 PRECONDITION(dest_bits < fraction.size());
1109
1110 // we have too many fraction bits
1111 std::size_t extra_bits=fraction.size()-dest_bits;
1112
1113 // more than two extra bits are superfluous, and are
1114 // turned into a sticky bit
1115
1117
1118 if(extra_bits>=2)
1119 {
1120 // We keep most-significant bits, and thus the tail is made
1121 // of least-significant bits.
1122 bvt tail=bv_utils.extract(fraction, 0, extra_bits-2);
1123 sticky_bit=prop.lor(tail);
1124 }
1125
1126 // the rounding bit is the last extra bit
1127 INVARIANT(
1128 extra_bits >= 1, "the extra bits include at least the rounding bit");
1129 literalt rounding_bit=fraction[extra_bits-1];
1130
1131 // we get one bit of the fraction for some rounding decisions
1133
1134 // round-to-nearest (ties to even)
1135 literalt round_to_even=
1138
1139 // round up
1140 literalt round_to_plus_inf=
1141 prop.land(!sign,
1143
1144 // round down
1145 literalt round_to_minus_inf=
1146 prop.land(sign,
1148
1149 // round to zero
1150 literalt round_to_zero=
1151 const_literal(false);
1152
1153 // round-to-nearest (ties to away)
1154 literalt round_to_away = rounding_bit;
1155
1156 // now select appropriate one
1157 // clang-format off
1158 return prop.lselect(rounding_mode_bits.round_to_even, round_to_even,
1163 prop.new_variable()))))); // otherwise non-det
1164 // clang-format on
1165}
1166
1168{
1169 std::size_t fraction_size=spec.f+1;
1170
1171 // do we need to enlarge the fraction?
1172 if(result.fraction.size()<fraction_size)
1173 {
1174 // pad with zeros at bottom
1175 std::size_t padding=fraction_size-result.fraction.size();
1176
1179 result.fraction);
1180
1181 INVARIANT(
1182 result.fraction.size() == fraction_size,
1183 "sizes should be equal as result.fraction was zero-padded");
1184 }
1185 else if(result.fraction.size()==fraction_size) // it stays
1186 {
1187 // do nothing
1188 }
1189 else // fraction gets smaller -- rounding
1190 {
1191 std::size_t extra_bits=result.fraction.size()-fraction_size;
1192 INVARIANT(
1193 extra_bits >= 1,
1194 "the extra bits should at least include the rounding bit");
1195
1196 // this computes the rounding decision
1198 fraction_size, result.sign, result.fraction);
1199
1200 // chop off all the extra bits
1201 result.fraction=bv_utils.extract(
1202 result.fraction, extra_bits, result.fraction.size()-1);
1203
1204 INVARIANT(
1205 result.fraction.size() == fraction_size,
1206 "sizes should be equal as extra bits were chopped off from "
1207 "result.fraction");
1208
1209#if 0
1210 // *** does not catch when the overflow goes subnormal -> normal ***
1211 // incrementing the fraction might result in an overflow
1212 result.fraction=
1213 bv_utils.zero_extension(result.fraction, result.fraction.size()+1);
1214
1215 result.fraction=bv_utils.incrementer(result.fraction, increment);
1216
1217 literalt overflow=result.fraction.back();
1218
1219 // In case of an overflow, the exponent has to be incremented.
1220 // "Post normalization" is then required.
1221 result.exponent=
1223
1224 // post normalization of the fraction
1225 literalt integer_part1=result.fraction.back();
1226 literalt integer_part0=result.fraction[result.fraction.size()-2];
1228
1229 result.fraction.resize(result.fraction.size()-1);
1230 result.fraction.back()=new_integer_part;
1231
1232#else
1233 // When incrementing due to rounding there are two edge
1234 // cases we need to be aware of:
1235 // 1. If the number is normal, the increment can overflow.
1236 // In this case we need to increment the exponent and
1237 // set the MSB of the fraction to 1.
1238 // 2. If the number is the largest subnormal, the increment
1239 // can change the MSB making it normal. Thus the exponent
1240 // must be incremented but the fraction will be OK.
1241 literalt oldMSB=result.fraction.back();
1242
1243 result.fraction=bv_utils.incrementer(result.fraction, increment);
1244
1245 // Normal overflow when old MSB == 1 and new MSB == 0
1246 literalt overflow=prop.land(oldMSB, neg(result.fraction.back()));
1247
1248 // Subnormal to normal transition when old MSB == 0 and new MSB == 1
1250 prop.land(neg(oldMSB), result.fraction.back());
1251
1252 // In case of an overflow or subnormal to normal conversion,
1253 // the exponent has to be incremented.
1254 result.exponent=
1257
1258 // post normalization of the fraction
1259 // In the case of overflow, set the MSB to 1
1260 // The subnormal case will have (only) the MSB set to 1
1261 result.fraction.back()=prop.lor(result.fraction.back(), overflow);
1262#endif
1263 }
1264}
1265
1267{
1268 PRECONDITION(result.exponent.size() >= spec.e);
1269
1270 // do we need to enlarge the exponent?
1271 if(result.exponent.size() == spec.e) // it stays
1272 {
1273 // do nothing
1274 }
1275 else // exponent gets smaller -- chop off top bits
1276 {
1277 bvt old_exponent=result.exponent;
1278 result.exponent.resize(spec.e);
1279
1280 // max_exponent is the maximum representable
1281 // i.e. 1 higher than the maximum possible for a normal number
1282 bvt max_exponent=
1284 spec.max_exponent()-spec.bias(), old_exponent.size());
1285
1286 // the exponent is garbage if the fractional is zero
1287
1289 prop.land(
1290 !bv_utils.signed_less_than(old_exponent, max_exponent),
1291 !bv_utils.is_zero(result.fraction));
1292
1293#if 1
1294 // Directed rounding modes round overflow to the maximum normal
1295 // depending on the particular mode and the sign
1299 !result.sign),
1301 result.sign)));
1302
1305
1306
1309 spec.max_exponent()-(spec.bias() + 1), result.exponent.size());
1310
1311 result.exponent=
1313
1314 result.fraction=
1316 bv_utils.inverted(bv_utils.zeros(result.fraction.size())),
1317 result.fraction);
1318
1319 result.infinity=prop.lor(result.infinity,
1322#else
1324#endif
1325 }
1326}
1327
1330{
1331 PRECONDITION(src.fraction.size() == spec.f + 1);
1332
1333 biased_floatt result;
1334
1335 result.sign=src.sign;
1336 result.NaN=src.NaN;
1337 result.infinity=src.infinity;
1338
1339 // we need to bias the new exponent
1340 result.exponent=add_bias(src.exponent);
1341
1342 // strip off hidden bit
1343
1344 literalt hidden_bit=src.fraction[src.fraction.size()-1];
1346
1347 result.fraction=src.fraction;
1348 result.fraction.resize(spec.f);
1349
1350 // make exponent zero if it's denormal
1351 // (includes zero)
1352 for(std::size_t i=0; i<result.exponent.size(); i++)
1353 result.exponent[i]=
1354 prop.land(result.exponent[i], !denormal);
1355
1356 return result;
1357}
1358
1360{
1361 PRECONDITION(src.size() == spec.e);
1362
1363 return bv_utils.add(
1364 src,
1366}
1367
1369{
1370 PRECONDITION(src.size() == spec.e);
1371
1372 return bv_utils.sub(
1373 src,
1375}
1376
1378{
1379 PRECONDITION(src.size() == spec.width());
1380
1381 unbiased_floatt result;
1382
1383 result.sign=sign_bit(src);
1384
1385 result.fraction=get_fraction(src);
1386 result.fraction.push_back(is_normal(src)); // add hidden bit
1387
1388 result.exponent=get_exponent(src);
1389 CHECK_RETURN(result.exponent.size() == spec.e);
1390
1391 // unbias the exponent
1393
1394 result.exponent=
1397 sub_bias(result.exponent));
1398
1399 result.infinity=is_infinity(src);
1400 result.zero=is_zero(src);
1401 result.NaN=is_NaN(src);
1402
1403 return result;
1404}
1405
1407{
1408 PRECONDITION(src.fraction.size() == spec.f);
1409 PRECONDITION(src.exponent.size() == spec.e);
1410
1411 bvt result;
1412 result.resize(spec.width());
1413
1414 // do sign
1415 // we make this 'false' for NaN
1416 result[result.size()-1]=
1417 prop.lselect(src.NaN, const_literal(false), src.sign);
1418
1420 prop.lor(src.NaN, src.infinity);
1421
1422 // just copy fraction
1423 for(std::size_t i=0; i<spec.f; i++)
1424 result[i]=prop.land(src.fraction[i], !infinity_or_NaN);
1425
1426 result[0]=prop.lor(result[0], src.NaN);
1427
1428 // do exponent
1429 for(std::size_t i=0; i<spec.e; i++)
1430 result[i+spec.f]=prop.lor(
1431 src.exponent[i],
1433
1434 return result;
1435}
1436
1438{
1440
1441 for(std::size_t i=0; i<src.size(); i++)
1442 int_value+=power(2, i)*prop.l_get(src[i]).is_true();
1443
1444 ieee_float_valuet result;
1445 result.spec=spec;
1446 result.unpack(int_value);
1447
1448 return result;
1449}
1450
1452 const bvt &op,
1453 const bvt &dist,
1455{
1456 std::size_t d=1;
1457 bvt result=op;
1458 sticky=const_literal(false);
1459
1460 for(std::size_t stage=0; stage<dist.size(); stage++)
1461 {
1462 if(dist[stage]!=const_literal(false))
1463 {
1465
1466 bvt lost_bits;
1467
1468 if(d<=result.size())
1469 lost_bits=bv_utils.extract(result, 0, d-1);
1470 else
1471 lost_bits=result;
1472
1473 sticky=prop.lor(
1475 sticky);
1476
1477 result=bv_utils.select(dist[stage], tmp, result);
1478 }
1479
1480 d=d<<1;
1481 }
1482
1483 return result;
1484}
1485
1487 const bvt &src1,
1488 const bvt &)
1489{
1490 return src1;
1491}
1492
1494 const bvt &op0,
1495 const bvt &)
1496{
1497 return op0;
1498}
std::size_t address_bits(const mp_integer &size)
ceil(log2(size))
mp_integer power(const mp_integer &base, const mp_integer &exponent)
A multi-precision implementation of the power operator.
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:566
static bvt inverted(const bvt &op)
Definition bv_utils.cpp:638
literalt signed_less_than(const bvt &bv0, const bvt &bv1)
literalt is_all_ones(const bvt &op)
Definition bv_utils.h:158
literalt is_not_zero(const bvt &op)
Definition bv_utils.h:146
static bvt extract_msb(const bvt &a, std::size_t n)
Definition bv_utils.cpp:59
bvt add(const bvt &op0, const bvt &op1)
Definition bv_utils.h:66
static bvt zero_extension(const bvt &bv, std::size_t new_size)
Definition bv_utils.h:187
bvt absolute_value(const bvt &op)
bvt select(literalt s, const bvt &a, const bvt &b)
If s is true, selects a otherwise selects b.
Definition bv_utils.cpp:97
static bvt build_constant(const mp_integer &i, std::size_t width)
Definition bv_utils.cpp:16
literalt is_zero(const bvt &op)
Definition bv_utils.h:143
literalt equal(const bvt &op0, const bvt &op1)
Bit-blasting ID_equal and use in other encodings.
void cond_implies_equal(literalt cond, const bvt &a, const bvt &b)
literalt unsigned_less_than(const bvt &bv0, const bvt &bv1)
bvt incrementer(const bvt &op, literalt carry_in)
Definition bv_utils.cpp:630
bvt add_sub(const bvt &op0, const bvt &op1, bool subtract)
Definition bv_utils.cpp:339
static bvt shift(const bvt &op, const shiftt shift, std::size_t distance)
Definition bv_utils.cpp:538
static bvt concatenate(const bvt &a, const bvt &b)
Definition bv_utils.cpp:81
bvt sub(const bvt &op0, const bvt &op1)
Definition bv_utils.h:67
static bvt extract(const bvt &a, std::size_t first, std::size_t last)
Definition bv_utils.cpp:43
bvt inc(const bvt &op)
Definition bv_utils.h:33
bvt unsigned_multiplier(const bvt &op0, const bvt &op1)
Definition bv_utils.cpp:920
void unsigned_divider(const bvt &op0, const bvt &op1, bvt &res, bvt &rem)
bvt cond_negate(const bvt &bv, const literalt cond)
static bvt zeros(std::size_t new_size)
Definition bv_utils.h:192
static bvt sign_extension(const bvt &bv, std::size_t new_size)
Definition bv_utils.h:182
unbiased_floatt rounder(const unbiased_floatt &)
bvt to_integer(const bvt &src, std::size_t int_width, bool is_signed)
literalt is_NaN(const bvt &)
virtual void normalization_shift(bvt &fraction, bvt &exponent)
normalize fraction/exponent pair; returns 'zero' if fraction is zero
bv_utilst bv_utils
bvt debug2(const bvt &op0, const bvt &op1)
virtual bvt rem(const bvt &src1, const bvt &src2)
bvt round_to_integral(const bvt &)
literalt is_plus_inf(const bvt &)
ieee_float_valuet get(const bvt &) const
literalt is_infinity(const bvt &)
void set_rounding_mode(const bvt &)
void round_exponent(unbiased_floatt &result)
void round_fraction(unbiased_floatt &result)
bvt sticky_right_shift(const bvt &op, const bvt &dist, literalt &sticky)
unbiased_floatt unpack(const bvt &)
bvt from_unsigned_integer(const bvt &)
virtual bvt mul(const bvt &src1, const bvt &src2)
bvt debug1(const bvt &op0, const bvt &op1)
bvt add_bias(const bvt &exponent)
bvt round_and_pack(const unbiased_floatt &)
bvt subtract_exponents(const unbiased_floatt &src1, const unbiased_floatt &src2)
Subtracts the exponents.
bvt get_fraction(const bvt &)
Gets the fraction without hidden bit in a floating-point bit-vector src.
literalt is_minus_inf(const bvt &)
literalt fraction_rounding_decision(const std::size_t dest_bits, const literalt sign, const bvt &fraction)
rounding decision for fraction using sticky bit
bvt get_exponent(const bvt &)
Gets the unbiased exponent in a floating-point bit-vector.
void denormalization_shift(bvt &fraction, bvt &exponent)
make sure exponent is not too small; the exponent is unbiased
bvt to_unsigned_integer(const bvt &src, std::size_t int_width)
bvt build_constant(const ieee_float_valuet &)
virtual bvt div(const bvt &src1, const bvt &src2)
bvt negate(const bvt &)
literalt exponent_all_zeros(const bvt &)
literalt fraction_all_zeros(const bvt &)
bvt fma(const bvt &multiply_lhs, const bvt &multiply_rhs, const bvt &addend)
Fused multiply-add: round(multiply_lhs * multiply_rhs + addend) with a single rounding step.
bvt from_signed_integer(const bvt &)
literalt is_zero(const bvt &)
bvt sub(const bvt &src1, const bvt &src2)
bvt sub_bias(const bvt &exponent)
bvt limit_distance(const bvt &dist, mp_integer limit)
Limits the shift distance.
bvt conversion(const bvt &src, const ieee_float_spect &dest_spec)
bvt pack(const biased_floatt &)
virtual bvt add_sub(const bvt &src1, const bvt &src2, bool subtract)
bvt abs(const bvt &)
static literalt sign_bit(const bvt &src)
Definition float_utils.h:98
ieee_float_spect spec
Definition float_utils.h:94
literalt exponent_all_ones(const bvt &)
bvt to_signed_integer(const bvt &src, std::size_t int_width)
literalt is_normal(const bvt &)
literalt relation(const bvt &src1, relt rel, const bvt &src2)
rounding_mode_bitst rounding_mode_bits
Definition float_utils.h:73
biased_floatt bias(const unbiased_floatt &)
takes an unbiased float, and applies the bias
mp_integer bias() const
mp_integer max_exponent() const
std::size_t f
Definition ieee_float.h:26
std::size_t width() const
Definition ieee_float.h:50
std::size_t e
Definition ieee_float.h:26
An IEEE 754 floating-point value, including specification.
Definition ieee_float.h:117
bool is_NaN() const
Definition ieee_float.h:259
ieee_float_spect spec
Definition ieee_float.h:119
bool get_sign() const
Definition ieee_float.h:254
const mp_integer & get_fraction() const
Definition ieee_float.h:264
void unpack(const mp_integer &)
bool is_infinity() const
Definition ieee_float.h:260
const mp_integer & get_exponent() const
Definition ieee_float.h:263
An IEEE 754 value plus a rounding mode, enabling operations with rounding on values.
Definition ieee_float.h:338
bool is_true() const
Definition literal.h:156
virtual literalt land(literalt a, literalt b)=0
virtual literalt lselect(literalt a, literalt b, literalt c)=0
virtual literalt lxor(literalt a, literalt b)=0
virtual bvt new_variables(std::size_t width)
generates a bitvector of given width with new variables
Definition prop.cpp:30
virtual literalt new_variable()=0
virtual literalt lor(literalt a, literalt b)=0
virtual tvt l_get(literalt a) const =0
literalt neg(literalt a)
Definition literal.h:193
std::vector< literalt > bvt
Definition literal.h:201
literalt const_literal(bool value)
Definition literal.h:188
BigInt mp_integer
Definition smt_terms.h:17
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
#define UNREACHABLE
This should be used to mark dead code.
Definition invariant.h:525
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
bool is_signed(const typet &t)
Convenience function – is the type signed?
Definition util.cpp:45