Formalization of floating-point numbers, using the Flocq library.
Require Import Reals.
Require Import Coqlib Zbits Integers.
From Flocq Require Import BinarySingleNaN Binary Bits Core.
Require Import IEEE754_extra.
Require Import Program.
Require Archi.
Import ListNotations.
Close Scope R_scope.
Open Scope Z_scope.
Set Asymmetric Patterns.
Definition float :=
binary64.
(* the type of IEE754 double-precision FP numbers *)
Definition float32 :=
binary32.
(* the type of IEE754 single-precision FP numbers *)
Lemma integer_representable_n :
forall n :
Z, - 2 ^ 53 <=
n <= 2 ^ 53 ->
integer_representable 53 1024
n.
Proof.
Boolean-valued comparisons
Definition cmp_of_comparison (
c:
comparison) (
x:
option Datatypes.comparison) :
bool :=
match c with
|
Ceq =>
match x with Some Eq =>
true |
_ =>
false end
|
Cne =>
match x with Some Eq =>
false |
_ =>
true end
|
Clt =>
match x with Some Lt =>
true |
_ =>
false end
|
Cle =>
match x with Some(
Lt|
Eq) =>
true |
_ =>
false end
|
Cgt =>
match x with Some Gt =>
true |
_ =>
false end
|
Cge =>
match x with Some(
Gt|
Eq) =>
true |
_ =>
false end
end.
Definition ordered_of_comparison (
x:
option Datatypes.comparison) :
bool :=
match x with None =>
false |
Some _ =>
true end.
Lemma cmp_of_comparison_swap:
forall c x,
cmp_of_comparison (
swap_comparison c)
x =
cmp_of_comparison c (
match x with None =>
None |
Some x =>
Some (
CompOpp x)
end).
Proof.
intros. destruct c; destruct x as [[]|]; reflexivity.
Qed.
Lemma cmp_of_comparison_ne_eq:
forall x,
cmp_of_comparison Cne x =
negb (
cmp_of_comparison Ceq x).
Proof.
intros. destruct x as [[]|]; reflexivity.
Qed.
Lemma cmp_of_comparison_lt_eq_false:
forall x,
cmp_of_comparison Clt x =
true ->
cmp_of_comparison Ceq x =
true ->
False.
Proof.
destruct x as [[]|]; simpl; intros; discriminate.
Qed.
Lemma cmp_of_comparison_le_lt_eq:
forall x,
cmp_of_comparison Cle x =
cmp_of_comparison Clt x ||
cmp_of_comparison Ceq x.
Proof.
destruct x as [[]|]; reflexivity.
Qed.
Lemma cmp_of_comparison_gt_eq_false:
forall x,
cmp_of_comparison Cgt x =
true ->
cmp_of_comparison Ceq x =
true ->
False.
Proof.
destruct x as [[]|]; simpl; intros; discriminate.
Qed.
Lemma cmp_of_comparison_ge_gt_eq:
forall x,
cmp_of_comparison Cge x =
cmp_of_comparison Cgt x ||
cmp_of_comparison Ceq x.
Proof.
destruct x as [[]|]; reflexivity.
Qed.
Lemma cmp_of_comparison_lt_gt_false:
forall x,
cmp_of_comparison Clt x =
true ->
cmp_of_comparison Cgt x =
true ->
False.
Proof.
destruct x as [[]|]; simpl; intros; discriminate.
Qed.
Normalization of NaN payloads
Lemma normalized_nan:
forall prec n p,
Z.of_nat n =
prec - 1 -> 1 <
prec ->
nan_pl prec (
Z.to_pos (
P_mod_two_p p n)) =
true.
Proof.
Transform a Nan payload to a quiet Nan payload.
Definition quiet_nan_64_payload (
p:
positive) :=
Z.to_pos (
P_mod_two_p (
Pos.lor p ((
iter_nat xO 51 1%
positive))) 52%
nat).
Lemma quiet_nan_64_proof:
forall p,
nan_pl 53 (
quiet_nan_64_payload p) =
true.
Proof.
Definition quiet_nan_64 (
sp:
bool *
positive) : {
x :
float |
is_nan _ _ x =
true} :=
let (
s,
p) :=
sp in
exist _ (
B754_nan 53 1024
s (
quiet_nan_64_payload p) (
quiet_nan_64_proof p)) (
eq_refl true).
Definition default_nan_64 :=
quiet_nan_64 Archi.default_nan_64.
Definition quiet_nan_32_payload (
p:
positive) :=
Z.to_pos (
P_mod_two_p (
Pos.lor p ((
iter_nat xO 22 1%
positive))) 23%
nat).
Lemma quiet_nan_32_proof:
forall p,
nan_pl 24 (
quiet_nan_32_payload p) =
true.
Proof.
Definition quiet_nan_32 (
sp:
bool *
positive) : {
x :
float32 |
is_nan _ _ x =
true} :=
let (
s,
p) :=
sp in
exist _ (
B754_nan 24 128
s (
quiet_nan_32_payload p) (
quiet_nan_32_proof p)) (
eq_refl true).
Definition default_nan_32 :=
quiet_nan_32 Archi.default_nan_32.
Local Notation __ := (
eq_refl Datatypes.Lt).
Local Hint Extern 1 (
Prec_gt_0 _) =>
exact (
eq_refl Datatypes.Lt) :
core.
Local Hint Extern 1 (
_ <
_) =>
exact (
eq_refl Datatypes.Lt) :
core.
Double-precision FP numbers
Module Float.
NaN payload manipulations
The following definitions are not part of the IEEE754 standard but
apply to all architectures supported by CompCert.
Nan payload operations for single <-> double conversions.
Definition expand_nan_payload (
p:
positive) :=
Pos.shiftl_nat p 29.
Lemma expand_nan_proof (
p :
positive) :
nan_pl 24
p =
true ->
nan_pl 53 (
expand_nan_payload p) =
true.
Proof.
Definition expand_nan s p H : {
x |
is_nan _ _ x =
true} :=
exist _ (
B754_nan 53 1024
s (
expand_nan_payload p) (
expand_nan_proof p H)) (
eq_refl true).
Definition of_single_nan (
f :
float32) : {
x :
float |
is_nan _ _ x =
true } :=
match f with
|
B754_nan s p H =>
if Archi.float_conversion_default_nan
then default_nan_64
else if Archi.float_of_single_preserves_sNaN
then expand_nan s p H
else quiet_nan_64 (
s,
expand_nan_payload p)
|
_ =>
default_nan_64
end.
Definition reduce_nan_payload (
p:
positive) :=
Pos.shiftr_nat (
quiet_nan_64_payload p) 29.
Definition to_single_nan (
f :
float) : {
x :
float32 |
is_nan _ _ x =
true } :=
match f with
|
B754_nan s p H =>
if Archi.float_conversion_default_nan
then default_nan_32
else quiet_nan_32 (
s,
reduce_nan_payload p)
|
_ =>
default_nan_32
end.
NaN payload operations for opposite and absolute value.
Definition neg_nan (
f :
float) : {
x :
float |
is_nan _ _ x =
true } :=
match f with
|
B754_nan s p H =>
exist _ (
B754_nan 53 1024 (
negb s)
p H) (
eq_refl true)
|
_ =>
default_nan_64
end.
Definition abs_nan (
f :
float) : {
x :
float |
is_nan _ _ x =
true } :=
match f with
|
B754_nan s p H =>
exist _ (
B754_nan 53 1024
false p H) (
eq_refl true)
|
_ =>
default_nan_64
end.
When an arithmetic operation returns a NaN, the sign and payload
of this NaN are not fully specified by the IEEE standard, and vary
among the architectures supported by CompCert. However, the following
behavior applies to all the supported architectures: the payload is either
-
a default payload, independent of the arguments, or
-
the payload of one of the NaN arguments, if any.
For each supported architecture, the functions
Archi.choose_nan_64
and
Archi.choose_nan_32 determine the payload of the result as a
function of the payloads of the NaN arguments.
Additionally, signaling NaNs are converted to quiet NaNs, as required by the standard.
Definition cons_pl (
x:
float) (
l:
list (
bool *
positive)) :=
match x with B754_nan s p _ => (
s,
p) ::
l |
_ =>
l end.
Definition unop_nan (
x:
float) : {
x :
float |
is_nan _ _ x =
true} :=
quiet_nan_64 (
Archi.choose_nan_64 (
cons_pl x [])).
Definition binop_nan (
x y:
float) : {
x :
float |
is_nan _ _ x =
true} :=
quiet_nan_64 (
Archi.choose_nan_64 (
cons_pl x (
cons_pl y []))).
For fused multiply-add, the order in which arguments are examined
to select a NaN payload varies across platforms. E.g. in fma x y z,
x86 considers x first, then y, then z, while ARM considers z first,
then x, then y. The corresponding permutation is defined
for each target, as function Archi.fma_order.
Definition fma_nan_1 (
x y z:
float) : {
x :
float |
is_nan _ _ x =
true} :=
let '(
a,
b,
c) :=
Archi.fma_order x y z in
quiet_nan_64 (
Archi.choose_nan_64 (
cons_pl a (
cons_pl b (
cons_pl c [])))).
One last wrinkle for fused multiply-add: fma zero infinity nan
can return either the quiesced nan, or the default NaN arising out
of the invalid operation zero * infinity. Of our target platforms,
only ARM honors the latter case. The choice between the default NaN
and nan is done as in the case of two-argument arithmetic operations.
Definition fma_nan (
x y z:
float) : {
x :
float |
is_nan _ _ x =
true} :=
match x,
y with
|
B754_infinity _,
B754_zero _ |
B754_zero _,
B754_infinity _ =>
if Archi.fma_invalid_mul_is_nan
then quiet_nan_64 (
Archi.choose_nan_64 (
Archi.default_nan_64 ::
cons_pl z []))
else fma_nan_1 x y z
|
_,
_ =>
fma_nan_1 x y z
end.
Operations over double-precision floats
Definition zero:
float :=
B754_zero _ _ false.
(* the float +0.0 *)
Definition eq_dec:
forall (
f1 f2:
float), {
f1 =
f2} + {
f1 <>
f2} :=
Beq_dec _ _.
Arithmetic operations
Definition neg:
float ->
float :=
Bopp _ _ neg_nan.
(* opposite (change sign) *)
Definition abs:
float ->
float :=
Babs _ _ abs_nan.
(* absolute value (set sign to +) *)
Definition sqrt:
float ->
float :=
Bsqrt 53 1024
__ __ unop_nan mode_NE.
(* square root *)
Definition add:
float ->
float ->
float :=
Bplus 53 1024
__ __ binop_nan mode_NE.
(* addition *)
Definition sub:
float ->
float ->
float :=
Bminus 53 1024
__ __ binop_nan mode_NE.
(* subtraction *)
Definition mul:
float ->
float ->
float :=
Bmult 53 1024
__ __ binop_nan mode_NE.
(* multiplication *)
Definition div:
float ->
float ->
float :=
Bdiv 53 1024
__ __ binop_nan mode_NE.
(* division *)
Definition fma:
float ->
float ->
float ->
float :=
Bfma 53 1024
__ __ fma_nan mode_NE.
(* fused multiply-add x * y + z *)
Definition compare (
f1 f2:
float) :
option Datatypes.comparison :=
(* general comparison *)
Bcompare 53 1024
f1 f2.
Definition cmp (
c:
comparison) (
f1 f2:
float) :
bool :=
(* Boolean comparison *)
cmp_of_comparison c (
compare f1 f2).
Definition ordered (
f1 f2:
float) :
bool :=
ordered_of_comparison (
compare f1 f2).
Conversions
Definition of_single:
float32 ->
float :=
Bconv _ _ 53 1024
__ __ of_single_nan mode_NE.
Definition to_single:
float ->
float32 :=
Bconv _ _ 24 128
__ __ to_single_nan mode_NE.
Definition to_int (
f:
float):
option int :=
(* conversion to signed 32-bit int *)
option_map Int.repr (
ZofB_range _ _ f Int.min_signed Int.max_signed).
Definition to_intu (
f:
float):
option int :=
(* conversion to unsigned 32-bit int *)
option_map Int.repr (
ZofB_range _ _ f 0
Int.max_unsigned).
Definition to_long (
f:
float):
option int64 :=
(* conversion to signed 64-bit int *)
option_map Int64.repr (
ZofB_range _ _ f Int64.min_signed Int64.max_signed).
Definition to_longu (
f:
float):
option int64 :=
(* conversion to unsigned 64-bit int *)
option_map Int64.repr (
ZofB_range _ _ f 0
Int64.max_unsigned).
Definition of_int (
n:
int):
float :=
(* conversion from signed 32-bit int *)
BofZ 53 1024
__ __ (
Int.signed n).
Definition of_intu (
n:
int):
float:=
(* conversion from unsigned 32-bit int *)
BofZ 53 1024
__ __ (
Int.unsigned n).
Definition of_long (
n:
int64):
float :=
(* conversion from signed 64-bit int *)
BofZ 53 1024
__ __ (
Int64.signed n).
Definition of_longu (
n:
int64):
float:=
(* conversion from unsigned 64-bit int *)
BofZ 53 1024
__ __ (
Int64.unsigned n).
Definition from_parsed (
base:
positive) (
intPart:
positive) (
expPart:
Z) :
float :=
Bparse 53 1024
__ __ base intPart expPart.
Conversions between floats and their concrete in-memory representation
as a sequence of 64 bits.
Definition to_bits (
f:
float):
int64 :=
Int64.repr (
bits_of_b64 f).
Definition of_bits (
b:
int64):
float :=
b64_of_bits (
Int64.unsigned b).
Definition from_words (
hi lo:
int) :
float :=
of_bits (
Int64.ofwords hi lo).
Properties
Below are the only properties of floating-point arithmetic that we
rely on in the compiler proof.
Some tactics *
Ltac compute_this val :=
let x :=
fresh in set val as x in *;
vm_compute in x;
subst x.
Ltac smart_omega :=
simpl radix_val in *;
simpl Z.pow in *;
compute_this Int.modulus;
compute_this Int.half_modulus;
compute_this Int.max_unsigned;
compute_this Int.min_signed;
compute_this Int.max_signed;
compute_this Int64.modulus;
compute_this Int64.half_modulus;
compute_this Int64.max_unsigned;
compute_this (
Z.pow_pos 2 1024);
compute_this (
Z.pow_pos 2 53);
compute_this (
Z.pow_pos 2 52);
compute_this (
Z.pow_pos 2 32);
zify;
lia.
Commutativity properties of addition and multiplication.
Theorem add_commut:
forall x y,
is_nan _ _ x =
false \/
is_nan _ _ y =
false ->
add x y =
add y x.
Proof.
intros.
apply Bplus_commut.
destruct x,
y;
try reflexivity;
now destruct H.
Qed.
Theorem mul_commut:
forall x y,
is_nan _ _ x =
false \/
is_nan _ _ y =
false ->
mul x y =
mul y x.
Proof.
intros.
apply Bmult_commut.
destruct x,
y;
try reflexivity;
now destruct H.
Qed.
Multiplication by 2 is diagonal addition.
Theorem mul2_add:
forall f,
add f f =
mul f (
of_int (
Int.repr 2%
Z)).
Proof.
Divisions that can be turned into multiplication by an inverse.
Definition exact_inverse :
float ->
option float :=
Bexact_inverse 53 1024
__ __.
Theorem div_mul_inverse:
forall x y z,
exact_inverse y =
Some z ->
div x y =
mul x z.
Proof.
intros.
apply Bdiv_mult_inverse. 2:
easy.
intros x0 y0 z0 Hx Hy Hz.
unfold binop_nan.
destruct x0;
try discriminate.
destruct y0,
z0;
reflexivity ||
discriminate.
Qed.
Properties of comparisons.
Theorem cmp_swap:
forall c x y,
cmp (
swap_comparison c)
x y =
cmp c y x.
Proof.
Theorem cmp_ne_eq:
forall f1 f2,
cmp Cne f1 f2 =
negb (
cmp Ceq f1 f2).
Proof.
Theorem cmp_lt_eq_false:
forall f1 f2,
cmp Clt f1 f2 =
true ->
cmp Ceq f1 f2 =
true ->
False.
Proof.
Theorem cmp_le_lt_eq:
forall f1 f2,
cmp Cle f1 f2 =
cmp Clt f1 f2 ||
cmp Ceq f1 f2.
Proof.
Theorem cmp_gt_eq_false:
forall x y,
cmp Cgt x y =
true ->
cmp Ceq x y =
true ->
False.
Proof.
Theorem cmp_ge_gt_eq:
forall f1 f2,
cmp Cge f1 f2 =
cmp Cgt f1 f2 ||
cmp Ceq f1 f2.
Proof.
Theorem cmp_lt_gt_false:
forall f1 f2,
cmp Clt f1 f2 =
true ->
cmp Cgt f1 f2 =
true ->
False.
Proof.
Properties of conversions to/from in-memory representation.
The conversions are bijective (one-to-one).
Theorem of_to_bits:
forall f,
of_bits (
to_bits f) =
f.
Proof.
Theorem to_of_bits:
forall b,
to_bits (
of_bits b) =
b.
Proof.
Conversions between floats and unsigned ints can be defined
in terms of conversions between floats and signed ints.
(Most processors provide only the latter, forcing the compiler
to emulate the former.)
Definition ox8000_0000 :=
Int.repr Int.half_modulus.
(* 0x8000_0000 *)
Definition ox7FFF_FFFF :=
Int.repr Int.max_signed.
(* 0x7FFF_FFFF *)
Theorem of_intu_of_int_1:
forall x,
Int.ltu x ox8000_0000 =
true ->
of_intu x =
of_int x.
Proof.
Theorem of_intu_of_int_2:
forall x,
Int.ltu x ox8000_0000 =
false ->
of_intu x =
add (
of_int (
Int.sub x ox8000_0000)) (
of_intu ox8000_0000).
Proof.
Theorem of_intu_of_int_3:
forall x,
of_intu x =
sub (
of_int (
Int.and x ox7FFF_FFFF)) (
of_int (
Int.and x ox8000_0000)).
Proof.
Theorem to_intu_to_int_1:
forall x n,
cmp Clt x (
of_intu ox8000_0000) =
true ->
to_intu x =
Some n ->
to_int x =
Some n.
Proof.
Theorem to_intu_to_int_2:
forall x n,
cmp Clt x (
of_intu ox8000_0000) =
false ->
to_intu x =
Some n ->
to_int (
sub x (
of_intu ox8000_0000)) =
Some (
Int.sub n ox8000_0000).
Proof.
Conversions from ints to floats can be defined as bitwise manipulations
over the in-memory representation. This is what the PowerPC port does.
The trick is that from_words 0x4330_0000 x is the float
2^52 + of_intu x.
Definition ox4330_0000 :=
Int.repr 1127219200.
(* 0x4330_0000 *)
Lemma split_bits_or:
forall x,
split_bits 52 11 (
Int64.unsigned (
Int64.ofwords ox4330_0000 x)) = (
false,
Int.unsigned x, 1075).
Proof.
Lemma from_words_value:
forall x,
B2R _ _ (
from_words ox4330_0000 x) = (
bpow radix2 52 +
IZR (
Int.unsigned x))%
R
/\
is_finite _ _ (
from_words ox4330_0000 x) =
true
/\
Bsign _ _ (
from_words ox4330_0000 x) =
false.
Proof.
Lemma from_words_eq:
forall x,
from_words ox4330_0000 x =
BofZ 53 1024
__ __ (2^52 +
Int.unsigned x).
Proof.
Theorem of_intu_from_words:
forall x,
of_intu x =
sub (
from_words ox4330_0000 x) (
from_words ox4330_0000 Int.zero).
Proof.
Lemma ox8000_0000_signed_unsigned:
forall x,
Int.unsigned (
Int.add x ox8000_0000) =
Int.signed x +
Int.half_modulus.
Proof.
Theorem of_int_from_words:
forall x,
of_int x =
sub (
from_words ox4330_0000 (
Int.add x ox8000_0000))
(
from_words ox4330_0000 ox8000_0000).
Proof.
Definition ox4530_0000 :=
Int.repr 1160773632.
(* 0x4530_0000 *)
Lemma split_bits_or':
forall x,
split_bits 52 11 (
Int64.unsigned (
Int64.ofwords ox4530_0000 x)) = (
false,
Int.unsigned x, 1107).
Proof.
Lemma from_words_value':
forall x,
B2R _ _ (
from_words ox4530_0000 x) = (
bpow radix2 84 +
IZR (
Int.unsigned x *
two_p 32))%
R
/\
is_finite _ _ (
from_words ox4530_0000 x) =
true
/\
Bsign _ _ (
from_words ox4530_0000 x) =
false.
Proof.
Lemma from_words_eq':
forall x,
from_words ox4530_0000 x =
BofZ 53 1024
__ __ (2^84 +
Int.unsigned x * 2^32).
Proof.
Theorem of_longu_from_words:
forall l,
of_longu l =
add (
sub (
from_words ox4530_0000 (
Int64.hiword l))
(
from_words ox4530_0000 (
Int.repr (
two_p 20))))
(
from_words ox4330_0000 (
Int64.loword l)).
Proof.
Theorem of_long_from_words:
forall l,
of_long l =
add (
sub (
from_words ox4530_0000 (
Int.add (
Int64.hiword l)
ox8000_0000))
(
from_words ox4530_0000 (
Int.repr (
two_p 20+
two_p 31))))
(
from_words ox4330_0000 (
Int64.loword l)).
Proof.
Conversions from 64-bit integers can be expressed in terms of
conversions from their 32-bit halves.
Theorem of_longu_decomp:
forall l,
of_longu l =
add (
mul (
of_intu (
Int64.hiword l)) (
BofZ 53 1024
__ __ (2^32)))
(
of_intu (
Int64.loword l)).
Proof.
Theorem of_long_decomp:
forall l,
of_long l =
add (
mul (
of_int (
Int64.hiword l)) (
BofZ 53 1024
__ __ (2^32)))
(
of_intu (
Int64.loword l)).
Proof.
Conversions from unsigned longs can be expressed in terms of conversions from signed longs.
If the unsigned long is too big, a round-to-odd must be performed on it
to avoid double rounding.
Theorem of_longu_of_long_1:
forall x,
Int64.ltu x (
Int64.repr Int64.half_modulus) =
true ->
of_longu x =
of_long x.
Proof.
Theorem of_longu_of_long_2:
forall x,
Int64.ltu x (
Int64.repr Int64.half_modulus) =
false ->
of_longu x =
mul (
of_long (
Int64.or (
Int64.shru x Int64.one)
(
Int64.and x Int64.one)))
(
of_int (
Int.repr 2)).
Proof.
Conversions to/from 32-bit integers can be implemented by going through 64-bit integers.
Remark ZofB_range_widen:
forall (
f:
float)
n min1 max1 min2 max2,
ZofB_range _ _ f min1 max1 =
Some n ->
min2 <=
min1 ->
max1 <=
max2 ->
ZofB_range _ _ f min2 max2 =
Some n.
Proof.
Theorem to_int_to_long:
forall f n,
to_int f =
Some n ->
to_long f =
Some (
Int64.repr (
Int.signed n)).
Proof.
Theorem to_intu_to_longu:
forall f n,
to_intu f =
Some n ->
to_longu f =
Some (
Int64.repr (
Int.unsigned n)).
Proof.
Theorem to_intu_to_long:
forall f n,
to_intu f =
Some n ->
to_long f =
Some (
Int64.repr (
Int.unsigned n)).
Proof.
Theorem of_int_of_long:
forall n,
of_int n =
of_long (
Int64.repr (
Int.signed n)).
Proof.
Theorem of_intu_of_longu:
forall n,
of_intu n =
of_longu (
Int64.repr (
Int.unsigned n)).
Proof.
Theorem of_intu_of_long:
forall n,
of_intu n =
of_long (
Int64.repr (
Int.unsigned n)).
Proof.
End Float.
Single-precision FP numbers
Module Float32.
Definition neg_nan (
f :
float32) : {
x :
float32 |
is_nan _ _ x =
true } :=
match f with
|
B754_nan s p H =>
exist _ (
B754_nan 24 128 (
negb s)
p H) (
eq_refl true)
|
_ =>
default_nan_32
end.
Definition abs_nan (
f :
float32) : {
x :
float32 |
is_nan _ _ x =
true } :=
match f with
|
B754_nan s p H =>
exist _ (
B754_nan 24 128
false p H) (
eq_refl true)
|
_ =>
default_nan_32
end.
Definition cons_pl (
x:
float32) (
l:
list (
bool *
positive)) :=
match x with B754_nan s p _ => (
s,
p) ::
l |
_ =>
l end.
Definition unop_nan (
x:
float32) : {
x :
float32 |
is_nan _ _ x =
true} :=
quiet_nan_32 (
Archi.choose_nan_32 (
cons_pl x [])).
Definition binop_nan (
x y:
float32) : {
x :
float32 |
is_nan _ _ x =
true} :=
quiet_nan_32 (
Archi.choose_nan_32 (
cons_pl x (
cons_pl y []))).
Definition fma_nan_1 (
x y z:
float32) : {
x :
float32 |
is_nan _ _ x =
true} :=
let '(
a,
b,
c) :=
Archi.fma_order x y z in
quiet_nan_32 (
Archi.choose_nan_32 (
cons_pl a (
cons_pl b (
cons_pl c [])))).
Definition fma_nan (
x y z:
float32) : {
x :
float32 |
is_nan _ _ x =
true} :=
match x,
y with
|
B754_infinity _,
B754_zero _ |
B754_zero _,
B754_infinity _ =>
if Archi.fma_invalid_mul_is_nan
then quiet_nan_32 (
Archi.choose_nan_32 (
Archi.default_nan_32 ::
cons_pl z []))
else fma_nan_1 x y z
|
_,
_ =>
fma_nan_1 x y z
end.
Operations over single-precision floats
Definition zero:
float32 :=
B754_zero _ _ false.
(* the float +0.0 *)
Definition eq_dec:
forall (
f1 f2:
float32), {
f1 =
f2} + {
f1 <>
f2} :=
Beq_dec _ _.
Arithmetic operations
Definition neg:
float32 ->
float32 :=
Bopp _ _ neg_nan.
(* opposite (change sign) *)
Definition abs:
float32 ->
float32 :=
Babs _ _ abs_nan.
(* absolute value (set sign to +) *)
Definition sqrt:
float32 ->
float32 :=
Bsqrt 24 128
__ __ unop_nan mode_NE.
(* square root *)
Definition add:
float32 ->
float32 ->
float32 :=
Bplus 24 128
__ __ binop_nan mode_NE.
(* addition *)
Definition sub:
float32 ->
float32 ->
float32 :=
Bminus 24 128
__ __ binop_nan mode_NE.
(* subtraction *)
Definition mul:
float32 ->
float32 ->
float32 :=
Bmult 24 128
__ __ binop_nan mode_NE.
(* multiplication *)
Definition div:
float32 ->
float32 ->
float32 :=
Bdiv 24 128
__ __ binop_nan mode_NE.
(* division *)
Definition fma:
float32 ->
float32 ->
float32 ->
float32 :=
Bfma 24 128
__ __ fma_nan mode_NE.
(* fused multiply-add x * y + z *)
Definition compare (
f1 f2:
float32) :
option Datatypes.comparison :=
(* general comparison *)
Bcompare 24 128
f1 f2.
Definition cmp (
c:
comparison) (
f1 f2:
float32) :
bool :=
(* comparison *)
cmp_of_comparison c (
compare f1 f2).
Definition ordered (
f1 f2:
float32) :
bool :=
ordered_of_comparison (
compare f1 f2).
Conversions
Definition of_double :
float ->
float32 :=
Float.to_single.
Definition to_double :
float32 ->
float :=
Float.of_single.
Definition to_int (
f:
float32):
option int :=
(* conversion to signed 32-bit int *)
option_map Int.repr (
ZofB_range _ _ f Int.min_signed Int.max_signed).
Definition to_intu (
f:
float32):
option int :=
(* conversion to unsigned 32-bit int *)
option_map Int.repr (
ZofB_range _ _ f 0
Int.max_unsigned).
Definition to_long (
f:
float32):
option int64 :=
(* conversion to signed 64-bit int *)
option_map Int64.repr (
ZofB_range _ _ f Int64.min_signed Int64.max_signed).
Definition to_longu (
f:
float32):
option int64 :=
(* conversion to unsigned 64-bit int *)
option_map Int64.repr (
ZofB_range _ _ f 0
Int64.max_unsigned).
Definition of_int (
n:
int):
float32 :=
(* conversion from signed 32-bit int to single-precision float *)
BofZ 24 128
__ __ (
Int.signed n).
Definition of_intu (
n:
int):
float32 :=
(* conversion from unsigned 32-bit int to single-precision float *)
BofZ 24 128
__ __ (
Int.unsigned n).
Definition of_long (
n:
int64):
float32 :=
(* conversion from signed 64-bit int to single-precision float *)
BofZ 24 128
__ __ (
Int64.signed n).
Definition of_longu (
n:
int64):
float32 :=
(* conversion from unsigned 64-bit int to single-precision float *)
BofZ 24 128
__ __ (
Int64.unsigned n).
Definition from_parsed (
base:
positive) (
intPart:
positive) (
expPart:
Z) :
float32 :=
Bparse 24 128
__ __ base intPart expPart.
Conversions between floats and their concrete in-memory representation
as a sequence of 32 bits.
Definition to_bits (
f:
float32) :
int :=
Int.repr (
bits_of_b32 f).
Definition of_bits (
b:
int):
float32 :=
b32_of_bits (
Int.unsigned b).
Properties
Commutativity properties of addition and multiplication.
Theorem add_commut:
forall x y,
is_nan _ _ x =
false \/
is_nan _ _ y =
false ->
add x y =
add y x.
Proof.
intros.
apply Bplus_commut.
destruct x,
y;
try reflexivity;
now destruct H.
Qed.
Theorem mul_commut:
forall x y,
is_nan _ _ x =
false \/
is_nan _ _ y =
false ->
mul x y =
mul y x.
Proof.
intros.
apply Bmult_commut.
destruct x,
y;
try reflexivity;
now destruct H.
Qed.
Multiplication by 2 is diagonal addition.
Theorem mul2_add:
forall f,
add f f =
mul f (
of_int (
Int.repr 2%
Z)).
Proof.
Divisions that can be turned into multiplication by an inverse.
Definition exact_inverse :
float32 ->
option float32 :=
Bexact_inverse 24 128
__ __.
Theorem div_mul_inverse:
forall x y z,
exact_inverse y =
Some z ->
div x y =
mul x z.
Proof.
intros.
apply Bdiv_mult_inverse. 2:
easy.
intros x0 y0 z0 Hx Hy Hz.
unfold binop_nan.
destruct x0;
try discriminate.
destruct y0,
z0;
reflexivity ||
discriminate.
Qed.
Properties of comparisons.
Theorem cmp_swap:
forall c x y,
cmp (
swap_comparison c)
x y =
cmp c y x.
Proof.
Theorem cmp_ne_eq:
forall f1 f2,
cmp Cne f1 f2 =
negb (
cmp Ceq f1 f2).
Proof.
Theorem cmp_lt_eq_false:
forall f1 f2,
cmp Clt f1 f2 =
true ->
cmp Ceq f1 f2 =
true ->
False.
Proof.
Theorem cmp_le_lt_eq:
forall f1 f2,
cmp Cle f1 f2 =
cmp Clt f1 f2 ||
cmp Ceq f1 f2.
Proof.
Theorem cmp_gt_eq_false:
forall x y,
cmp Cgt x y =
true ->
cmp Ceq x y =
true ->
False.
Proof.
Theorem cmp_ge_gt_eq:
forall f1 f2,
cmp Cge f1 f2 =
cmp Cgt f1 f2 ||
cmp Ceq f1 f2.
Proof.
Theorem cmp_lt_gt_false:
forall f1 f2,
cmp Clt f1 f2 =
true ->
cmp Cgt f1 f2 =
true ->
False.
Proof.
Theorem cmp_double:
forall f1 f2 c,
cmp c f1 f2 =
Float.cmp c (
to_double f1) (
to_double f2).
Proof.
Properties of conversions to/from in-memory representation.
The conversions are bijective (one-to-one).
Theorem of_to_bits:
forall f,
of_bits (
to_bits f) =
f.
Proof.
Theorem to_of_bits:
forall b,
to_bits (
of_bits b) =
b.
Proof.
Conversions from 32-bit integers to single-precision floats can
be decomposed into a conversion to a double-precision float,
followed by a Float32.of_double conversion. No double rounding occurs.
Theorem of_int_double:
forall n,
of_int n =
of_double (
Float.of_int n).
Proof.
Theorem of_intu_double:
forall n,
of_intu n =
of_double (
Float.of_intu n).
Proof.
Conversion of single-precision floats to integers can be decomposed
into a Float32.to_double extension, followed by a double-precision-to-int
conversion.
Theorem to_int_double:
forall f n,
to_int f =
Some n ->
Float.to_int (
to_double f) =
Some n.
Proof.
Theorem to_intu_double:
forall f n,
to_intu f =
Some n ->
Float.to_intu (
to_double f) =
Some n.
Proof.
Theorem to_long_double:
forall f n,
to_long f =
Some n ->
Float.to_long (
to_double f) =
Some n.
Proof.
Theorem to_longu_double:
forall f n,
to_longu f =
Some n ->
Float.to_longu (
to_double f) =
Some n.
Proof.
Conversions from 64-bit integers to single-precision floats can be expressed
as conversion to a double-precision float followed by a Float32.of_double conversion.
To avoid double rounding when the integer is large (above 2^53), a round
to odd must be performed on the integer before conversion to double-precision float.
Lemma int_round_odd_plus:
forall p n, 0 <=
p ->
int_round_odd n p =
Z.land (
Z.lor n (
Z.land n (2^
p-1) + (2^
p-1))) (-(2^
p)).
Proof.
intros.
assert (
POS: 0 < 2^
p)
by (
apply (
Zpower_gt_0 radix2);
auto).
assert (
A:
Z.land n (2^
p-1) =
n mod 2^
p).
{
rewrite <-
Z.land_ones by auto.
f_equal.
rewrite Z.ones_equiv.
lia. }
rewrite A.
assert (
B: 0 <=
n mod 2^
p < 2^
p).
{
apply Z_mod_lt.
lia. }
set (
m :=
n mod 2^
p + (2^
p-1))
in *.
assert (
C:
m / 2^
p =
if zeq (
n mod 2^
p) 0
then 0
else 1).
{
unfold m.
destruct (
zeq (
n mod 2^
p) 0).
rewrite e.
apply Z.div_small.
lia.
eapply Coqlib.Zdiv_unique with (
n mod 2^
p - 1).
ring.
lia. }
assert (
D:
Z.testbit m p =
if zeq (
n mod 2^
p) 0
then false else true).
{
destruct (
zeq (
n mod 2^
p) 0).
apply Z.testbit_false;
auto.
rewrite C;
auto.
apply Z.testbit_true;
auto.
rewrite C;
auto. }
assert (
E:
forall i,
p <
i ->
Z.testbit m i =
false).
{
intros.
apply Z.testbit_false.
lia.
replace (
m / 2^
i)
with 0.
auto.
symmetry.
apply Z.div_small.
unfold m.
split.
lia.
apply Z.lt_le_trans with (2 * 2^
p).
lia.
change 2
with (2^1)
at 1.
rewrite <- (
Zpower_plus radix2)
by lia.
apply Zpower_le.
lia. }
assert (
F:
forall i, 0 <=
i ->
Z.testbit (-2^
p)
i =
if zlt i p then false else true).
{
intros.
rewrite Z.bits_opp by auto.
rewrite <-
Z.ones_equiv.
destruct (
zlt i p).
rewrite Z.ones_spec_low by lia.
auto.
rewrite Z.ones_spec_high by lia.
auto. }
apply int_round_odd_bits;
auto.
-
intros.
rewrite Z.land_spec,
F,
zlt_true by lia.
apply andb_false_r.
-
rewrite Z.land_spec,
Z.lor_spec,
D,
F,
zlt_false,
andb_true_r by lia.
destruct (
Z.eqb (
n mod 2^
p) 0)
eqn:
Z.
rewrite Z.eqb_eq in Z.
rewrite Z,
zeq_true.
apply orb_false_r.
rewrite Z.eqb_neq in Z.
rewrite zeq_false by auto.
apply orb_true_r.
-
intros.
rewrite Z.land_spec,
Z.lor_spec,
E,
F,
zlt_false,
andb_true_r by lia.
apply orb_false_r.
Qed.
Lemma of_long_round_odd:
forall n conv_nan,
2^36 <=
Z.abs n < 2^64 ->
BofZ 24 128
__ __ n =
Bconv _ _ 24 128
__ __ conv_nan mode_NE (
BofZ 53 1024
__ __ (
Z.land (
Z.lor n ((
Z.land n 2047) + 2047)) (-2048))).
Proof.
Theorem of_longu_double_1:
forall n,
Int64.unsigned n <= 2^53 ->
of_longu n =
of_double (
Float.of_longu n).
Proof.
Theorem of_longu_double_2:
forall n,
2^36 <=
Int64.unsigned n ->
of_longu n =
of_double (
Float.of_longu
(
Int64.and (
Int64.or n
(
Int64.add (
Int64.and n (
Int64.repr 2047))
(
Int64.repr 2047)))
(
Int64.repr (-2048)))).
Proof.
Theorem of_long_double_1:
forall n,
Z.abs (
Int64.signed n) <= 2^53 ->
of_long n =
of_double (
Float.of_long n).
Proof.
Theorem of_long_double_2:
forall n,
2^36 <=
Z.abs (
Int64.signed n) ->
of_long n =
of_double (
Float.of_long
(
Int64.and (
Int64.or n
(
Int64.add (
Int64.and n (
Int64.repr 2047))
(
Int64.repr 2047)))
(
Int64.repr (-2048)))).
Proof.
End Float32.
Global Opaque
Float.zero Float.eq_dec Float.neg Float.abs Float.of_single Float.to_single
Float.of_int Float.of_intu Float.of_long Float.of_longu
Float.to_int Float.to_intu Float.to_long Float.to_longu
Float.add Float.sub Float.mul Float.div Float.cmp Float.ordered
Float.to_bits Float.of_bits Float.from_words.
Global Opaque
Float32.zero Float32.eq_dec Float32.neg Float32.abs
Float32.of_int Float32.of_intu Float32.of_long Float32.of_longu
Float32.to_int Float32.to_intu Float32.to_long Float32.to_longu
Float32.add Float32.sub Float32.mul Float32.div Float32.cmp Float32.ordered
Float32.to_bits Float32.of_bits.