|
| 1 | +From Coq Require Import ZArith Reals Psatz Lia. |
| 2 | +From Flocq Require Import Core.Zaux Core.Defs Core.Generic_fmt Core.FLX |
| 3 | + Core.Round_NE Core.Raux Core.Float_prop. |
| 4 | + |
| 5 | +Open Scope R_scope. |
| 6 | + |
| 7 | +Section FMA_Remainder. |
| 8 | + |
| 9 | +Variable p : Z. |
| 10 | +Hypothesis Hp : (1 < p)%Z. |
| 11 | +Let beta := radix2. |
| 12 | +Let fexp := FLX_exp p. |
| 13 | +Let format := generic_format beta fexp. |
| 14 | +Let round_ne := round beta fexp ZnearestE. |
| 15 | +Instance Hp0 : Prec_gt_0 p. unfold Prec_gt_0. lia. Qed. |
| 16 | + |
| 17 | +Lemma round_exact : forall x, format x -> round_ne x = x. |
| 18 | +Proof. intros x Hx. apply round_generic; [apply valid_rnd_N|exact Hx]. Qed. |
| 19 | + |
| 20 | +Lemma format_F2R_bounded : forall m e : Z, |
| 21 | + (Z.abs m < Zpower beta p)%Z -> format (F2R (Float beta m e)). |
| 22 | +Proof. |
| 23 | + intros m e Hm. apply generic_format_FLX. |
| 24 | + econstructor; [reflexivity|simpl; exact Hm]. |
| 25 | +Qed. |
| 26 | + |
| 27 | +(** Case 1: ex >= ey. Align x to exponent ey, show mantissa < 2^p. *) |
| 28 | +Theorem remainder_format_ge : |
| 29 | + forall (mx my n : Z) (ex ey : Z), |
| 30 | + (Z.abs mx < Zpower beta p)%Z -> |
| 31 | + (Z.abs my < Zpower beta p)%Z -> |
| 32 | + (ey <= ex)%Z -> my <> 0%Z -> |
| 33 | + let x := F2R (Float beta mx ex) in |
| 34 | + let y := F2R (Float beta my ey) in |
| 35 | + Rabs (x - IZR n * y) <= Rabs y / 2 -> |
| 36 | + format (x - IZR n * y). |
| 37 | +Proof. |
| 38 | + intros mx my n ex ey Hmx Hmy Hle Hmy0 x y Hbound. |
| 39 | + destruct (Req_dec (x - IZR n * y) 0) as [Hz|Hnz]. |
| 40 | + { rewrite Hz. apply generic_format_0. } |
| 41 | + unfold x. rewrite (@F2R_change_exp beta ey mx ex Hle). |
| 42 | + set (M := (mx * Zpower beta (ex - ey) - n * my)%Z). |
| 43 | + assert (Heq : F2R (Float beta (mx * Zpower beta (ex - ey)) ey) - IZR n * y |
| 44 | + = F2R (Float beta M ey)). |
| 45 | + { unfold M, y, F2R. simpl Fnum. simpl Fexp. |
| 46 | + rewrite minus_IZR, 2!mult_IZR. |
| 47 | + unfold Rminus. rewrite Rmult_plus_distr_r. |
| 48 | + rewrite <- Ropp_mult_distr_l, <- Rmult_assoc. reflexivity. } |
| 49 | + rewrite Heq. apply format_F2R_bounded. |
| 50 | + assert (Hbp : (0 < bpow beta ey)%R) by apply bpow_gt_0. |
| 51 | + assert (HMeq : IZR M * bpow beta ey = x - IZR n * y). |
| 52 | + { unfold x. rewrite (@F2R_change_exp beta ey mx ex Hle). symmetry. |
| 53 | + exact Heq. } |
| 54 | + assert (Hyeq : IZR my * bpow beta ey = y). |
| 55 | + { unfold y, F2R. simpl. reflexivity. } |
| 56 | + assert (H1 : Rabs (IZR M) * bpow beta ey = Rabs (x - IZR n * y)). |
| 57 | + { rewrite <- HMeq, Rabs_mult, (Rabs_right (bpow beta ey)); |
| 58 | + [reflexivity | left; exact Hbp]. } |
| 59 | + assert (H2 : Rabs y = Rabs (IZR my) * bpow beta ey). |
| 60 | + { rewrite <- Hyeq, Rabs_mult, (Rabs_right (bpow beta ey)); |
| 61 | + [reflexivity | left; exact Hbp]. } |
| 62 | + assert (H3 : Rabs (IZR M) * bpow beta ey <= |
| 63 | + Rabs (IZR my) * bpow beta ey / 2). |
| 64 | + { rewrite H1. rewrite H2 in Hbound. exact Hbound. } |
| 65 | + assert (Hbound' : Rabs (IZR M) <= Rabs (IZR my) / 2). |
| 66 | + { apply Rmult_le_reg_r with (bpow beta ey); [exact Hbp|]. lra. } |
| 67 | + apply lt_IZR. rewrite abs_IZR. |
| 68 | + apply Rle_lt_trans with (1 := Hbound'). |
| 69 | + apply Rle_lt_trans with (Rabs (IZR my)). |
| 70 | + { assert (H : 0 <= Rabs (IZR my)) by apply Rabs_pos. |
| 71 | + apply Rle_trans with (Rabs (IZR my) * 1); [|lra]. |
| 72 | + apply Rmult_le_compat_l; [exact H|]. lra. } |
| 73 | + rewrite Rabs_Zabs. apply IZR_lt. exact Hmy. |
| 74 | +Qed. |
| 75 | + |
| 76 | +(** Case 2: ex < ey. Align y to exponent ex, show mantissa < 2^p. *) |
| 77 | +Theorem remainder_format_lt : |
| 78 | + forall (mx my n : Z) (ex ey : Z), |
| 79 | + (Z.abs mx < Zpower beta p)%Z -> |
| 80 | + (Z.abs my < Zpower beta p)%Z -> |
| 81 | + (ex < ey)%Z -> my <> 0%Z -> |
| 82 | + let x := F2R (Float beta mx ex) in |
| 83 | + let y := F2R (Float beta my ey) in |
| 84 | + Rabs (x - IZR n * y) <= Rabs y / 2 -> |
| 85 | + format (x - IZR n * y). |
| 86 | +Proof. |
| 87 | + intros mx my n ex ey Hmx Hmy Hlt Hmy0 x y Hbound. |
| 88 | + destruct (Req_dec (x - IZR n * y) 0) as [Hz|Hnz]. |
| 89 | + { rewrite Hz. apply generic_format_0. } |
| 90 | + (* Align y to exponent ex *) |
| 91 | + assert (Hle : (ex <= ey)%Z) by lia. |
| 92 | + unfold y. rewrite (@F2R_change_exp beta ex my ey Hle). |
| 93 | + set (M := (mx - n * (my * Zpower beta (ey - ex)))%Z). |
| 94 | + assert (Heq : x - IZR n * F2R (Float beta (my * Zpower beta (ey - ex)) ex) |
| 95 | + = F2R (Float beta M ex)). |
| 96 | + { unfold M, x, F2R. simpl Fnum. simpl Fexp. |
| 97 | + rewrite minus_IZR, 3!mult_IZR. |
| 98 | + unfold Rminus. rewrite Rmult_plus_distr_r. |
| 99 | + rewrite <- Ropp_mult_distr_l. ring. } |
| 100 | + rewrite Heq. apply format_F2R_bounded. |
| 101 | + (* |M| < beta^p. Key: |M| * bpow ex = |x - n*y| <= |x| + |n*y|. |
| 102 | + But more directly: |M| * bpow ex <= |y|/2 and also |
| 103 | + |M| = |mx - n * my'| where my' = my * beta^(ey-ex). |
| 104 | + Since |x - n*y| <= |y|/2 and |x - n*y| <= |x| + |n*y|, |
| 105 | + we use: |M| <= |mx| (because |M * bpow ex| <= |mx * bpow ex| + ...). |
| 106 | + Actually the simplest bound: |M| * bpow ex = |x - n*y| <= |y|/2, |
| 107 | + and also |x - n*y| <= |x| when |n*y| >= |x| - |y|/2 >= |x|/2. |
| 108 | + But we can just use |M| <= |mx|: |
| 109 | + |M| = |mx - n*my'|. Since |M| * bpow ex <= |mx| * bpow ex |
| 110 | + (because |x - n*y| <= |x| when the remainder has the right bound... |
| 111 | + this isn't always true). |
| 112 | + |
| 113 | + Better approach: |M| * bpow ex = |x - n*y| <= |y|/2 = |my'| * bpow ex / 2. |
| 114 | + So |M| <= |my'|/2. But |my'| can be large. |
| 115 | + However, |M| * bpow ex <= |x| = |mx| * bpow ex is true because: |
| 116 | + |x - n*y| <= |y|/2 implies |x| >= |n*y| - |y|/2 = |y|(|n| - 1/2). |
| 117 | + And |x - n*y| <= |y|/2 <= |x| iff |n| >= 1. |
| 118 | + When n = 0: |x| <= |y|/2, and M = mx, so |M| = |mx| < beta^p. Done. |
| 119 | + When |n| >= 1: |x - n*y| <= |y|/2 <= |n*y| - |y|/2 + |y|/2 = |n*y|. |
| 120 | + So |x - n*y| <= |n*y| and |x - n*y| <= |y|/2. |
| 121 | + Also |x - n*y| <= |x| because |n*y| >= |x| - |y|/2 and |
| 122 | + |x - n*y| = |x| - |n*y| or |n*y| - |x| (depending on sign). |
| 123 | + If |n*y| >= |x|: |x - n*y| = |n*y| - |x| <= |y|/2, so |n*y| <= |x| + |y|/2. |
| 124 | + Then |x - n*y| = |n*y| - |x| <= |y|/2 <= |x| (since |n| >= 1 and |
| 125 | + |y| <= 2|x| from |n*y| <= |x| + |y|/2 and |n| >= 1). |
| 126 | + Wait, |y| <= 2|x| isn't guaranteed. |
| 127 | + |
| 128 | + Let me just use the direct bound for each case of n. *) |
| 129 | + assert (Hbp : (0 < bpow beta ex)%R) by apply bpow_gt_0. |
| 130 | + assert (HMeq : IZR M * bpow beta ex = x - IZR n * y). |
| 131 | + { unfold x, y. rewrite (@F2R_change_exp beta ex my ey Hle). symmetry. |
| 132 | + exact Heq. } |
| 133 | + (* When n = 0: M = mx, and |mx| < beta^p. *) |
| 134 | + destruct (Z.eq_dec n 0) as [Hn0|Hn0]. |
| 135 | + { subst n. unfold M. rewrite Z.mul_0_l, Z.sub_0_r. exact Hmx. } |
| 136 | + (* When n <> 0: |M| * bpow ex = |x - n*y| <= |y|/2. |
| 137 | + Also |M| * bpow ex = |x - n*y| <= |x| = |mx| * bpow ex. |
| 138 | + The second inequality holds because |n| >= 1 and |x - n*y| <= |y|/2 |
| 139 | + imply |x| >= |n*y| - |y|/2 >= |y|/2 >= |x - n*y|. |
| 140 | + So |M| <= |mx| < beta^p. *) |
| 141 | + assert (Habs_le_x : Rabs (x - IZR n * y) <= Rabs x). |
| 142 | + { (* |n| >= 1 and |x - n*y| <= |y|/2 imply |x - n*y| <= |x| *) |
| 143 | + assert (Hny_bound : Rabs (IZR n * y) >= Rabs y). |
| 144 | + { rewrite Rabs_mult. apply Rle_ge. |
| 145 | + rewrite <- (Rmult_1_l (Rabs y)) at 1. |
| 146 | + apply Rmult_le_compat_r; [apply Rabs_pos|]. |
| 147 | + rewrite <- abs_IZR. apply IZR_le. |
| 148 | + assert (n <> 0%Z) by exact Hn0. lia. } |
| 149 | + assert (Hny_close : Rabs (IZR n * y - x) <= Rabs y / 2). |
| 150 | + { replace (IZR n * y - x) with (- (x - IZR n * y)) by ring. |
| 151 | + rewrite Rabs_Ropp. exact Hbound. } |
| 152 | + (* |n*y| >= |y| and |n*y - x| <= |y|/2, so |x| >= |y|/2 *) |
| 153 | + assert (Hx_ge : Rabs x >= Rabs y / 2). |
| 154 | + { assert (H := Rabs_triang_inv (IZR n * y) x). |
| 155 | + assert (Habs_y_pos : 0 <= Rabs y) by apply Rabs_pos. |
| 156 | + (* |n*y - x| >= |n*y| - |x|, and |n*y - x| = |x - n*y| <= |y|/2 *) |
| 157 | + (* So |n*y| - |x| <= |y|/2, hence |x| >= |n*y| - |y|/2 >= |y| - |y|/2 *) |
| 158 | + assert (Hny_minus_x : Rabs (IZR n * y) - Rabs x <= Rabs y / 2). |
| 159 | + { replace (IZR n * y - x) with (-(x - IZR n * y)) in H by ring. |
| 160 | + rewrite Rabs_Ropp in H. lra. } |
| 161 | + lra. } |
| 162 | + (* |x - n*y| <= |y|/2 <= |x| *) |
| 163 | + lra. } |
| 164 | + (* Now: |M| * bpow ex <= |mx| * bpow ex *) |
| 165 | + assert (HM_le_mx : Rabs (IZR M) <= Rabs (IZR mx)). |
| 166 | + { apply Rmult_le_reg_r with (bpow beta ex); [exact Hbp|]. |
| 167 | + assert (H1 : Rabs (IZR M) * bpow beta ex = Rabs (IZR M * bpow beta ex)). |
| 168 | + { rewrite Rabs_mult, (Rabs_right (bpow beta ex)); [ring|left; exact Hbp]. } |
| 169 | + assert (H2 : Rabs (IZR mx) * bpow beta ex = Rabs (IZR mx * bpow beta ex)). |
| 170 | + { rewrite Rabs_mult, (Rabs_right (bpow beta ex)); [ring|left; exact Hbp]. } |
| 171 | + rewrite H1, H2. |
| 172 | + rewrite HMeq. |
| 173 | + replace (IZR mx * bpow beta ex) with x by (unfold x, F2R; simpl; ring). |
| 174 | + exact Habs_le_x. } |
| 175 | + apply lt_IZR. rewrite abs_IZR. |
| 176 | + apply Rle_lt_trans with (1 := HM_le_mx). |
| 177 | + rewrite Rabs_Zabs. apply IZR_lt. exact Hmx. |
| 178 | +Qed. |
| 179 | + |
| 180 | +(** Combined theorem: the remainder is representable for any exponents. *) |
| 181 | +Theorem remainder_format : |
| 182 | + forall (mx my n : Z) (ex ey : Z), |
| 183 | + (Z.abs mx < Zpower beta p)%Z -> |
| 184 | + (Z.abs my < Zpower beta p)%Z -> |
| 185 | + my <> 0%Z -> |
| 186 | + let x := F2R (Float beta mx ex) in |
| 187 | + let y := F2R (Float beta my ey) in |
| 188 | + Rabs (x - IZR n * y) <= Rabs y / 2 -> |
| 189 | + format (x - IZR n * y). |
| 190 | +Proof. |
| 191 | + intros mx my n ex ey Hmx Hmy Hmy0 x y Hbound. |
| 192 | + destruct (Z_le_dec ey ex) as [Hle|Hgt]. |
| 193 | + - exact (remainder_format_ge mx my n ex ey Hmx Hmy Hle Hmy0 Hbound). |
| 194 | + - apply remainder_format_lt; [exact Hmx|exact Hmy| lia |exact Hmy0|exact Hbound]. |
| 195 | +Qed. |
| 196 | + |
| 197 | +(** Main theorem: FMA returns the exact remainder. *) |
| 198 | +Theorem fma_remainder_exact : |
| 199 | + forall (mx my n : Z) (ex ey : Z), |
| 200 | + (Z.abs mx < Zpower beta p)%Z -> |
| 201 | + (Z.abs my < Zpower beta p)%Z -> |
| 202 | + my <> 0%Z -> |
| 203 | + let x := F2R (Float beta mx ex) in |
| 204 | + let y := F2R (Float beta my ey) in |
| 205 | + Rabs (x - IZR n * y) <= Rabs y / 2 -> |
| 206 | + round_ne (x - IZR n * y) = x - IZR n * y. |
| 207 | +Proof. |
| 208 | + intros. apply round_exact. now apply remainder_format. |
| 209 | +Qed. |
| 210 | + |
| 211 | +(** ================================================================ *) |
| 212 | +(** Comparison step: the correct n gives strictly smaller |remainder| *) |
| 213 | +(** ================================================================ *) |
| 214 | + |
| 215 | +(** When |R_correct| < |y|/2 (non-tie), the wrong candidate has |
| 216 | + |R_wrong| > |R_correct|, so the algorithm's min-|result| selection |
| 217 | + picks the correct one. |
| 218 | +
|
| 219 | + R_wrong = R_correct -/+ y (since n_wrong = n_correct +/- 1), so |
| 220 | + |R_wrong| = |y| - |R_correct| (when R_correct and y have appropriate |
| 221 | + signs) or |y| + |R_correct|. In either case |R_wrong| > |R_correct|. |
| 222 | +
|
| 223 | + More precisely: |R_wrong| >= |y| - |R_correct| > |y|/2 > |R_correct| |
| 224 | + when |R_correct| < |y|/2. *) |
| 225 | + |
| 226 | +Theorem comparison_step : |
| 227 | + forall r y : R, |
| 228 | + y <> 0 -> |
| 229 | + Rabs r < Rabs y / 2 -> |
| 230 | + Rabs r < Rabs (r + y) /\ Rabs r < Rabs (r - y). |
| 231 | +Proof. |
| 232 | + intros r y Hy0 Hr. |
| 233 | + assert (Hay : Rabs y > 0) by (apply Rabs_pos_lt; exact Hy0). |
| 234 | + assert (Hay2 : Rabs y / 2 > 0) by lra. |
| 235 | + split. |
| 236 | + - (* |r| < |r + y| *) |
| 237 | + (* |y| = |(r+y) - r| <= |r+y| + |r|, so |r+y| >= |y| - |r| > |y|/2 > |r| *) |
| 238 | + assert (H := Rabs_triang (r + y) (- r)). |
| 239 | + replace (r + y + - r) with y in H by ring. |
| 240 | + rewrite Rabs_Ropp in H. lra. |
| 241 | + - (* |r| < |r - y| *) |
| 242 | + assert (H := Rabs_triang (r - y) y). |
| 243 | + replace (r - y + y) with r in H by ring. |
| 244 | + (* H : |r| <= |r - y| + |y|, so |r - y| >= |r| - |y| *) |
| 245 | + (* But we need |r - y| >= |y| - |r|. Use triangle the other way: *) |
| 246 | + assert (H2 := Rabs_triang (- (r - y)) r). |
| 247 | + replace (- (r - y) + r) with y in H2 by ring. |
| 248 | + rewrite Rabs_Ropp in H2. |
| 249 | + (* H2 : |y| <= |r - y| + |r|, so |r - y| >= |y| - |r| > |y|/2 > |r| *) |
| 250 | + lra. |
| 251 | +Qed. |
| 252 | + |
| 253 | +(** Corollary: when n is wrong by 1, the correct remainder is strictly |
| 254 | + smaller in absolute value than the wrong one (in exact arithmetic). |
| 255 | + Combined with fma_remainder_exact (the correct remainder is computed |
| 256 | + exactly by FMA), this means the algorithm's min-selection works |
| 257 | + whenever the FMA of the wrong candidate preserves the ordering — |
| 258 | + which holds because |R_wrong| - |R_correct| >= ulp(R_wrong). *) |
| 259 | +Corollary wrong_n_gives_larger_remainder : |
| 260 | + forall r y : R, |
| 261 | + y <> 0 -> |
| 262 | + Rabs r < Rabs y / 2 -> |
| 263 | + Rabs r < Rabs (r + y) /\ |
| 264 | + Rabs r < Rabs (r - y). |
| 265 | +Proof. exact comparison_step. Qed. |
| 266 | + |
| 267 | +End FMA_Remainder. |
0 commit comments