Hi again!
I am trying to figure a simple example of a program that shows the
accuracy differences between float and double types. In particular I'm
searching for an operation that works on doubles, but fails on floats,
and my first guess was an eigenvector problem (using the power method).
Unfortunately I found that they work ok in both cases, and I'm
concerned about optimizations and "smart promotions" by the compiler
I'm using (powerpc-apple-darwin9-gcc-4.0.1).
Do you have any hints about this? (code follows, probably full of errors)
Thanks!
#include <stdio.h>
#include <math.h>
int main (int argc, const char *argv[])
{
float f11, f12, f21, f22, f1, f2, f1new, f2new, fnorm, ferr;
double d11, d12, d21, d22, d1, d2, d1new, d2new, dnorm, derr;
printf("EIG = { 1, 0.0000313743 } (%e , %e)\n", 1.0, 0.0000313743);
f11 = 1001.321;
f12 = -3.0;
f21 = 3.14159 / 100.0;
f22 = -6.21e-3;
f1 = f11;
f2 = f22;
fnorm = sqrt(f1 * f1 + f2 * f2);
f1 /= fnorm;
f2 /= fnorm;
printf("MAT %e , %e ; %e , %e / VEC %e , %e \n",
f11, f12, f21, f22, f1, f2);
/* {a11 b1 + a12 b2, a21 b1 + a22 b2} */
do
{
f1new = f11 * f1 + f12 * f2;
f2new = f21 * f1 + f22 * f2;
fnorm = sqrt(f1new * f1new + f2new * f2new);
f1new /= fnorm;
f2new /= fnorm;
ferr = sqrt((f1new - f1) * (f1new - f1) + (f2new - f2) *
(f2new - f2));
printf("OLD = %e , %e ; NEW = %e , %e ; ERR = %e \n",
f1, f2, f1new, f2new, ferr);
f1 = f1new;
f2 = f2new;
} while (ferr 0.001);
printf("FLT = %e , %e (ERR=%e) \n", f1, f2, ferr);
printf("\n\nDOU BLE\n\n");
d11 = 1001.321;
d12 = -3.0;
d21 = 3.14159 / 100.0;
d22 = -6.21e-3;
d1 = d11;
d2 = d22;
dnorm = sqrt(d1 * d1 + d2 * d2);
d1 /= dnorm;
d2 /= dnorm;
printf("MAT %e , %e ; %e , %e / VEC %e , %e \n",
d11, d12, d21, d22, d1, d2);
/* {a11 b1 + a12 b2, a21 b1 + a22 b2} */
do
{
d1new = d11 * d1 + d12 * d2;
d2new = d21 * d1 + d22 * d2;
dnorm = sqrt(d1new * d1new + d2new * d2new);
d1new /= dnorm;
d2new /= dnorm;
derr = sqrt((d1new - d1) * (d1new - d1) + (d2new - d2) *
(d2new - d2));
printf("OLD = %e , %e ; NEW = %e , %e ; ERR = %e \n",
d1, d2, d1new, d2new, derr);
d1 = d1new;
d2 = d2new;
} while (derr 0.001);
printf("DBL = %e , %e (ERR=%e) \n", d1, d2, derr);
printf("\n\nDIF F FLT-DBL=%e \n", sqrt((f1 - d1) * (f1 - d2) + (f2 -
d2) * (f2 - d2)));
return 0;
}
--
SenseiĀ <Sensei's e-mail is at Mac-dot-com>
Beware of bugs in the above code; I have only proved it correct, not tried it.
(Donald Knuth)
I am trying to figure a simple example of a program that shows the
accuracy differences between float and double types. In particular I'm
searching for an operation that works on doubles, but fails on floats,
and my first guess was an eigenvector problem (using the power method).
Unfortunately I found that they work ok in both cases, and I'm
concerned about optimizations and "smart promotions" by the compiler
I'm using (powerpc-apple-darwin9-gcc-4.0.1).
Do you have any hints about this? (code follows, probably full of errors)
Thanks!
#include <stdio.h>
#include <math.h>
int main (int argc, const char *argv[])
{
float f11, f12, f21, f22, f1, f2, f1new, f2new, fnorm, ferr;
double d11, d12, d21, d22, d1, d2, d1new, d2new, dnorm, derr;
printf("EIG = { 1, 0.0000313743 } (%e , %e)\n", 1.0, 0.0000313743);
f11 = 1001.321;
f12 = -3.0;
f21 = 3.14159 / 100.0;
f22 = -6.21e-3;
f1 = f11;
f2 = f22;
fnorm = sqrt(f1 * f1 + f2 * f2);
f1 /= fnorm;
f2 /= fnorm;
printf("MAT %e , %e ; %e , %e / VEC %e , %e \n",
f11, f12, f21, f22, f1, f2);
/* {a11 b1 + a12 b2, a21 b1 + a22 b2} */
do
{
f1new = f11 * f1 + f12 * f2;
f2new = f21 * f1 + f22 * f2;
fnorm = sqrt(f1new * f1new + f2new * f2new);
f1new /= fnorm;
f2new /= fnorm;
ferr = sqrt((f1new - f1) * (f1new - f1) + (f2new - f2) *
(f2new - f2));
printf("OLD = %e , %e ; NEW = %e , %e ; ERR = %e \n",
f1, f2, f1new, f2new, ferr);
f1 = f1new;
f2 = f2new;
} while (ferr 0.001);
printf("FLT = %e , %e (ERR=%e) \n", f1, f2, ferr);
printf("\n\nDOU BLE\n\n");
d11 = 1001.321;
d12 = -3.0;
d21 = 3.14159 / 100.0;
d22 = -6.21e-3;
d1 = d11;
d2 = d22;
dnorm = sqrt(d1 * d1 + d2 * d2);
d1 /= dnorm;
d2 /= dnorm;
printf("MAT %e , %e ; %e , %e / VEC %e , %e \n",
d11, d12, d21, d22, d1, d2);
/* {a11 b1 + a12 b2, a21 b1 + a22 b2} */
do
{
d1new = d11 * d1 + d12 * d2;
d2new = d21 * d1 + d22 * d2;
dnorm = sqrt(d1new * d1new + d2new * d2new);
d1new /= dnorm;
d2new /= dnorm;
derr = sqrt((d1new - d1) * (d1new - d1) + (d2new - d2) *
(d2new - d2));
printf("OLD = %e , %e ; NEW = %e , %e ; ERR = %e \n",
d1, d2, d1new, d2new, derr);
d1 = d1new;
d2 = d2new;
} while (derr 0.001);
printf("DBL = %e , %e (ERR=%e) \n", d1, d2, derr);
printf("\n\nDIF F FLT-DBL=%e \n", sqrt((f1 - d1) * (f1 - d2) + (f2 -
d2) * (f2 - d2)));
return 0;
}
--
SenseiĀ <Sensei's e-mail is at Mac-dot-com>
Beware of bugs in the above code; I have only proved it correct, not tried it.
(Donald Knuth)
Comment