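Convert 12.625 to binary

Integer part (read the remainders from bottom to top):
12/2 = 6 R0
6/2 = 3 R0
3/2 = 1 R1
1/2 = 0 R1
12: 1100

Fractional part (read the integer parts from top to bottom):
0.625 * 2 = 1.25
0.25 * 2 = 0.5
0.5 * 2 = 1.0
0.625 = 0.101

12.625: 1100.101
=============
Convert 0.1 to binary

0.1 * 2 = 0.2
0.2 * 2 = 0.4
0.4 * 2 = 0.8
0.8 * 2 = 1.6
0.6 * 2 = 1.2
0.2 * 2 = 0.4
0.4 * 2 = 0.8
0.8 * 2 = 1.6
0.6 * 2 = 1.2
0.1 = 0.0001100110011001100110011001100110011... (the pattern 0011 repeats forever, so 0.1 has no exact finite binary representation)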
```
$ ./floating_types
float 4 bytes min=1.17549e-38 max=3.40282e+38
double 8 bytes min=2.22507e-308 max=1.79769e+308
long double 16 bytes min=3.3621e-4932 max=1.18973e+4932
```
#include <stdio.h>
#include <float.h>

/*
 * Print, for each of float, double and long double, its size in bytes
 * and the FLT_MIN/FLT_MAX-style limits that <float.h> defines for it.
 */
int main(void) {
    // These variables exist only as sizeof operands; their values are never read.
    float f;
    double d;
    long double l;

    // sizeof yields a size_t, whose matching printf conversion is %zu;
    // %lu is only correct where size_t happens to be unsigned long.
    printf("float %2zu bytes min=%-12g max=%g\n", sizeof f, FLT_MIN, FLT_MAX);
    printf("double %2zu bytes min=%-12g max=%g\n", sizeof d, DBL_MIN, DBL_MAX);

    // long double arguments require the L length modifier
    printf("long double %2zu bytes min=%-12Lg max=%Lg\n", sizeof l, LDBL_MIN, LDBL_MAX);

    return 0;
}
#include <stdio.h>

/*
 * Show how the common printf conversions and precision/width modifiers
 * format the same double value.
 */
int main(void) {
    double d = 4/7.0;

    // prints in decimal with (default) 6 decimal places
    printf("%lf\n", d);        // prints 0.571429

    // prints in scientific notation
    printf("%le\n", d);        // prints 5.714286e-01

    // picks best of decimal and scientific notation
    printf("%lg\n", d);        // prints 0.571429

    // prints in decimal with 9 decimal places
    printf("%.9lf\n", d);      // prints 0.571428571

    // prints in decimal with 1 decimal place, right-aligned in a field of width 10
    printf("%10.1lf\n", d);    // prints "       0.6" (7 leading spaces)

    return 0;
}
#include <stdio.h>
#include <math.h>

/*
 * Demonstrate how an infinite double behaves when printed, negated,
 * used in arithmetic, passed to atan() and compared.
 */
int main(void) {
    // a non-zero value divided by zero gives infinity
    double pos_inf = 1.0/0.0;
    double neg_inf = -pos_inf;

    printf("%lf\n", pos_inf);              // prints inf
    printf("%lf\n", neg_inf);              // prints -inf

    // subtracting a finite amount leaves infinity unchanged
    printf("%lf\n", pos_inf - 1);          // prints inf

    // atan(infinity) is pi/2, so doubling it gives pi
    printf("%lf\n", 2 * atan(pos_inf));    // prints 3.141593

    // every finite value compares less than infinity
    int finite_is_smaller = 42 < pos_inf;
    printf("%d\n", finite_is_smaller);     // prints 1 (true)

    // infinity compares equal to the INFINITY macro from <math.h>
    int equals_macro = pos_inf == INFINITY;
    printf("%d\n", equals_macro);          // prints 1 (true)

    return 0;
}
#include <stdio.h>
#include <math.h>

/*
 * Demonstrate how NaN (not-a-number) behaves when printed, used in
 * arithmetic, compared with itself and tested with isnan().
 */
int main(void) {
    // zero divided by zero has no meaningful value, so the result is NaN
    double x = 0.0/0.0;

    printf("%lf\n", x);               // prints nan

    // any arithmetic involving NaN yields NaN
    printf("%lf\n", x - 1);           // prints nan

    // NaN compares unequal to everything, including itself
    printf("%d\n", x == x);           // prints 0 (false)

    // isnan() is only specified to return a nonzero value for NaN,
    // so compare against 0 to make the printed result exactly 1
    printf("%d\n", isnan(x) != 0);    // prints 1 (true)

    return 0;
}
Convert 1 to floating point representation

1 = 1.0 * 2^0
sign: 0
exp: 0 + 127 = 127 = 01111111
frac: 1.0
00111111100000000000000000000000 (1)
========================================
Convert to decimal
0 10000000 11000000000000000000000
sign: +ve
exp: 10000000 = 128 - 127 = 1
frac: 1.11
answer = 1.11*2^1 = 11.1 (binary) = 3.5
===========================================
Convert to decimal
1 01111110 10000000000000000000000
sign: -ve
exp: 01111110 = 126 - 127 = -1
frac: 1.1
answer = -1.1*2^-1 = -0.11 (binary) = -0.75
================================
Extra example:
0 10000000 10000000000000000000000
sign: +ve
exp: 10000000 = 128 - 127 = 1
fraction: 1.1
result: 1.1 * 2^1 = 11 (binary) = 3
#include <stdio.h>

/*
 * Demonstrate that 0.1 is not stored exactly in a double:
 * adding it ten times does not produce exactly 1.
 */
int main(void) {
    double tenth = 0.1;

    // difference between 1 and ten copies of 0.1 added left to right
    double error = 1 - (tenth + tenth + tenth + tenth + tenth
                        + tenth + tenth + tenth + tenth + tenth);

    // exact comparison is used deliberately to expose the rounding error;
    // real code would be better using fabs(error) > 0.000001
    if (error != 0) {
        printf("1 != 0.1+0.1+0.1+0.1+0.1+0.1+0.1+0.1+0.1+0.1\n");
    }

    printf("b = %g\n", error);    // prints 1.11022e-16

    return 0;
}
- 9007199254740993 is $2^{53} + 1$ \
  it is the smallest positive integer which cannot be represented exactly as a double
- The closest double to 9007199254740993 is 9007199254740992.0
- aside: 9007199254740993 cannot be represented by an int32_t \
  it can be represented by an int64_t
#include <stdio.h>

/*
 * Demonstrate the limit of integer precision in a double.
 * The loop looks as though it prints 10 numbers, but it never terminates.
 */
int main(void) {
    // 9007199254740992 is 2^53; 9007199254740993 can not be represented
    // as a double and the closest double is 9007199254740992.0,
    // so 9007199254740992.0 + 1 = 9007199254740992.0
    // and the loop variable stops increasing once it reaches that value
    for (double value = 9007199254740990; value < 9007199254741000; value = value + 1) {
        // prints 9007199254740990.000000 and 9007199254740991.000000 once each,
        // then 9007199254740992.000000 forever
        printf("%lf\n", value);
    }

    return 0;
}
INTERNAL ERROR MISSING FILE: "./templates/topic/floating_point/code/topic/floating_point/code/functionPointer1.c"
INTERNAL ERROR MISSING FILE: "topic/floating_point/code/"