Computer Systems Fundamentals

12.625

12/2 = 6 R0
 6/2 = 3 R0
 3/2 = 1 R1
 1/2 = 0 R1

12: 1100

0.625 * 2 = 1.25
0.25  * 2 = 0.5
0.5   * 2 = 1.0

0.625 = 0.101

12.625: 1100.101


=============   

0.1 * 2 = 0.2
0.2 * 2 = 0.4
0.4 * 2 = 0.8
0.8 * 2 = 1.6
0.6 * 2 = 1.2
0.2 * 2 = 0.4
0.4 * 2 = 0.8
0.8 * 2 = 1.6
0.6 * 2 = 1.2

0.1 = 0.0001100110011001100110011001100110011... (the pattern 0011 repeats forever, so 0.1 has no exact finite binary representation)

Print size and min and max values of floating point types
```
$ ./floating_types
float        4 bytes  min=1.17549e-38   max=3.40282e+38
double       8 bytes  min=2.22507e-308  max=1.79769e+308
long double 16 bytes  min=3.3621e-4932  max=1.18973e+4932
```

#include <stdio.h>
#include <float.h>

// Print the size in bytes and the <float.h> MIN/MAX limits of each
// floating point type.
// Note: the *_MIN macros are the smallest positive normalized values,
// not the most negative values.
int main(void) {

    float f;
    double d;
    long double l;

    // sizeof yields a size_t, so the matching conversion is %zu
    // (%lu is only correct where size_t happens to be unsigned long)
    printf("float       %2zu bytes  min=%-12g  max=%g\n", sizeof f, FLT_MIN, FLT_MAX);
    printf("double      %2zu bytes  min=%-12g  max=%g\n", sizeof d, DBL_MIN, DBL_MAX);
    // long double arguments need the L length modifier
    printf("long double %2zu bytes  min=%-12Lg  max=%Lg\n", sizeof l, LDBL_MIN, LDBL_MAX);

    return 0;
}
#include <stdio.h>

// Show the printf conversions that can be used to print a double.
int main(void) {
    const double ratio = 4/7.0;

    // fixed decimal notation, 6 decimal places by default
    printf("%lf\n", ratio);        // prints 0.571429

    // scientific (exponent) notation
    printf("%le\n", ratio);       // prints 5.714286e-01

    // whichever of decimal and scientific notation suits the value best
    printf("%lg\n", ratio);       // prints 0.571429

    // fixed decimal notation with 9 decimal places
    printf("%.9lf\n", ratio);    // prints 0.571428571

    // fixed decimal notation with 1 decimal place,
    // right-aligned in a field of width 10
    printf("%10.1lf\n", ratio);  // prints        0.6

    return 0;
}

#include <stdio.h>
#include <math.h>

// Show how infinity behaves in printing, arithmetic and comparisons.
int main(void) {

    // dividing a non-zero value by zero yields infinity
    const double pos_inf = 1.0/0.0;

    printf("%lf\n", pos_inf);           // prints inf
    printf("%lf\n", -pos_inf);          // prints -inf

    // subtracting a finite value from infinity leaves infinity
    printf("%lf\n", pos_inf - 1);       // prints inf

    // atan(infinity) is pi/2, so doubling it gives pi
    printf("%lf\n", 2 * atan(pos_inf)); // prints 3.141593

    // every finite value compares less than infinity
    printf("%d\n", 42 < pos_inf);       // prints 1 (true)

    // the computed value equals the INFINITY macro from <math.h>
    printf("%d\n", pos_inf == INFINITY); // prints 1 (true)

    return 0;
}

#include <stdio.h>
#include <math.h>

// Show how NaN (not a number) behaves in printing, arithmetic and comparisons.
int main(void) {

    // zero divided by zero has no meaningful result, so it yields NaN
    double x = 0.0/0.0;

    printf("%lf\n", x); //prints nan

    // arithmetic involving NaN produces NaN
    printf("%lf\n", x - 1); // prints nan

    // NaN compares unequal to everything, including itself
    printf("%d\n", x == x); // prints 0 (false)

    // isnan() is only specified to return a nonzero value for a NaN,
    // so normalise the result to 0/1 before printing it
    printf("%d\n", isnan(x) != 0); // prints 1 (true)

    return 0;
}
Convert 1 to floating point representation

1 = 1.0 * 2^0
sign: 0
exp: 0 + 127 = 01111111
frac: 1.0

00111111100000000000000000000000  (1)


========================================
Convert to decimal

0 10000000 11000000000000000000000

sign: +ve
exp: 10000000 = 128 - 127 = 1
frac: 1.11

answer = 1.11*2^1 = 11.1  = 3.5

===========================================
Convert to decimal

1 01111110 10000000000000000000000

sign: -ve
exp: 01111110 = 126 - 127 = -1
frac: 1.1

answer = -1.1*2^-1 = -0.11 = -0.75

================================
Extra example:
                             
0 10000000 10000000000000000000000

sign: +ve
exp: 10000000 = 128 - 127 = 1
fraction: 1.1
result: 1.1 * 2^1 = 11 = 3


The value 0.1 can not be precisely represented as a double
As a result b != 0
#include <stdio.h>

// Show that adding 0.1 ten times does not give exactly 1.
int main(void) {
    // 0.1 is stored as the nearest double, which is not exactly one tenth
    const double tenth = 0.1;

    // the rounding error of each addition accumulates in the sum,
    // so the difference from 1 is tiny but not zero
    const double b = 1 - (tenth + tenth + tenth + tenth + tenth + tenth + tenth + tenth + tenth + tenth);

    // an exact comparison with 0 is used here to expose the error;
    // better would be a tolerance test such as fabs(b) > 0.000001
    if (b != 0) {
        printf("1 != 0.1+0.1+0.1+0.1+0.1+0.1+0.1+0.1+0.1+0.1\n");
    }

    printf("b = %g\n", b); // prints 1.11022e-16

    return 0;
}

- 9007199254740993 is $2^{53} + 1$ \
  it is the smallest positive integer which cannot be represented exactly as a double
- The closest double to 9007199254740993 is 9007199254740992.0
- aside: 9007199254740993 can not be represented by an int32_t \
  it can be represented by int64_t

#include <stdio.h>

// Show that a double used as a loop counter can stop increasing.
int main(void) {

    // The loop looks as if it prints 10 numbers, but it never terminates.
    //
    // 9007199254740993 can not be represented as a double;
    // the closest double is 9007199254740992.0,
    // so 9007199254740992.0 + 1 = 9007199254740992.0
    // and d never reaches the upper bound.
    for (double d = 9007199254740990; d < 9007199254741000; d = d + 1) {
        // prints 9007199254740990.000000 and 9007199254740991.000000,
        // then 9007199254740992.000000 forever
        printf("%lf\n", d);
    }

    return 0;
}
INTERNAL ERROR MISSING FILE: "./templates/topic/floating_point/code/topic/floating_point/code/functionPointer1.c"
INTERNAL ERROR MISSING FILE: "./templates/topic/floating_point/code/topic/floating_point/code/functionPointer1.c"
INTERNAL ERROR MISSING FILE: "topic/floating_point/code/"
INTERNAL ERROR MISSING FILE: "topic/floating_point/code/"