Computer Systems Fundamentals

#include <assert.h>

// Convert a digit character to its numeric value by subtracting the
// character code of '0'.
//
// c: the character to convert (meaningful for '0'..'9').
// Returns the difference between c and '0'.
int ascii_to_bin_subtraction(char c) {
    const int zero_code = '0';
    int value = c;

    value -= zero_code;
    return value;
}

// Convert a digit character to its numeric value by keeping only the
// low four bits of its character code.
//
// c: the character to convert (in ASCII, '0'..'9' are 0x30..0x39, so the
//    low nibble is the digit's value).
// Returns c with everything above the low nibble cleared.
int ascii_to_bin_bitwise(char c) {
    const int low_nibble = 0x0F;

    return low_nibble & c;
}

// Convert a small integer to its digit character by adding the character
// code of '0'.
//
// i: the value to convert (meaningful for 0..9).
// Returns '0' + i, converted to char.
char bin_to_ascii_addition(int i) {
    char digit = '0' + i;

    return digit;
}

// Convert a small integer to its digit character by setting bits 4 and 5.
//
// i: the value to convert (meaningful for 0..9).
// Returns i with the bit pattern 0011 0000 (0x30, the ASCII code of '0')
// OR-ed in, converted to char.
char bin_to_ascii_bitwise(int i) {
    const int digit_bits = 0x30;    // 0011 0000
    char digit = digit_bits | i;

    return digit;
}

// Check that the arithmetic and bitwise digit conversions agree with each
// other and with the expected result, using the digit 5 as the sample.
int main(void) {
    const char sample_char = '5';
    const int sample_value = 5;

    // character -> number
    int value_by_subtraction = ascii_to_bin_subtraction(sample_char);
    int value_by_mask = ascii_to_bin_bitwise(sample_char);
    assert(value_by_subtraction == sample_value);
    assert(value_by_mask == sample_value);
    assert(value_by_subtraction == value_by_mask);

    // number -> character
    char char_by_addition = bin_to_ascii_addition(sample_value);
    char char_by_or = bin_to_ascii_bitwise(sample_value);
    assert(char_by_addition == sample_char);
    assert(char_by_or == sample_char);
    assert(char_by_addition == char_by_or);
}
#include <assert.h>
#include <string.h>
#include <stdbool.h>

// Upper-case a string in place by subtracting 32 from every lower-case
// letter; all other characters are left untouched.
//
// s: NUL-terminated, writable string.
// Returns s.
char *to_upper_subtraction(char *s) {
    const int case_gap = 32;    // distance between 'a' and 'A' in ASCII

    for (char *p = s; *p != '\0'; p++) {
        if (*p < 'a' || *p > 'z') {
            continue;
        }
        *p -= case_gap;
    }
    return s;
}

// Upper-case a string in place by clearing bit 5 of every lower-case
// letter; all other characters are left untouched.
//
// s: NUL-terminated, writable string.
// Returns s.
char *to_upper_bitwise(char *s) {
    const int clear_bit5 = 0xDF;    // 1101 1111

    for (char *p = s; *p != '\0'; p++) {
        if (*p < 'a' || *p > 'z') {
            continue;
        }
        *p &= clear_bit5;
    }
    return s;
}

// Compare two strings for equality, ignoring the case of ASCII letters.
//
// Two bytes match when they are identical, or when they differ only in
// bit 5 (0x20) and the value with bit 5 set is a lower-case letter.
// Restricting the bit trick to letters matters: without it, unrelated
// pairs such as '@' / '`' or '[' / '{' would be treated as equal.
//
// s1, s2: NUL-terminated strings (not modified).
// Returns true only if both strings have the same length and every pair
// of bytes matches as described above.
bool case_insensitive_compare_bitwise(char *s1, char *s2) {
    int i = 0;

    for (; s1[i] && s2[i]; i++) {
        // unsigned so bytes >= 0x80 cannot look like they are below 'a'
        unsigned char c1 = (unsigned char)s1[i];
        unsigned char c2 = (unsigned char)s2[i];

        if (c1 == c2) {
            continue;
        }

        unsigned char folded1 = c1 | 0x20;
        unsigned char folded2 = c2 | 0x20;
        if (folded1 != folded2 || folded1 < 'a' || folded1 > 'z') {
            return false;
        }
    }

    // The loop stops at the first NUL; the strings are equal only if both
    // ended here, otherwise one is merely a prefix of the other.
    return s1[i] == s2[i];
}

// Check both upper-casing functions against the expected result, then
// check that two differently-cased spellings compare equal.
int main(void) {
    const char *expected_upper = "HELLO, WORLD!";

    // Each function modifies its argument, so each gets its own copy.
    char for_subtraction[] = "Hello, World!";
    char for_bitwise[] = "Hello, World!";
    assert(strcmp(expected_upper, to_upper_subtraction(for_subtraction)) == 0);
    assert(strcmp(expected_upper, to_upper_bitwise(for_bitwise)) == 0);

    char mixed_case_a[] = "HeLLo, WOrLD!";
    char mixed_case_b[] = "hEllo, WORld!";
    assert(case_insensitive_compare_bitwise(mixed_case_a, mixed_case_b));
}
#include <stdio.h>
#include <string.h>

// Compare several strings that all render similarly to "Hello World" and
// print, for every pair, whether strcmp considers them equal, followed by
// the strlen of each string.
int main(void) {
    // Only the first entry is plain ASCII; the others contain non-ASCII
    // characters. The literals must be kept byte-for-byte.
    // NOTE(review): the last two entries look identical on screen and
    // presumably differ in how the accented letter is encoded - confirm
    // against the original file when editing.
    const char *strings[] = {
        "Hello World",
        "Hellо Wоrld",
        "Hellⲟ W𐓪rld",
        "Ⓗⓔⓛⓛⓞ Ⓦⓞⓡⓛⓓ",
        "Hëllo World",
        "Hëllo World",
    };
    const int n_strings = sizeof strings / sizeof strings[0];

    // Every unordered pair, labelled string1..string6.
    for (int i = 0; i < n_strings; i++) {
        for (int j = i + 1; j < n_strings; j++) {
            printf("string%d == string%d: %s\n", i + 1, j + 1,
                   strcmp(strings[i], strings[j]) ? "Not Equal" : "Equal");
        }
    }

    // strlen counts bytes up to the terminating NUL.
    // %zu is the conversion for size_t, the type strlen returns.
    for (int i = 0; i < n_strings; i++) {
        printf("string%d: %zu\n", i + 1, strlen(strings[i]));
    }
}
#include <stdio.h>

// Show two ways of writing the same 4-byte UTF-8 sequence to stdout:
// as hex escapes inside a string literal, and one byte at a time.
int main(void) {
    static const unsigned char utf8_bytes[] = {0xF0, 0x9F, 0x98, 0x80};

    fputs("The unicode code point U+1F600 encodes in UTF-8\n", stdout);
    fputs("as 4 bytes: 0xF0 0x9F 0x98 0x80\n", stdout);
    fputs("We can output the 4 bytes like this: \xF0\x9F\x98\x80\n", stdout);

    fputs("Or like this: ", stdout);
    for (size_t i = 0; i < sizeof utf8_bytes; i++) {
        putchar(utf8_bytes[i]);
    }
    putchar('\n');
}
#include <stdio.h>
#include <stdint.h>

// Encode code_point as UTF-8 into encoding (room for 4 bytes).
// Returns the number of bytes written (1..4), or 0 if code_point is not
// encodable: a surrogate (U+D800..U+DFFF) or a value above U+10FFFF.
static int utf8_encode(uint32_t code_point, uint8_t encoding[4]) {
    if (code_point < 0x80) {
        // 0xxxxxxx
        encoding[0] = (uint8_t)code_point;
        return 1;
    }
    if (code_point < 0x800) {
        // 110xxxxx 10xxxxxx
        encoding[0] = (uint8_t)(0xC0 | (code_point >> 6));
        encoding[1] = (uint8_t)(0x80 | (code_point & 0x3f));
        return 2;
    }
    if (code_point >= 0xD800 && code_point <= 0xDFFF) {
        // surrogates are reserved for UTF-16 and have no UTF-8 encoding
        return 0;
    }
    if (code_point < 0x10000) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        encoding[0] = (uint8_t)(0xE0 | (code_point >> 12));
        encoding[1] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3f));
        encoding[2] = (uint8_t)(0x80 | (code_point & 0x3f));
        return 3;
    }
    if (code_point <= 0x10FFFF) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        encoding[0] = (uint8_t)(0xF0 | (code_point >> 18));
        encoding[1] = (uint8_t)(0x80 | ((code_point >> 12) & 0x3f));
        encoding[2] = (uint8_t)(0x80 | ((code_point >> 6) & 0x3f));
        encoding[3] = (uint8_t)(0x80 | (code_point & 0x3f));
        return 4;
    }
    return 0;
}

// Print a code point, the bytes of its UTF-8 encoding in hex, and the
// encoded character itself, on one line of stdout.
//
// code_point: the Unicode code point to encode.
// If code_point has no UTF-8 encoding, a message is written to stderr
// and nothing is written to stdout.
void print_utf8_encoding(uint32_t code_point) {
    // one spare byte stays zero so the buffer can be printed with %s
    uint8_t encoding[5] = {0};

    int n_bytes = utf8_encode(code_point, encoding);
    if (n_bytes == 0) {
        fprintf(stderr, "U+%lx cannot be encoded in UTF-8\n",
                (unsigned long)code_point);
        return;
    }

    printf("U+%lx  UTF-8: ", (unsigned long)code_point);
    // Use the known length rather than scanning for a zero byte, so the
    // single 0x00 byte that encodes U+0000 is still listed.
    for (int i = 0; i < n_bytes; i++) {
        printf("0x%02x ", (unsigned)encoding[i]);
    }
    printf(" %s\n", (const char *)encoding);
}

// Print the UTF-8 encoding of four sample code points, one needing each
// of the 1-, 2-, 3- and 4-byte forms.
int main(void) {
    const uint32_t samples[] = {0x42, 0x00A2, 0x10be, 0x1F600};
    const int n_samples = sizeof samples / sizeof samples[0];

    for (int i = 0; i < n_samples; i++) {
        print_utf8_encoding(samples[i]);
    }
}
#include <stdio.h>
#include <string.h>
#include <stdint.h>

// UTF-8 encoding of U+2665
// 11100010 10011001 10100101
        
// 11100010         10011001        10100101
// E   2            9   9           A   5

int utf8_num_bytes(uint8_t first_byte);

// Print the character U+2665 in several ways, show that strlen counts
// bytes rather than characters, and report how many bytes a UTF-8
// sequence starting with 0xE2 occupies.
int main(void){
    // \u only works for code points with up to 4 hex digits
    printf("\u2665\n");

    // the same character written as its three UTF-8 bytes
    printf("\xE2\x99\xA5\n");
    char *s = "\u2665 beats";
    printf("%s\n",s);
    printf("♥ beats\n"); //ctrl-shift-u 2665

    // String functions count bytes, not Unicode characters.
    // %zu is the conversion for size_t, the type strlen returns.
    printf("%zu\n",strlen("\u2665")); //3 not 1
    printf("%zu\n",strlen(s));        //9 not 7

    // Use our own function to work out the number of bytes in a
    // UTF-8 sequence from its first byte.
    uint8_t first_byte = 0xE2;
    printf("Expected nbytes for \\x%hhX is %d\n",first_byte, utf8_num_bytes(first_byte));

    return 0;
}


// Work out how many bytes a UTF-8 sequence occupies from its first byte.
//
// first_byte: the leading byte of the sequence.
// Returns 1, 2, 3 or 4 when the high bits match 0xxxxxxx, 110xxxxx,
// 1110xxxx or 11110xxx respectively. Otherwise prints a message to
// stderr and returns 0.
int utf8_num_bytes(uint8_t first_byte){
    // lead_forms[k] describes the first byte of a (k + 1)-byte sequence
    static const struct {
        uint8_t mask;
        uint8_t pattern;
    } lead_forms[] = {
        {0x80, 0x00},
        {0xe0, 0xc0},
        {0xf0, 0xe0},
        {0xf8, 0xf0},
    };

    for (int length = 1; length <= 4; length++) {
        if ((first_byte & lead_forms[length - 1].mask) == lead_forms[length - 1].pattern) {
            return length;
        }
    }

    fprintf(stderr,"Not a valid first byte\n");
    return 0;
}
 
&    U+00026  0010 0110
 
µ    U+000B5  1011 0101    
              11000010     10110101

♥    U+02665  0010 0110 0110 0101
              11100010 10011001 10100101

Reminder:
7bit  0xxxxxxx
11bit 110xxxxx 10xxxxxx
16bit 1110xxxx 10xxxxxx 10xxxxxx
21bit 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx