#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Demonstration of how a Unicode code point is stored as UTF-8 bytes in C.
 *
 * UTF-8 encoding of U+2665:
 *
 *   code point bits :      0010   011001   100101
 *   3-byte template :  1110xxxx 10xxxxxx 10xxxxxx
 *   encoded bits    :  11100010 10011001 10100101
 *   encoded hex     :     E   2    9   9    A   5
 */

int utf8_num_bytes(uint8_t first_byte);

/*
 * Prints the same character written several different ways, shows that
 * strlen() reports bytes rather than characters, and finally asks
 * utf8_num_bytes() how long the sequence starting with 0xE2 should be.
 */
int main(void)
{
    // \u only works for code points with up to 4 hex digits
    // (\U takes 8 hex digits for anything larger).
    printf("\u2665\n");

    // The same character spelled out as its three UTF-8 bytes.
    printf("\xE2\x99\xA5\n");

    // String literals are read-only, so point at them through const.
    const char *s = "\u2665 beats";
    printf("%s\n", s);

    // The character typed directly into the source (ctrl-shift-u 2665).
    printf("♥ beats\n");

    // The <string.h> functions are not Unicode-aware: strlen() counts
    // bytes, not code points. The byte counts below assume the compiler
    // encodes \u2665 as UTF-8 (the default for GCC and Clang).
    // strlen() returns size_t, whose matching conversion is %zu.
    printf("%zu\n", strlen("\u2665")); // 3 not 1
    printf("%zu\n", strlen(s));        // 9 not 7

    // Use our own function to work out the number of bytes in a UTF-8
    // sequence from its first byte alone.
    uint8_t first_byte = 0xE2;
    printf("Expected nbytes for \\x%hhX is %d\n", first_byte,
           utf8_num_bytes(first_byte));

    return 0;
}

/*
 * Returns the total length in bytes (1-4) of the UTF-8 sequence that
 * begins with first_byte, determined from its leading bit pattern:
 *
 *   0xxxxxxx -> 1
 *   110xxxxx -> 2
 *   1110xxxx -> 3
 *   11110xxx -> 4
 *
 * Any other pattern (a 10xxxxxx continuation byte, or 11111xxx) is not a
 * legal first byte: a message is written to stderr and 0 is returned.
 *
 * Only the leading bits are examined. Values such as 0xC0, 0xC1 and
 * 0xF5-0xF7 match a pattern above and are reported as 2 or 4 even though
 * they never occur in well-formed UTF-8.
 */
int utf8_num_bytes(uint8_t first_byte)
{
    if ((first_byte & 0x80) == 0) {
        return 1;
    } else if ((first_byte & 0xe0) == 0xc0) {
        return 2;
    } else if ((first_byte & 0xf0) == 0xe0) {
        return 3;
    } else if ((first_byte & 0xf8) == 0xf0) {
        return 4;
    } else {
        fprintf(stderr, "Not a valid first byte\n");
        return 0;
    }
}