#! /usr/bin/env python3
"""Demonstrate how Unicode normalization affects string equality.

Six spellings of "Hello World" that look alike (or nearly alike) are
compared pairwise, first with plain ``==`` and then after each of the four
Unicode normalization forms.  Run as a script to print the comparison
table followed by the length of every sample string.
"""

import itertools

# Python has a built-in module for dealing with Unicode strings.
# It is updated regularly to match the latest Unicode standard.
import unicodedata

string1 = "Hello World"  # normal ASCII
string2 = "Hellо Wоrld"  # these are not Latin o's
string3 = "Hellⲟ W𐓪rld"  # also not Latin o's, and different from the non-Latin o's above
string4 = "Ⓗⓔⓛⓛⓞ Ⓦⓞⓡⓛⓓ"  # letters in circles; yes, Unicode has those
# The next two strings render identically and differ only in normalization
# form.  They are spelled with escapes so that an editor or tool that
# silently normalizes the source file cannot erase the difference.
string5 = "H\u00ebllo World"  # e with a diaeresis (one character)
string6 = "He\u0308llo World"  # Latin small letter e followed by a combining diaeresis (two characters)

# Normalization rules are used to compare Unicode characters that are
# semantically equivalent even if they are not identical:
#   NFC  is Canonical Composition
#   NFKC is Compatibility Composition
#   NFD  is Canonical Decomposition
#   NFKD is Compatibility Decomposition
# Compatibility is a less strict equality than Canonical.
# Composition means that e.g. "letter e followed by a combining diaeresis"
# is converted to "e with a diaeresis"; Decomposition is the reverse.
_NORMALIZATION_FORMS = ("NFC", "NFKC", "NFD", "NFKD")


def tryEqualities(s1: str, s2: str) -> tuple:
    """Compare two strings raw and under every Unicode normalization form.

    Args:
        s1: First string to compare.
        s2: Second string to compare.

    Returns:
        A 5-tuple of booleans: ``(raw, NFC, NFKC, NFD, NFKD)``.  The first
        element is plain ``s1 == s2``; each of the others is the equality
        of the two strings after both were normalized to that form.
    """
    normalize = unicodedata.normalize
    return (
        s1 == s2,
        *(
            normalize(form, s1) == normalize(form, s2)
            for form in _NORMALIZATION_FORMS
        ),
    )


def main() -> None:
    """Print every pairwise comparison, wait for Enter, then print lengths."""
    samples = {
        "string1": string1,
        "string2": string2,
        "string3": string3,
        "string4": string4,
        "string5": string5,
        "string6": string6,
    }

    # combinations() yields (1,2), (1,3), ... (5,6): each unordered pair once,
    # in definition order.
    for (left_name, left), (right_name, right) in itertools.combinations(
        samples.items(), 2
    ):
        print(f"{left_name} == {right_name}:", tryEqualities(left, right))

    # Pause so the comparison table can be read before the lengths scroll by.
    input()

    # len() counts code points, so visually identical strings can differ here.
    for text in samples.values():
        print(len(text))


if __name__ == "__main__":
    main()