import re
import regex
# Opt in to the regex module's "version 1" behaviour for the patterns below.
# regex.V0 (the default) is fully backwards compatible with the `re` module;
# regex.V1 is not, but provides more features.
regex.DEFAULT_VERSION = regex.V1

# Wait for Enter before starting the first demo.
input()
print(
    "regex groups + captures",
    "-----------------------------------------------------",
    sep="\n",
)

# Dotted-quad pattern: a non-capturing "octet followed by a dot" is repeated
# three times (each repeat reuses capture group 1), then a final octet is
# captured as group 2.
ip_regex = r"(?:([0-9]|[1-9][0-9]|1[0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])"
ip = "127.0.0.1"

# Standard-library `re`: groups()/group() report one value per group.
m1 = re.fullmatch(ip_regex, ip)
print(
    f"{m1.groups()=}",
    f"{m1.group()=}",
    f"{m1.group(0)=}",
    f"{m1.group(1)=}",
    f"{m1.group(2)=}",
    "-----------------------------------------------------",
    sep="\n",
)

# The same calls through the `regex` module, for comparison.
m2 = regex.fullmatch(ip_regex, ip)
print(
    f"{m2.groups()=}",
    f"{m2.group()=}",
    f"{m2.group(0)=}",
    f"{m2.group(1)=}",
    f"{m2.group(2)=}",
    "-----------------------------------------------------",
    sep="\n",
)

# `regex` match objects also offer captures()/allcaptures(), which list the
# substrings captured by a group across its repetitions.
print(
    f"{m2.allcaptures()=}",
    f"{m2.captures()=}",
    f"{m2.captures(0)=}",
    f"{m2.captures(1)=}",
    f"{m2.captures(2)=}",
    sep="\n",
)

# Group 1 supplies the three leading octets, group 2 the last one.
octet_1, octet_2, octet_3 = m2.captures(1)
[octet_4] = m2.captures(2)
print(f"{octet_1=}.{octet_2=}.{octet_3=}.{octet_4=}")
print("-----------------------------------------------------")
# Wait for Enter before the recursion demos.
input()
print("Recursion in regex")
print("-----------------------------------------------------")
# Candidate lines, one per row.
strings = """
A
AAAA
AAAAA
AAAABBABBAAAA
AAAABBBB
AAAABBBBA
AAAABBBBAAAA
AAABB
AAABBB
AABB
AABBB
AB
ABAAAAAAAAAABBA
ABAABAABAABBBBABB
ABABABABA
ABABBBBBBBBBBBBBBBAAA
ABBA
ABBAA
ABBABABABABAA
ABBBAAA
Andrew
Andrew's favourite Band is not
B
BA
BABABABAB
BBAA
BBABABABABABABAA
BBBAABBABBBAABBBAAAA
BBBAABBBBBAABBBAAAA
George is Brilliant
"""
# (?1) re-enters capture group 1, so group 1 reads "an A, optionally a nested
# copy of group 1, then a B": some number of A's followed by the same number
# of B's. With MULTILINE, ^ and $ anchor the pattern to each individual line.
for m in regex.finditer(r'^(A(?1)?B)$', strings, flags=regex.MULTILINE):
    print(m)
print("-----------------------------------------------------")
# Sample text, one entry per line (the backslash after the opening quotes
# keeps the string from starting with an empty line).
strings = """\
racecar
hello
helloolleh
paper
kayak
not a palindrome
"""
# (?R) recurses into the whole pattern: a word character, then either the
# whole pattern again or at most one more word character, then the first
# character repeated. The pattern has no ^ or $, so it can match inside a line
# and the MULTILINE flag has nothing to act on here.
for m in regex.finditer(r'(\w)(?:(?R)|\w?)\1', strings, flags=regex.MULTILINE):
    print(m)
print("-----------------------------------------------------")
# Sample text, one entry per line (the backslash after the opening quotes
# keeps the string from starting with an empty line).
strings = """\
racecar
hello
helloolleh
paper
kayak
not a palindrome
"""
# Line-anchored variant of the palindrome pattern: the recursive part lives in
# capture group 1 and is re-entered with (?1), while ^ and $ (per line, thanks
# to MULTILINE) require the entire line to be consumed by the match.
for m in regex.finditer(r'^((.)(?:(?1)|.?)\2)$', strings, flags=regex.MULTILINE):
    print(m)
print("-----------------------------------------------------")
# Backreference versus recursion:
#   - a backreference (\1) must match the exact text its group captured;
#   - a recursion ((?1)) re-applies the group's pattern, so it may match
#     different text.
# Both patterns below are tried against the same word.
word = "stackhousiaceous"

# Backreference: group 1 captures "ou", and \1 insists on another "ou",
# so the overall match is "ousiaceou".
print(regex.search(r"([aeiou]{2}).*?\1", word))

# Recursion: (?1) re-runs "[aeiou]{2}", so any two vowels will do,
# and the overall match is the shorter "ousia".
print(regex.search(r"([aeiou]{2}).*?(?1)", word))
print("-----------------------------------------------------")
# Parenthesis sequences to test, one per line (the backslash after the opening
# quotes keeps the string from starting with an empty line).
strings = """\
((()))
())(
()()
(())
(((((()())))()()(((())))))
(((((()())))()()(())))))
((((()()()()(((())))))
"""
# Group 1 is "(", any number of nested group-1 matches, then ")". A line
# matches when it is made up of one such group followed by any number of
# further ones, i.e. when its parentheses are balanced.
for m in regex.finditer(r'^(\((?1)*\))(?1)*$', strings, flags=regex.MULTILINE):
    print(m)
print("-----------------------------------------------------")
# Two spellings of the same dotted-quad pattern: the first writes the octet
# alternation out twice, the second reuses it for the last octet via (?1).
ip_regex_1 = r"(?:([0-9]|[1-9][0-9]|1[0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])"
ip_regex_2 = r"(?:([0-9]|[1-9][0-9]|1[0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\.){3}(?1)"
ip = "127.0.0.1"

# Run both patterns against the same address and show each result.
for pattern in (ip_regex_1, ip_regex_2):
    print(regex.search(pattern, ip))
print("-----------------------------------------------------")
# Wait for Enter before the overlapping-matches demo.
input()
print("overlapping matches")
print("-----------------------------------------------------")
tests = """
AAAAA
"""
# Default behaviour: each new search starts where the previous match ended.
for m in regex.finditer('A+', tests):
    print(m)
print("-----------------------------------------------------")
# overlapped=True lets successive matches share characters with earlier ones.
for m in regex.finditer('A+', tests, overlapped=True):
    print(m)
print("-----------------------------------------------------")
# Wait for Enter before the timeout demo.
input()
print("timeout")
print("-----------------------------------------------------")
# (?0) recurses into the whole pattern; the timeout argument (in seconds)
# bounds how long the search may run, and the resulting TimeoutError is printed.
try:
    regex.search('(?0)', 'Hello COMP2041/9044', timeout=0.2)
except TimeoutError as e:
    print(e)
# (?R) is the other spelling of whole-pattern recursion, run here without a
# timeout; "OOM" is reported if the search raises MemoryError.
try:
    regex.search('(?R)', 'Hello COMP2041/9044')
except MemoryError:
    print("OOM")
print("-----------------------------------------------------")
# Wait for Enter before the POSIX-matching demo.
input()
print("POSIX matches")
print("-----------------------------------------------------")
# Search the same alternation twice:
#   flags=0      -> the first alternative that matches wins
#   regex.POSIX  -> the longest match wins (like grep)
for flags in (0, regex.POSIX):
    m = regex.search(r'Mr|Mrs', 'Mrs', flags=flags)
    print(m)
print("-----------------------------------------------------")
# Wait for Enter before the partial-matching demo.
input()
print("partial matches")
print("-----------------------------------------------------")
# Try progressively longer (and one non-matching) inputs against a pattern
# of "z" followed by exactly seven digits, with partial matching enabled.
for candidate in ('', 'z', 'a', 'z123', 'z1234567', 'z123456789'):
    print(regex.fullmatch(r'z\d{7}', candidate, partial=True))
print("-----------------------------------------------------")
# Wait for Enter before the character-class demo.
input()
print("Character Classes")
print("-----------------------------------------------------")
# The same POSIX-style class handed first to `re`, then to `regex`, to
# compare how each module treats it.
for engine in (re, regex):
    print(engine.search(r'[[:alpha:]]+', 'hello'))
# [[:xdigit:]] tried on an all-hex string and on one containing non-hex letters.
for text in ('867F64AB75785CF75', '5435FJI'):
    print(regex.search(r'[[:xdigit:]]+', text))
print("-----------------------------------------------------")
# Wait for Enter before the set-operations demo.
input()
print("Set Operations")
print("-----------------------------------------------------")
# Operators available inside a character set:
#   ||  union                 ("x||y" means "x or y")
#   &&  intersection          ("x&&y" means "x and y")
#   --  difference            ("x--y" means "x but not y")
#   ~~  symmetric difference  ("x~~y" means "x or y, but not both")
# Shown below: a plain range, a difference, and a symmetric difference.
for pattern in (r'[a-z]+', r'[[a-z]--[aeiou]]+', r'[[a-z]~~[hgpqz]]+'):
    print(regex.search(pattern, 'hello'))
print("-----------------------------------------------------")
# Wait for Enter before the reversed-matching demo.
input()
print("Reversed Match")
print("-----------------------------------------------------")
# For each pattern/text pair, list the matches found by the default search
# and then by a search carried out with the REVERSE flag.
for pattern, text in ((r".", "abc"), (r"..", "abcde")):
    print(regex.findall(pattern, text))
    print(regex.findall(pattern, text, flags=regex.REVERSE))
print("-----------------------------------------------------")
# Examples of using the regex module