Software Construction



Examples of using the regex module
import re
import regex

# regex.V0 (the default) is fully backwards compatible with the `re` module.
# regex.V1 is not but provides more features.
# Setting DEFAULT_VERSION makes V1 the behaviour for every pattern in this
# script that does not pick a version itself. Per the regex documentation,
# nested sets and set operations (such as the `--` and `~~` patterns used in
# this script) are only recognised under V1.
regex.DEFAULT_VERSION = regex.V1

# Wait for the user to press Enter before running this demo section.
input()
print("regex groups + captures")
print("-----------------------------------------------------")

# Dotted-quad pattern: a non-capturing group holding capture group 1 (one
# octet) plus a literal dot is repeated three times; capture group 2 then
# matches the final octet.
ip_regex = r"(?:([0-9]|[1-9][0-9]|1[0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])"
ip = "127.0.0.1"

# Standard-library `re`: a group that matches several times only keeps its
# last match, so group 1 exposes just the third octet.
m1 = re.fullmatch(ip_regex, ip)

# group() with no argument and group(0) both return the whole match.
print(f"{m1.groups()=}")
print(f"{m1.group()=}")
print(f"{m1.group(0)=}")
print(f"{m1.group(1)=}")
print(f"{m1.group(2)=}")

print("-----------------------------------------------------")

# Same pattern and input through the `regex` module; the same group()/groups()
# calls are printed so the two outputs can be compared side by side.
m2 = regex.fullmatch(ip_regex, ip)

print(f"{m2.groups()=}")
print(f"{m2.group()=}")
print(f"{m2.group(0)=}")
print(f"{m2.group(1)=}")
print(f"{m2.group(2)=}")

print("-----------------------------------------------------")

# `regex` additionally remembers every repetition of a group:
# captures(n) lists all strings group n matched, and allcaptures() reports
# the captures of every group (captures() with no argument means group 0).
print(f"{m2.allcaptures()=}")
print(f"{m2.captures()=}")
print(f"{m2.captures(0)=}")
print(f"{m2.captures(1)=}")
print(f"{m2.captures(2)=}")

# Group 1 matched three times (octets 1-3); group 2 matched once (octet 4).
octet_1 = m2.captures(1)[0]
octet_2 = m2.captures(1)[1]
octet_3 = m2.captures(1)[2]
octet_4 = m2.captures(2)[0]

print(f"{octet_1=}.{octet_2=}.{octet_3=}.{octet_4=}")


print("-----------------------------------------------------")
# Wait for Enter, then start the recursion demos.
input()
print("Recursion in regex")
print("-----------------------------------------------------")

# Candidate inputs, one per line.
strings = """
A
AAAA
AAAAA
AAAABBABBAAAA
AAAABBBB
AAAABBBBA
AAAABBBBAAAA
AAABB
AAABBB
AABB
AABBB
AB
ABAAAAAAAAAABBA
ABAABAABAABBBBABB
ABABABABA
ABABBBBBBBBBBBBBBBAAA
ABBA
ABBAA
ABBABABABABAA
ABBBAAA
Andrew
Andrew's favourite Band is not
B
BA
BABABABAB
BBAA
BBABABABABABABAA
BBBAABBABBBAABBBAAAA
BBBAABBBBBAABBBAAAA
George is Brilliant
"""

# (?1) re-enters capture group 1, so the group is: an "A", optionally a nested
# copy of the group, then a "B". MULTILINE makes ^ and $ anchor at every line,
# so only whole lines of that shape are reported.
nested_ab = regex.compile(r'^(A(?1)?B)$', flags=regex.MULTILINE)

for m in nested_ab.finditer(strings):
    print(m)

print("-----------------------------------------------------")

# One candidate word per line; the trailing "" supplies the final newline.
strings = "\n".join([
    "racecar",
    "hello",
    "helloolleh",
    "paper",
    "kayak",
    "not a palindrome",
    "",
])

# (?R) recurses into the whole pattern: a word character, then either a nested
# match or at most one middle character, then the same character again (\1).
# The pattern is unanchored, so it can also match inside a longer line.
palindrome = regex.compile(r'(\w)(?:(?R)|\w?)\1', flags=regex.MULTILINE)

for m in palindrome.finditer(strings):
    print(m)

print("-----------------------------------------------------")

# Same candidate lines as the previous demo, each terminated by a newline.
strings = "".join(
    f"{line}\n"
    for line in (
        "racecar",
        "hello",
        "helloolleh",
        "paper",
        "kayak",
        "not a palindrome",
    )
)

# Group 1 wraps the palindrome body so (?1) can recurse into it while ^ and $
# stay outside the recursion; \2 repeats the character captured by group 2.
# With MULTILINE the anchors apply per line, so only whole lines match.
whole_line_palindrome = regex.compile(
    r'^((.)(?:(?1)|.?)\2)$', flags=regex.MULTILINE
)

for m in whole_line_palindrome.finditer(strings):
    print(m)

print("-----------------------------------------------------")

# Backreference versus recursion:
# a backreference must repeat the exact text its group matched, whereas a
# recursive call re-applies the group's pattern and may match different text.
#
# Both searches below use the string "stackhousiaceous".
word = "stackhousiaceous"

# Backreference: pattern "([aeiou]{2}).*?\1"
# Group 1 matches "ou" and \1 must find the text "ou" again,
# so the match is "ousiaceou".
backreference_match = regex.search(r"([aeiou]{2}).*?\1", word)
print(backreference_match)

# Recursion: pattern "([aeiou]{2}).*?(?1)"
# (?1) re-runs the pattern "[aeiou]{2}", which is satisfied by "ia",
# so the match is "ousia".
recursion_match = regex.search(r"([aeiou]{2}).*?(?1)", word)
print(recursion_match)

print("-----------------------------------------------------")

# Candidate lines of parentheses, some balanced and some not.
strings = """\
((()))
())(
()()
(())
(((((()())))()()(((())))))
(((((()())))()()(())))))
((((()()()()(((())))))
"""

# Group 1 is one "(" ... ")" pair that may contain any number of nested
# group-1 matches via (?1); a line is group 1 followed by zero or more
# further group-1 matches. MULTILINE anchors ^ and $ at each line.
balanced_parens = regex.compile(r'^(\((?1)*\))(?1)*$', flags=regex.MULTILINE)

for m in balanced_parens.finditer(strings):
    print(m)

print("-----------------------------------------------------")

# Alternation matching a single octet; shared by both patterns below.
octet = r"[0-9]|[1-9][0-9]|1[0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]"

# Version 1 spells the octet alternation out twice (groups 1 and 2).
ip_regex_1 = r"(?:(" + octet + r")\.){3}(" + octet + r")"
# Version 2 writes it once and re-applies group 1's pattern with (?1).
ip_regex_2 = r"(?:(" + octet + r")\.){3}(?1)"
ip = "127.0.0.1"

for pattern in (ip_regex_1, ip_regex_2):
    print(regex.search(pattern, ip))


print("-----------------------------------------------------")
# Wait for Enter, then start the overlapped-matching demo.
input()
print("overlapping matches")
print("-----------------------------------------------------")

tests = "\nAAAAA\n"

# First pass uses the default (non-overlapping) search; the second pass sets
# overlapped=True, which permits matches that overlap one another.
# A separator line is printed after each pass.
for allow_overlap in (False, True):
    for m in regex.finditer('A+', tests, overlapped=allow_overlap):
        print(m)

    print("-----------------------------------------------------")
# Wait for Enter, then start the timeout demo.
input()
print("timeout")
print("-----------------------------------------------------")

subject = 'Hello COMP2041/9044'

# (?0) asks the engine to match the entire pattern recursively. The search is
# given a 0.2 second budget; if it is exceeded, the TimeoutError is printed.
try:
    regex.search('(?0)', subject, timeout=0.2)
except TimeoutError as timed_out:
    print(timed_out)

# (?R) is the other spelling of whole-pattern recursion, run here with no
# timeout. NOTE(review): the handler expects the unbounded recursion to end
# in MemoryError - this may consume a lot of memory before it fails.
try:
    regex.search('(?R)', subject)
except MemoryError:
    print("OOM")

print("-----------------------------------------------------")
# Wait for Enter, then start the POSIX-matching demo.
input()
print("POSIX matches")
print("-----------------------------------------------------")

# flags=0: alternation takes the first alternative that matches.
# flags=regex.POSIX: alternation takes the longest match (like grep).
for match_flags in (0, regex.POSIX):
    m = regex.search(r'Mr|Mrs', 'Mrs', flags=match_flags)
    print(m)

print("-----------------------------------------------------")
# Wait for Enter, then start the partial-matching demo.
input()
print("partial matches")
print("-----------------------------------------------------")

# With partial=True, a subject that could still grow into a full match
# (i.e. it looks like a truncated match) is reported as a partial match
# rather than as no match. Each candidate is tried against "z" + 7 digits.
candidates = ('', 'z', 'a', 'z123', 'z1234567', 'z123456789')
for candidate in candidates:
    print(regex.fullmatch(r'z\d{7}', candidate, partial=True))

print("-----------------------------------------------------")
# Wait for Enter, then start the POSIX character-class demo.
input()
print("Character Classes")
print("-----------------------------------------------------")

alpha_run = r'[[:alpha:]]+'
xdigit_run = r'[[:xdigit:]]+'

# The standard `re` module does not implement POSIX classes: it reads the
# pattern as an ordinary character set followed by a literal "]".
print(re.search(alpha_run, 'hello'))

# The `regex` module understands [[:name:]] classes.
for pattern, subject in (
    (alpha_run, 'hello'),
    (xdigit_run, '867F64AB75785CF75'),
    (xdigit_run, '5435FJI'),
):
    print(regex.search(pattern, subject))

print("-----------------------------------------------------")
# Wait for Enter, then start the set-operations demo.
input()
print("Set Operations")
print("-----------------------------------------------------")

# Operators available between sets inside a character class:
#   ||  union                 ("x||y" means "x or y")
#   &&  intersection          ("x&&y" means "x and y")
#   --  difference            ("x--y" means "x but not y")
#   ~~  symmetric difference  ("x~~y" means "x or y, but not both")
set_patterns = (
    r'[a-z]+',             # plain range, for comparison
    r'[[a-z]--[aeiou]]+',  # a-z with a, e, i, o, u removed
    r'[[a-z]~~[hgpqz]]+',  # in a-z or in "hgpqz", but not in both
)
for pattern in set_patterns:
    print(regex.search(pattern, 'hello'))

print("-----------------------------------------------------")
# Wait for Enter, then start the reverse-search demo.
input()
print("Reversed Match")
print("-----------------------------------------------------")

# For each pattern/subject pair, print the forward findall() result and then
# the result with the REVERSE flag, which makes the search work backwards.
for pattern, subject in ((r".", "abc"), (r"..", "abcde")):
    print(regex.findall(pattern, subject))
    print(regex.findall(pattern, subject, flags=regex.REVERSE))

print("-----------------------------------------------------")

A list of commonly used Python packages.

# Misc
tabulate        # Print tables easily 
python-dateutil # Powerful date processing
natsort         # Sorting mixed numeric and text data
Faker           # Generate fake data
xeger           # Generate data that matches a regex
progress        # Print progress bars
ipython         # Better python REPL
z3-solver       # Satisfiability solver
psutil          # utilities for working with processes
more-itertools  # more itertools
frozendict      # Immutable dictionary
click           # Better argument parsing

# Image processing
Pillow

# for when you need a nuclear reactor
regex   # more powerful than PCRE
wcmatch # bash style globs

# code formatting
black
autopep8
yapf

# static type checking and linting
mypy
pytype
pyre-check
pyright
pyanalyze
pyflakes
pylint
prospector

# runtime type checking
typeguard
beartype

# testing
pytest

# networking
requests
beautifulsoup4
html5lib
urllib3
selenium

# data analysis
numpy
scipy
sympy
pandas

# visualization
matplotlib
seaborn

# data formats
json5
toml
ruamel.yaml
strictyaml
PyYAML
omegaconf 

# coloured output
termcolor
colorama
blessings
rich