Software Construction



Print the last number (real or integer) on every line
Note: regexp to match number: -?\d+\.?\d*
Note: use of assignment operator :=
import re, sys

# A number followed only by non-digits up to the end of the line is, by
# construction, the last number on that line.
last_number = re.compile(r'(-?\d+\.?\d*)\D*$')

for text in sys.stdin:
    # lines containing no number at all produce no output
    if found := last_number.search(text):
        print(found.group(1))
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example
print the sum and mean of any positive integers found on stdin
Note regexp to split on non-digits
Note check to handle empty string from split
Only positive integers handled
import re, sys

# Read all of standard input at once, then split it on runs of non-digits
# so that only the digit sequences (positive integers) remain.
input_as_string = sys.stdin.read()

numbers = re.split(r"\D+", input_as_string)

total = 0
n = 0
for number in numbers:
    # re.split yields an empty string when the input starts or ends with a
    # non-digit (or is empty), so skip those.
    if number:
        total += int(number)
        n += 1

# Test n rather than numbers: re.split always returns at least one element
# (possibly ""), so the list is never empty and total / n would raise
# ZeroDivisionError for input containing no digits.
if n:
    print(f"{n} numbers, total {total}, mean {total / n:.1f}")

print the sum and mean of any numbers found on stdin
Note regexp to match number -?\d+\.?\d* matches positive & negative integers & floating-point numbers
import re, sys

# Pull every number (optional sign, optional fractional part) out of
# standard input and convert each one to a float.
input_as_string = sys.stdin.read()
values = [float(text) for text in re.findall(r"-?\d+\.?\d*", input_as_string)]

# Nothing is printed when the input contains no numbers.
if values:
    n = len(values)
    total = sum(values)
    print(f"{n} numbers, total {total}, mean {total / n:.1f}")

count people enrolled in courses implemented using regular expressions & dicts
import re

COURSE_CODES_FILE = "course_codes.tsv"
ENROLLMENTS_FILE = "enrollments.txt"

# course_codes.tsv contains tab separated UNSW course codes and names, e.g.
# ACCT1501  Accounting & Financial Mgt 1A

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# Map each course code to its name.  Lines that do not look like
# "<code><whitespace><name>" (e.g. blank lines) are skipped.
course_names = {}
with open(COURSE_CODES_FILE, encoding="utf-8") as f:
    for line in f:
        if m := re.match(r"(\S+)\s+(.*\S)", line):
            course_names[m.group(1)] = m.group(2)

# Count the enrollment records seen for each course code.
enrollments_count = {}
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        # Remove the newline separately, then everything from the first |.
        # Requiring "\n" inside the regex left the whole record as the key
        # when the final line of the file had no terminating newline.
        course_code = re.sub(r"\|.*", "", line.rstrip("\n"))
        if course_code not in enrollments_count:
            enrollments_count[course_code] = 0
        enrollments_count[course_code] += 1

# Report in course-code order: count, code, then name.
for (course_code, enrollment) in sorted(enrollments_count.items()):
    # if no name for course_code use ???
    name = course_names.get(course_code, "???")
    print(f"{enrollment:4} {course_code} {name}")

count people enrolled in courses implemented using string operations, a dict & counters
import collections
import re

COURSE_CODES_FILE = "course_codes.tsv"
ENROLLMENTS_FILE = "enrollments.txt"

# course_codes.tsv holds one course per line as "<code><TAB><name>", e.g.
# ACCT1501  Accounting & Financial Mgt 1A

# enrollments.txt holds synthetic course enrollments, one per line,
# with fields separated by |
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# course code -> course name
course_names = {}
with open(COURSE_CODES_FILE, encoding="utf-8") as f:
    for record in f:
        # split only on the first tab so the name keeps any later tabs
        code, title = record.strip().split("\t", maxsplit=1)
        course_names[code] = title

# the course code is the first |-separated field of every enrollment record
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    enrollments_count = collections.Counter(record.split("|")[0] for record in f)

# one output line per course, in course-code order
for course_code in sorted(enrollments_count):
    # courses missing from the names file are shown as ???
    name = course_names.get(course_code, "???")
    print(f"{enrollments_count[course_code]:4} {course_code} {name}")

count how many people enrolled have each first name implemented using regular expressions, a set & counters
import collections
import re

ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt holds synthetic course enrollments, one per line,
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# student numbers already seen, so each student is counted only once
# however many courses they are enrolled in
already_counted = set()
first_name_count = collections.Counter()
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for record in f:
        _, student_number, full_name = record.split("|")[0:3]

        if student_number not in already_counted:
            already_counted.add(student_number)

            # the first name is the first word after "<family name>, "
            m = re.match(r".*,\s+(\S+)", full_name)
            if m:
                first_name_count[m.group(1)] += 1

# print first names in decreasing order of popularity,
# ordering on the count first and the name second
by_popularity = sorted(
    first_name_count.items(),
    key=lambda item: (item[1], item[0]),
    reverse=True,
)
for (first_name, count) in by_popularity:
    print(f"{count:4} {first_name}")


Report cases where there are 5 or more people of the same first name enrolled in a course implemented using a dict of dicts
import re
import sys

REPORT_MORE_THAN_STUDENTS = 5
ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt holds synthetic course enrollments, one per line,
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# course code -> {first name -> number of enrolled students with that name}
course_first_name_count = {}
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for record in f:
        course_code, _, full_name = record.split("|")[0:3]

        # the first name is the first word after "<family name>, "
        m = re.match(r".*,\s+(\S+)", full_name)
        if not m:
            print("Warning could not parse line", record.strip(), file=sys.stderr)
            continue
        first_name = m.group(1)

        # create the inner dict / counter entry on first use
        name_counts = course_first_name_count.setdefault(course_code, {})
        name_counts[first_name] = name_counts.get(first_name, 0) + 1


# report in course-code order; names within a course in first-seen order
for (course, name_counts) in sorted(course_first_name_count.items()):
    for (first_name, count) in name_counts.items():
        if count < REPORT_MORE_THAN_STUDENTS:
            continue
        print(course, "has", count, "students named", first_name)


Report cases where there are more than 5 people of the same first name enrolled in a course implemented using split and a defaultdict of counters
import collections
import sys

REPORT_MORE_THAN_STUDENTS = 5
ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt holds synthetic course enrollments, one per line,
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# course code -> Counter of first names; missing courses are created on demand
course_first_name_count = collections.defaultdict(collections.Counter)
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for record in f:
        course_code, _, full_name = record.split("|")[0:3]
        # given names follow the first comma; the first name is the first
        # space-separated word of them
        first_name = full_name.split(",")[1].strip().split(" ")[0]
        course_first_name_count[course_code][first_name] += 1


# report in course-code order; names within a course in first-seen order
for course in sorted(course_first_name_count):
    for (first_name, count) in course_first_name_count[course].items():
        if count <= REPORT_MORE_THAN_STUDENTS:
            continue
        print(course, "has", count, "students named", first_name)


Change the names of the specified files to lower case. (simple version of the perl utility rename)

Note use of test to check if the new filename is unchanged.

Note the double quotes around $filename so filenames containing spaces are not broken into multiple words

Note the use of mv -- to stop mv interpreting a filename beginning with - as an option

Note files named -n or -e still break the script because echo will treat them as an option.
# At least one filename argument is required.
if test $# = 0
then
    echo "Usage $0: <files>" 1>&2
    exit 1
fi

for filename in "$@"
do
    # Translate the name to lower case.
    # Known limitation: echo treats a name such as -n or -e as an option.
    new_filename=$(
        echo "$filename"|
        tr '[:upper:]' '[:lower:]'
        )

    # Nothing to do when the name is already lower case.
    test "$filename" = "$new_filename" &&
        continue

    # Use -e (exists) rather than -r (readable): an existing file we cannot
    # read must not be silently overwritten by mv either.
    if test -e "$new_filename"
    then
        echo "$0: $new_filename exists" 1>&2
    elif test -e "$filename"
    then
        # -- stops mv treating a filename starting with - as an option
        mv -- "$filename" "$new_filename"
    else
        echo "$0: $filename not found" 1>&2
    fi

done
INTERNAL ERROR MISSING FILE: "./templates/topic/python_regex/code/topic/python_regex/code/rename_lower_case.py"
INTERNAL ERROR MISSING FILE: "./templates/topic/python_regex/code/topic/python_regex/code/rename_lower_case.py"



Change the names of the specified files by substituting occurrences of regex with replacement (simple version of the perl utility rename)
import os
import re
import sys

# Both the regex and the replacement are mandatory; filenames are optional.
if len(sys.argv) < 3:
    print(f"Usage: {sys.argv[0]} <regex> <replacement> [files]", file=sys.stderr)
    sys.exit(1)

regex, replacement, *pathnames = sys.argv[1:]

for old_pathname in pathnames:
    # only the first match in each pathname is substituted
    new_pathname = re.sub(regex, replacement, old_pathname, count=1)

    # skip files whose name the substitution leaves unchanged
    if new_pathname == old_pathname:
        continue

    if os.path.exists(new_pathname):
        # never overwrite an existing file
        print(f"{sys.argv[0]}: '{new_pathname}' exists", file=sys.stderr)
    else:
        try:
            os.rename(old_pathname, new_pathname)
        except OSError as e:
            print(f"{sys.argv[0]}: '{new_pathname}' {e}", file=sys.stderr)



Change the names of the specified files by substituting occurrences of regex with replacement (simple version of the perl utility rename)
also demonstrating argument processing and use of eval
beware eval can allow arbitrary code execution, it should not be used where security is important

import argparse
import os
import re
import sys

parser = argparse.ArgumentParser()

# positional arguments: the pattern, its replacement, then zero or more files
parser.add_argument(
    "regex",
    type=str,
    help="match against filenames",
)
parser.add_argument(
    "replacement",
    type=str,
    help="replaces matches with this",
)
parser.add_argument(
    "filenames",
    nargs="*",
    help="filenames to be changed",
)

# boolean switches, each False unless given on the command line
parser.add_argument("-d", "--dryrun", action="store_true",
                    help="show changes but don't make them")
parser.add_argument("-v", "--verbose", action="store_true",
                    help="print more information")
parser.add_argument("-e", "--eval", action="store_true",
                    help="evaluate replacement as python expression, match available as _")

# integer option: how many matches to replace in each filename (default 1)
parser.add_argument("-n", "--replace_n_matches", type=int, default=1,
                    help="replace n matches (0 for all matches)")

# parse sys.argv; argparse prints usage and exits on invalid arguments
args = parser.parse_args()


def eval_replacement(match):
    """Return the replacement text for one regex match.

    Without --eval this is the replacement argument unchanged.  With --eval
    the replacement argument is evaluated as a Python expression, with the
    variable _ set to the matched part of the filename, and the result is
    converted to a string.
    """
    if args.eval:
        # _ is not used directly here: it exists so the evaluated
        # expression can refer to the matched text.
        _ = match.group(0)
        # NOTE: eval runs arbitrary code supplied on the command line
        return str(eval(args.replacement))
    return args.replacement


# Rename each file named on the command line, reporting any problem on
# stderr and carrying on with the remaining files.
for old_pathname in args.filenames:
    try:
        new_pathname = re.sub(
            args.regex, eval_replacement, old_pathname, count=args.replace_n_matches
        )
    except Exception as e:
        # re.sub raises re.error for an invalid regex and, with --eval, the
        # user's expression can raise any exception.  Neither is an OSError,
        # so catch broadly and report rather than die with a traceback.
        print(
            f"{sys.argv[0]}: '{old_pathname}': '{args.replacement}'  {e}",
            file=sys.stderr,
        )
        continue

    # substitution left the name unchanged - nothing to rename
    if new_pathname == old_pathname:
        if args.verbose:
            print("no change:", old_pathname)
        continue

    # never overwrite an existing file
    if os.path.exists(new_pathname):
        print(f"{sys.argv[0]}: '{new_pathname}' exists", file=sys.stderr)
        continue

    # --dryrun: only describe what would happen
    if args.dryrun:
        print(old_pathname, "would be renamed to", new_pathname)
        continue

    if args.verbose:
        print("renaming", old_pathname, "to", new_pathname)
    try:
        os.rename(old_pathname, new_pathname)
    except OSError as e:
        print(f"{sys.argv[0]}: '{new_pathname}' {e}", file=sys.stderr)
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
import re, sys, os

for filename in sys.argv[1:]:
    # the changed text is written to "<filename>.new", which then replaces
    # the original; refuse to clobber such a file if it already exists
    tmp_filename = f"{filename}.new"
    if os.path.exists(tmp_filename):
        print(f"{sys.argv[0]}: {tmp_filename} already exists\n", file=sys.stderr)
        sys.exit(1)

    with open(filename) as f, open(tmp_filename, "w") as g:
        # Swap via the placeholder Zaphod: (misspelt) Hermione -> Zaphod,
        # Harry -> Hermione, then Zaphod -> Harry.
        g.writelines(
            re.sub(r"Herm[io]+ne", "Zaphod", line)
            .replace("Harry", "Hermione")
            .replace("Zaphod", "Harry")
            for line in f
        )

    os.rename(tmp_filename, filename)
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
import re, sys, os, shutil, tempfile

for filename in sys.argv[1:]:
    # Open the input before creating the temporary file: with delete=False
    # a temporary file created first would be left behind if the input
    # could not be opened.
    with open(filename) as f:
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp:
            for line in f:
                # Swap via the placeholder Zaphod: (misspelt) Hermione ->
                # Zaphod, Harry -> Hermione, then Zaphod -> Harry.
                changed_line = re.sub(r"Herm[io]+ne", "Zaphod", line)
                changed_line = changed_line.replace("Harry", "Hermione")
                changed_line = changed_line.replace("Zaphod", "Harry")
                tmp.write(changed_line)
    # Temporary files are created readable/writable by the owner only, so
    # copy the original's permission bits across before replacing it.
    shutil.copymode(filename, tmp.name)
    shutil.move(tmp.name, filename)
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
modified text is stored in a list then file over-written
import re, sys, os

for filename in sys.argv[1:]:
    # Read the whole file, building a list of the changed lines.
    # Swap via the placeholder Zaphod: (misspelt) Hermione -> Zaphod,
    # Harry -> Hermione, then Zaphod -> Harry.
    with open(filename) as f:
        changed_lines = [
            re.sub(r"Herm[io]+ne", "Zaphod", line)
            .replace("Harry", "Hermione")
            .replace("Zaphod", "Harry")
            for line in f
        ]

    # Only once reading has finished is the file over-written.
    with open(filename, "w") as g:
        g.writelines(changed_lines)
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
modified text is stored in a single string then file over-written
import re, sys, os

for filename in sys.argv[1:]:
    # Read the entire file into a single string.
    with open(filename) as f:
        text = f.read()

    # Swap via the placeholder Zaphod: (misspelt) Hermione -> Zaphod,
    # Harry -> Hermione, then Zaphod -> Harry.
    changed_text = re.sub(r"Herm[io]+ne", "Zaphod", text)
    changed_text = changed_text.replace("Harry", "Hermione")
    changed_text = changed_text.replace("Zaphod", "Harry")

    # changed_text is already a single string, so write it directly
    # (joining its characters back together was a no-op).
    with open(filename, "w") as g:
        g.write(changed_text)
ACCT1501	Accounting & Financial Mgt 1A
ACCT1511	Accounting & Financial Mgt 1B
ACCT2101	Industry Placement 1
ACCT2507	Intro to Accounting Research
ACCT2511	Financial Acct Fundamentals
ACCT2522	Management Acc for Decision
ACCT2532	Management Accounting (Hons)
ACCT2542	Corporate Financial Reporting
ACCT2552	Corporate Financial Rep (Hons)
ACCT2562	ESG:Current Dev Sustainabl Bus
ACCT2672	Accounting analytics
ACCT3202	Industry Placement 2
ACCT3303	Industry Placement 3
ACCT3563	Issues in Financial Reporting
ACCT3573	Issues in Financial Rep (Hons)
ACCT3583	Strategic Value Management
ACCT3585	E-Business: Strategy & Process
ACCT3593	Management Accounting 2 (Hon)
ACCT3601	Global Financial Reporting &
ACCT3610	Business Analysis & Valuation
ACCT3625	ESG Reporting
ACCT3672	Accounting analytics
ACCT3708	Auditing & Assurance Services
ACCT3718	Auditing &  Assurance (Hons)
ACCT3995	Fraud Examination Fundamentals
ACCT4794	Thesis (Accounting)
ACCT4795	Thesis (Accounting/Finance)
ACCT4796	Thesis (Accounting) A
ACCT4797	Thesis (Accounting) B
ACCT4798	Thesis (Accounting) C
ACCT4802	Advanced Studies in Fin Acc
ACCT4803	Regulation of Accounting
...
COMP2521|5255086|Peng, Xiang|3784/2|COMPMS|066.000|24T2|19981122|M
COMP1511|5245090|Mehta, Nicholas|3778/3|COMPCS|072.000|24T2|20030114|M
COMP1511|5243596|Ni, Michael Scott Mahdi|8543|COMPCS|000.000|24T2|19800208|M
COMP2521|5262564|Chai, Xinyang Zhihao|3778/2|COMPA1|079.167|24T2|20000210|M
COMP1911|5284253|Jin, Xiwen|3785/3|COMPA1 MATHU1|000.000|24T2|19970929|F
COMP1511|5268253|Joshi, Caleb Zekai|3784/3|COMPAS|082.333|24T2|20000324|M
COMP1511|5220119|Bian, Zhen|3781/2|MTRNES|073.000|24T2|20010726|M
COMP1531|5293620|Low, Jasper Keith|8543|COMPLS|058.857|24T2|19990101|M
COMP2041|5257618|Mohammed, Nelson|3707/2|COMPCS|079.150|24T2|20000227|M
COMP2041|5288705|Miao, Shruti|3761/4|SENGAH|061.000|24T2|19990512|F
COMP1521|5243145|Zhao, Kyle|8338|COMPZS|080.100|24T2|19850828|M
COMP9417|5267453|Nie, Yongliang|8543|COMPCS|069.068|24T2|20090618|M
COMP3511|5207846|Phung, Haoze|8416/1|COMPBH|064.125|24T2|20000320|M
COMP4953|5207846|Phung, Haoze|8416/1|COMPBH|064.125|24T2|20000320|M
COMP2511|5230032|Chiu, Jeff|3778/2|COMMJ1 COMPI1|073.103|24T2|19990418|M
COMP3121|5230032|Chiu, Jeff|3778/2|COMMJ1 COMPI1|073.103|24T2|19990418|M
COMP3331|5230032|Chiu, Jeff|3778/2|COMMJ1 COMPI1|073.103|24T2|19990418|M
COMP1511|5262686|Lyu, Nicolas|8543|COMPA1|071.667|24T2|20010218|M
COMP3900|5286602|You, Jiarui|3674/1|COMPSS|087.333|24T2|20090311|M
COMP9311|5214055|Su, Yujie|3784/3|COMPSS|079.250|24T2|20000711|F
COMP9414|5214055|Su, Yujie|3784/3|COMPSS|079.250|24T2|20000711|F
COMP9417|5214055|Su, Yujie|3784/3|COMPSS|079.250|24T2|20000711|F
COMP1531|5254632|Hunter, Jun|3778/2|COMPAS COMPCS|057.333|24T2|20040228|M
COMP2521|5297089|Shu, Varun|3785/3|COMPSS|067.000|24T2|20001108|M
COMP1531|5250164|Lin, Yiming|8543|COMPA1|088.500|24T2|19991127|M
COMP2041|5250164|Lin, Yiming|8543|COMPA1|088.500|24T2|19991127|M
COMP1511|5297400|Anand, Yize|3778/1|COMPA1|000.000|24T2|20060704|M
COMP1521|5204671|Jacob, Charlie|3674/1|COMPLS|084.200|24T2|20020216|M
COMP1521|5280354|Ji, Qian|3961/4|COMPER|000.000|24T2|20020613|F
COMP1531|5280354|Ji, Qian|3961/4|COMPER|000.000|24T2|20020613|F
COMP1521|5224183|Kwok, Minghao Tony|3502/3|COMPA1|089.889|24T2|20040526|M
COMP2521|5224183|Kwok, Minghao Tony|3502/3|COMPA1|089.889|24T2|20040526|M
...