Software Construction

            print_last_number.py
        
Print the last number (real or integer) on every line

Note: regexp to match number:  -?\d+\.?\d*

Note: use of assignment operator :=

import re, sys

# a number followed only by non-digit characters up to the end of the line,
# i.e. the last number on the line
last_number = re.compile(r'(-?\d+\.?\d*)\D*$')

for text in sys.stdin:
    # := assigns the match object and tests it in a single expression
    if found := last_number.search(text):
        print(found[1])

        
            find_numbers.0.py
        
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

print the sum and mean of any positive integers found on stdin

Note regexp to split on non-digits

Note check to handle empty string from split

Only positive integers handled

import re, sys

input_as_string = sys.stdin.read()

# splitting on runs of non-digits leaves the runs of digits
# the result can contain empty strings, e.g. when the input
# starts or ends with a non-digit, and is never an empty list
numbers = re.split(r"\D+", input_as_string)

total = 0
n = 0
for number in numbers:
    # skip the empty strings split can produce
    if number:
        total += int(number)
        n += 1

# test n rather than numbers: numbers always contains at least one string,
# so input without any digits would otherwise cause a division by zero
if n:
    print(f"{n} numbers, total {total}, mean {total / n:.1f}")

        
            find_numbers.1.py
        
print the sum and mean of any numbers found on stdin

Note regexp to match number -?\d+\.?\d*
match positive & negative integers & floating-point numbers

import re, sys

text = sys.stdin.read()

# every number in the input, still as a string
# findall returns an empty list if there are none
number_strings = re.findall(r"-?\d+\.?\d*", text)

# nothing is printed if the input contains no numbers
if number_strings:
    values = [float(s) for s in number_strings]
    n = len(values)
    total = sum(values)
    print(f"{n} numbers, total {total}, mean {total / n:.1f}")

        
            count_enrollments.0.py
        

        


count people enrolled in courses
implemented using regular expressions & dicts



    
import re

COURSE_CODES_FILE = "course_codes.tsv"
ENROLLMENTS_FILE = "enrollments.txt"

# course_codes.tsv contains tab separated UNSW course codes and names, e.g.
# ACCT1501  Accounting & Financial Mgt 1A

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# course code -> course name
# lines which do not contain a code followed by a name are skipped
course_names = {}
with open(COURSE_CODES_FILE, encoding="utf-8") as f:
    for line in f:
        if m := re.match(r"(\S+)\s+(.*\S)", line):
            course_names[m.group(1)] = m.group(2)

# course code -> number of enrollment lines for the course
enrollments_count = {}
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        # delete everything from the first | onwards leaving the course code
        # the newline is removed first so a last line without a trailing
        # newline is handled the same as every other line
        course_code = re.sub(r"\|.*", "", line.rstrip("\n"))
        if not course_code:
            # blank line - not an enrollment
            continue
        if course_code not in enrollments_count:
            enrollments_count[course_code] = 0
        enrollments_count[course_code] += 1

for (course_code, enrollment) in sorted(enrollments_count.items()):
    # if no name for course_code use ???
    name = course_names.get(course_code, "???")
    print(f"{enrollment:4} {course_code} {name}")

        
            count_enrollments.1.py
        

        


count people enrolled in courses
implemented using string operations, a dict & counters



    
import collections
import re

COURSE_CODES_FILE = "course_codes.tsv"
ENROLLMENTS_FILE = "enrollments.txt"

# course_codes.tsv contains tab separated UNSW course codes and names, e.g.
# ACCT1501  Accounting & Financial Mgt 1A

# enrollments.txt contains synthetic course enrollments
# with fields separated by |
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# course code -> course name
course_names = {}
with open(COURSE_CODES_FILE, encoding="utf-8") as f:
    for line in f:
        # partition never fails: tab is empty if the line contains no tab
        course_code, tab, course_name = line.strip().partition("\t")
        if not tab:
            # blank or malformed line - skip it rather than crash
            continue
        course_names[course_code] = course_name

# course code -> number of enrollment lines for the course
enrollments_count = collections.Counter()
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        # the course code is the first | separated field
        course_code = line.rstrip("\n").split("|")[0]
        if not course_code:
            # blank line - not an enrollment
            continue
        enrollments_count[course_code] += 1

for (course_code, enrollment) in sorted(enrollments_count.items()):
    # if no name for course_code use ???
    name = course_names.get(course_code, "???")
    print(f"{enrollment:4} {course_code} {name}")

        
            count_first_names.py
        

        


count how many people enrolled have each first name
implemented using regular expressions, a set & counters



    
import collections
import re

ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# student numbers whose first name has already been counted
seen_students = set()
first_name_count = collections.Counter()
with open(ENROLLMENTS_FILE, encoding="utf-8") as enrollments:
    for record in enrollments:
        _, student_number, full_name = record.split("|")[0:3]

        # a student appears once per course they are enrolled in,
        # only their first appearance is counted
        if student_number not in seen_students:
            seen_students.add(student_number)

            # full names have the form: family name, given names
            # the first given name is the first word after the comma
            m = re.match(r".*,\s+(\S+)", full_name)
            if m:
                first_name_count[m.group(1)] += 1

# sort (count, name) tuples so the ordering is on count before name
# reverse=True gives decreasing order of popularity
ranking = sorted(
    ((count, name) for (name, count) in first_name_count.items()),
    reverse=True,
)

for (count, first_name) in ranking:
    print(f"{count:4} {first_name}")

        
            duplicate_first_names.0.py
        

        



Report cases where there are 5 or more people
of the same first name enrolled in a course
implemented using a dict of dicts



    
import re
import sys

REPORT_MORE_THAN_STUDENTS = 5
ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# dict of dicts: course code -> (first name -> number of enrollments)
course_first_name_count = {}
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        fields = line.split("|")

        # the full name is the third field, in the form: family name, given names
        # a line with fewer than 3 fields (e.g. a blank line) is reported
        # like any other unparsable line instead of crashing the script
        m = re.match(r".*,\s+(\S+)", fields[2]) if len(fields) >= 3 else None
        if not m:
            print("Warning could not parse line", line.strip(), file=sys.stderr)
            continue

        course_code = fields[0]
        first_name = m.group(1)

        if course_code not in course_first_name_count:
            course_first_name_count[course_code] = {}

        if first_name not in course_first_name_count[course_code]:
            course_first_name_count[course_code][first_name] = 0

        course_first_name_count[course_code][first_name] += 1


for course in sorted(course_first_name_count.keys()):
    for (first_name, count) in course_first_name_count[course].items():
        # note the comparison is inclusive despite the constant's name:
        # a name is reported when REPORT_MORE_THAN_STUDENTS or more students have it
        if count >= REPORT_MORE_THAN_STUDENTS:
            print(course, "has", count, "students named", first_name)

        
            duplicate_first_names.1.py
        
Report cases where there are more than 5 people
of the same first name enrolled in a course
implemented using split and a defaultdict of counters

import collections
import sys

REPORT_MORE_THAN_STUDENTS = 5
ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# course code -> Counter of first names in that course
# the defaultdict creates an empty Counter the first time a course is seen
course_first_name_count = collections.defaultdict(collections.Counter)
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        fields = line.split("|")

        # the full name is the third field, in the form: family name, given names
        name_parts = fields[2].split(",") if len(fields) >= 3 else []

        # split with no argument ignores leading and repeated white space
        given_names = name_parts[1].split() if len(name_parts) >= 2 else []

        if not given_names:
            # too few fields, no comma or nothing after the comma:
            # warn and carry on rather than crash
            print("Warning could not parse line", line.strip(), file=sys.stderr)
            continue

        course_code = fields[0]
        first_name = given_names[0]
        course_first_name_count[course_code][first_name] += 1

for (course, name_counts) in sorted(course_first_name_count.items()):
    for (first_name, count) in name_counts.items():
        # strictly more than REPORT_MORE_THAN_STUDENTS students share the name
        if count > REPORT_MORE_THAN_STUDENTS:
            print(course, "has", count, "students named", first_name)

        
            tolower.sh
        
Change the names of the specified files to lower case.
(simple version of the perl utility rename)

Note use of test to check if the new filename is unchanged.

Note the double quotes around $filename so filenames
containing spaces are not broken into multiple words

Note the use of mv -- to stop mv interpreting a
filename beginning with - as an option

Note files named -n or -e still break the script
because echo will treat them as an option.

# at least one filename must be given
if test $# = 0
then
    echo "Usage $0: <files>" 1>&2
    exit 1
fi

for filename in "$@"
do
    # echo is used for simplicity, so a filename such as -n or -e
    # is treated as an echo option and is not handled correctly
    new_filename=$(
        echo "$filename"|
        tr '[:upper:]' '[:lower:]'
        )

    # nothing to do if the name is already lower case
    test "$filename" = "$new_filename" &&
        continue

    # test -e (exists) is used rather than test -r (readable)
    # so an existing file which we can not read is not replaced by mv
    if test -e "$new_filename"
    then
        echo "$0: $new_filename exists" 1>&2
    elif test -e "$filename"
    then
        # -- stops mv treating a filename beginning with - as an option
        mv -- "$filename" "$new_filename"
    else
        echo "$0: $filename not found" 1>&2
    fi

done

        
            rename_lower_case.py
        
INTERNAL ERROR MISSING FILE: "./templates/topic/python_regex/code/topic/python_regex/code/rename_lower_case.py"
    
INTERNAL ERROR MISSING FILE: "./templates/topic/python_regex/code/topic/python_regex/code/rename_lower_case.py"

        
            rename_regex.py
        

        




Change the names of the specified files
by substituting occurrences of regex with replacement
(simple version of the perl utility rename)



    
import os
import re
import sys

# the regex and replacement are required, the list of files may be empty
if len(sys.argv) < 3:
    print(f"Usage: {sys.argv[0]} <regex> <replacement> [files]", file=sys.stderr)
    sys.exit(1)

regex = sys.argv[1]
replacement = sys.argv[2]

# compile the regex once, before the loop, so an invalid regex
# produces an error message rather than a traceback
try:
    compiled_regex = re.compile(regex)
except re.error as e:
    print(f"{sys.argv[0]}: invalid regex '{regex}': {e}", file=sys.stderr)
    sys.exit(1)

for old_pathname in sys.argv[3:]:
    try:
        # count=1 - only the first match in the pathname is replaced
        new_pathname = compiled_regex.sub(replacement, old_pathname, count=1)
    except re.error as e:
        # e.g. the replacement refers to a group which the regex does not have
        # this would fail for every file so give up immediately
        print(
            f"{sys.argv[0]}: invalid replacement '{replacement}': {e}",
            file=sys.stderr,
        )
        sys.exit(1)
    if new_pathname == old_pathname:
        continue
    # never overwrite an existing file
    if os.path.exists(new_pathname):
        print(f"{sys.argv[0]}: '{new_pathname}' exists", file=sys.stderr)
        continue
    try:
        os.rename(old_pathname, new_pathname)
    except OSError as e:
        print(f"{sys.argv[0]}: '{new_pathname}' {e}", file=sys.stderr)

        
            rename_regex_eval.py
        

        




Change the names of the specified files
by substituting occurrences of regex with replacement
(simple version of the perl utility rename)

also demonstrating  argument processing and use of eval


beware eval can allow arbitrary code execution,
it should not be used where security is important





    
import argparse
import os
import re
import sys

parser = argparse.ArgumentParser()

# positional arguments: the regex and the replacement must be given,
# the list of filenames may be empty
for positional, description in [
    ("regex", "match against filenames"),
    ("replacement", "replaces matches with this"),
]:
    parser.add_argument(positional, type=str, help=description)
parser.add_argument("filenames", nargs="*", help="filenames to be changed")

# optional boolean arguments - each is False unless given on the command line
for short_option, long_option, description in [
    ("-d", "--dryrun", "show changes but don't make them"),
    ("-v", "--verbose", "print more information"),
    (
        "-e",
        "--eval",
        "evaluate replacement as python expression, match available as _",
    ),
]:
    parser.add_argument(
        short_option, long_option, action="store_true", help=description
    )

# optional integer argument which defaults to 1
parser.add_argument(
    "-n",
    "--replace_n_matches",
    type=int,
    default=1,
    help="replace n matches (0 for all matches)",
)

# the parsed command line, used by the rest of the script
args = parser.parse_args()


def eval_replacement(match):
    r"""Return the text which replaces one regex match in a filename.

    Passed to re.sub as the replacement function, so it is called
    with the match object of each match being replaced.

    Without --eval the replacement argument is expanded as an re.sub
    template, so group references such as \1 or \g<name> work.

    With --eval the replacement argument is evaluated as a Python
    expression, with the variable _ set to the matching part of the
    filename, and the result is converted to a string.
    """
    if not args.eval:
        # re.sub does not expand group references in the string returned
        # by a replacement function, so the template is expanded here
        return match.expand(args.replacement)
    # SECURITY: eval executes arbitrary code given on the command line
    # only acceptable because the user running the script supplies it
    _ = match.group(0)
    return str(eval(args.replacement))


for old_pathname in args.filenames:
    try:
        # a count of 0 makes re.sub replace every match
        new_pathname = re.sub(
            args.regex, eval_replacement, old_pathname, count=args.replace_n_matches
        )
    except Exception as e:
        # deliberately broad: an invalid regex raises re.error and,
        # with --eval, the user's expression can raise any exception
        # none of these are OSError
        print(
            f"{sys.argv[0]}: '{old_pathname}': '{args.replacement}'  {e}",
            file=sys.stderr,
        )
        continue

    if new_pathname == old_pathname:
        if args.verbose:
            print("no change:", old_pathname)
        continue

    # never overwrite an existing file
    if os.path.exists(new_pathname):
        print(f"{sys.argv[0]}: '{new_pathname}' exists", file=sys.stderr)
        continue

    if args.dryrun:
        print(old_pathname, "would be renamed to", new_pathname)
        continue

    if args.verbose:
        print("renaming", old_pathname, "to", new_pathname)
    try:
        os.rename(old_pathname, new_pathname)
    except OSError as e:
        print(f"{sys.argv[0]}: '{new_pathname}' {e}", file=sys.stderr)

        
            change_names.0.py
        

        
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example



For each file given as argument replace occurrences of Hermione
allowing for some misspellings with Harry and vice-versa.

Relies on Zaphod not occurring in the text.



    
import re, sys, os

for filename in sys.argv[1:]:
    # the changed text is written to a new file
    # which then replaces the original
    tmp_filename = f"{filename}.new"
    if os.path.exists(tmp_filename):
        print(f"{sys.argv[0]}: {tmp_filename} already exists\n", file=sys.stderr)
        sys.exit(1)
    with open(filename) as original, open(tmp_filename, "w") as rewritten:
        for text in original:
            # Zaphod is a placeholder so the Harrys created
            # are not changed straight back to Hermione
            text = re.sub(r"Herm[io]+ne", "Zaphod", text)
            text = text.replace("Harry", "Hermione").replace("Zaphod", "Harry")
            rewritten.write(text)
    os.rename(tmp_filename, filename)

        
            change_names.1.py
        
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione
allowing for some misspellings with Harry and vice-versa.

Relies on Zaphod not occurring in the text.

import re, sys, os, shutil, tempfile

for filename in sys.argv[1:]:
    # the original is opened first so a missing or unreadable file
    # does not leave a stray temporary file behind
    with open(filename) as f:
        # delete=False - the temporary file must survive being closed
        # because it is about to replace the original
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp:
            for line in f:
                # Zaphod is a placeholder so the Harrys created
                # are not changed straight back to Hermione
                changed_line = re.sub(r"Herm[io]+ne", "Zaphod", line)
                changed_line = changed_line.replace("Harry", "Hermione")
                changed_line = changed_line.replace("Zaphod", "Harry")
                tmp.write(changed_line)
    # temporary files are created accessible only by their owner
    # give the new file the permissions of the file it replaces
    shutil.copymode(filename, tmp.name)
    shutil.move(tmp.name, filename)

        
            change_names.2.py
        
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione
allowing for some misspellings with Harry and vice-versa.

Relies on Zaphod not occurring in the text.

modified text is stored in a list then file over-written

import re, sys, os

for filename in sys.argv[1:]:
    # build the list of changed lines while the file is open for reading
    # Zaphod is a placeholder so the Harrys created
    # are not changed straight back to Hermione
    with open(filename) as source:
        changed_lines = [
            re.sub(r"Herm[io]+ne", "Zaphod", line)
            .replace("Harry", "Hermione")
            .replace("Zaphod", "Harry")
            for line in source
        ]
    # the file has been completely read so it can now be over-written
    with open(filename, "w") as destination:
        destination.writelines(changed_lines)

        
            change_names.3.py
        
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione
allowing for some misspellings with Harry and vice-versa.

Relies on Zaphod not occurring in the text.

modified text is stored in a single string then file over-written

import re, sys, os

for filename in sys.argv[1:]:
    # read the entire file into a single string
    with open(filename) as f:
        text = f.read()
    # Zaphod is a placeholder so the Harrys created
    # are not changed straight back to Hermione
    changed_text = re.sub(r"Herm[io]+ne", "Zaphod", text)
    changed_text = changed_text.replace("Harry", "Hermione")
    changed_text = changed_text.replace("Zaphod", "Harry")
    # the file has been completely read so it can now be over-written
    with open(filename, "w") as g:
        g.write(changed_text)

course_codes.tsv

ACCT1501	Accounting & Financial Mgt 1A
ACCT1511	Accounting & Financial Mgt 1B
ACCT2101	Industry Placement 1
ACCT2507	Intro to Accounting Research
ACCT2511	Financial Acct Fundamentals
ACCT2522	Management Acc for Decision
ACCT2532	Management Accounting (Hons)
ACCT2542	Corporate Financial Reporting
ACCT2552	Corporate Financial Rep (Hons)
ACCT2672	Accounting analytics
ACCT3202	Industry Placement 2
ACCT3303	Industry Placement 3
ACCT3563	Issues in Financial Reporting
ACCT3573	Issues in Financial Rep (Hons)
ACCT3583	Strategic Value Management
ACCT3585	E-Business: Strategy & Process
ACCT3593	Management Accounting 2 (Hon)
ACCT3601	Global Financial Reporting &
ACCT3610	Business Analysis & Valuation
ACCT3625	ESG Reporting
ACCT3672	Accounting analytics
ACCT3708	Auditing & Assurance Services
ACCT3718	Auditing &  Assurance (Hons)
ACCT3995	Fraud Examination Fundamentals
ACCT4794	Thesis (Accounting)
ACCT4795	Thesis (Accounting/Finance)
ACCT4796	Thesis (Accounting) A
ACCT4797	Thesis (Accounting) B
ACCT4798	Thesis (Accounting) C
ACCT4802	Advanced Studies in Fin Acc
ACCT4803	Regulation of Accounting
ACCT4804	Development of Acc Thought
...

enrollments.txt

COMP1511|5256571|Ni, Harish|7543|SENGAH|000.000|24T2|19990411|M
COMP1511|5296570|Truong, Jackson|3784/1|MATHNT|000.000|24T2|20060522|M
COMP1911|5228928|Jin, Joyce|3707/2|COMMJ1 COMPA1|000.000|24T2|20020717|F
COMP1511|5220761|Qi, Boyu Mohammad Zehua|3778/2|ACTLD1 COMPA1|000.000|24T2|20040807|M
COMP1521|5278069|Mohammed, Aidan|3778/2|COMPA1|000.000|24T2|20021122|M
COMP2041|5270724|Gao, Ruilin|8959|COMPA1|000.000|24T2|19960906|M
COMP2041|5254894|Pi, Jenny|3707/4|COMPIS|000.000|24T2|19990415|F
COMP1521|5243017|Hossain, Sean|3778/3|COMPA1 MTRNAH|000.000|24T2|20050411|M
COMP9417|5267137|Teng, Ricky|3674/2|COMPA1 ELECF1|000.000|24T2|20060618|M
COMP3511|5251033|Moore, Siyuan|3778/3|COMPA1 MTRNAH|000.000|24T2|20010525|M
COMP4953|5251033|Moore, Siyuan|3778/3|COMPA1 MTRNAH|000.000|24T2|20010525|M
COMP1511|5271164|Chan, Qingyang|8543|COMPA1|000.000|24T2|20010310|M
COMP3900|5294895|Brown, James|8543|COMPBH|000.000|24T2|20021220|M
COMP9311|5271159|Leung, Faith Holly Li|3707/3|COMPA1|000.000|24T2|20080417|F
COMP9417|5271159|Leung, Faith Holly Li|3707/3|COMPA1|000.000|24T2|20080417|F
COMP1531|5205536|Choi, Zechen|3778/2|COMPZS|000.000|24T2|20080805|M
COMP2041|5205536|Choi, Zechen|3778/2|COMPZS|000.000|24T2|20080805|M
COMP1521|5286700|Xiao, Cindy|3674/1|COMPA1|000.000|24T2|20010416|F
COMP2521|5286700|Xiao, Cindy|3674/1|COMPA1|000.000|24T2|20010416|F
COMP1521|5232307|Chai, Leo Arjun|3707/3|COMPAS|000.000|24T2|20050912|M
COMP2521|5232307|Chai, Leo Arjun|3707/3|COMPAS|000.000|24T2|20050912|M
COMP1521|5291022|Jia, Sahil|8543|COMPY1|000.000|24T2|20020428|M
COMP3121|5255876|Widjaja, Abdul Sachin|8543|COMPOS|000.000|24T2|20000918|M
COMP1531|5293314|Xing, William|3778/2|COMPAS|000.000|24T2|20030311|M
COMP3311|5293314|Xing, William|3778/2|COMPAS|000.000|24T2|20030311|M
COMP3331|5293314|Xing, William|3778/2|COMPAS|000.000|24T2|20030311|M
COMP9313|5285757|Niu, Yifan Matt Xiyu Chenhao Jeremy|8543|COMPSS|000.000|24T2|19940720|M
COMP9417|5285757|Niu, Yifan Matt Xiyu Chenhao Jeremy|8543|COMPSS|000.000|24T2|19940720|M
COMP1911|5254326|Turner, Jun|3707/2|COMPLS|000.000|24T2|19970613|M
COMP1531|5297123|Tao, Lara|8543|COMPCH|000.000|24T2|19990204|F
COMP3900|5257966|Sun, Jared|8543|COMPMS|000.000|24T2|20110301|M
COMP6771|5257966|Sun, Jared|8543|COMPMS|000.000|24T2|20110301|M
...