Software Construction

Course Resources

Administrivia: Course Outline | Course Timetable | Help Sessions
Administrivia: COMP2041 Handbook | COMP9044 Handbook
Shell: Shell & Regex Quick Reference Card
Python: Python Quick Reference Card | Python Documentation
Meet the Team: Our Team
Platforms: Lectures (via YouTube) | Tut-Labs (via BbCollaborate (via Moodle)) | Course Forum
Assessment: Autotests, Submissions, Marks | Give online: submission | Give online: sturec
Assignment Specifications: Assignment 1 | Assignment 2

Course Content Week-by-Week

Tutorial
Laboratory
Monday Week 1 Lecture Topics
Thursday Week 1 Lecture Topics
Tutorial
Laboratory
Monday Week 2 Lecture Topics
Thursday Week 2 Lecture Topics
Tutorial
Laboratory
Weekly Test
Thursday Week 3 Lecture Topics
Tutorial
Laboratory
Weekly Test
Monday Week 4 Lecture Topics
Thursday Week 4 Lecture Topics
Tutorial
Laboratory
Weekly Test
Monday Week 5 Lecture Topics
Thursday Week 5 Lecture Topics
Weekly Test
Tutorial
Laboratory
Weekly Test
Monday Week 7 Lecture Topics
Thursday Week 7 Lecture Topics
Tutorial
Laboratory
Weekly Test
Monday Week 8 Lecture Topics
Thursday Week 8 Lecture Topics
Tutorial
Laboratory
Weekly Test
Monday Week 9 Lecture Topics
Thursday Week 9 Lecture Topics
Tutorial
Laboratory
Weekly Test
Monday Week 10 Lecture Topics
Thursday Week 10 Lecture Topics

Course Content Topic-by-Topic

Course Intro
Filters
regex101: online regex tester RegExr: online regex tester

#include <stdio.h>
#include <stdlib.h>

// Copy every byte of stream to stdout.
// stream is read with fgetc until it returns EOF.
// If a write to stdout fails an error message is printed
// and the program exits with status 1.
void process_stream(FILE *stream) {
    int byte;
    while ((byte = fgetc(stream)) != EOF) {
        if (fputc(byte, stdout) == EOF) {
            // perror itself appends ": " and the error text,
            // so the prefix must not end with its own colon
            perror("cat");
            exit(1);
        }
    }
}

// Copy each file named on the command line to stdout, in order.
// With no arguments, copy stdin instead.
// Stops with exit status 1 at the first file that cannot be opened.
int main(int argc, char *argv[]) {

    if (argc == 1) {
        process_stream(stdin);
        return 0;
    }

    for (int arg = 1; arg < argc; arg++) {
        char *pathname = argv[arg];
        FILE *stream = fopen(pathname, "r");
        if (stream == NULL) {
            // prefix with program and file name, perror adds the reason
            fprintf(stderr, "%s: %s: ", argv[0], pathname);
            perror("");
            return 1;
        }
        process_stream(stream);
        fclose(stream);
    }

    return 0;
}

Download cat.c



import sys


def process_stream(stream):
    """
    write each line read from stream to stdout unchanged
    """
    for text in stream:
        print(text, end="")


def main():
    """
    copy each file named on the command line to stdout,
    or copy stdin when no files are named
    """
    pathnames = sys.argv[1:]
    if not pathnames:
        process_stream(sys.stdin)
        return
    for pathname in pathnames:
        with open(pathname, "r") as source:
            process_stream(source)


if __name__ == "__main__":
    main()

Download cat.py



#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Print every line of stream that contains substring,
// each prefixed with name and its line number (counted from 1).
void process_stream(FILE *stream, char *name, char *substring) {
    // getline allocates and grows this buffer as needed
    char *buffer = NULL;
    size_t capacity = 0;

    for (int line_number = 1; getline(&buffer, &capacity, stream) > 0; line_number++) {
        if (strstr(buffer, substring) == NULL) {
            continue;
        }
        printf("%s:%d:%s", name, line_number, buffer);
    }

    free(buffer);
}

// Search for the substring given as the first argument.
// Files named by the remaining arguments are searched in order;
// if no files are named, stdin is searched.
// Returns 0 on success, 1 if the substring argument is missing
// or a file cannot be opened.
int main(int argc, char *argv[]) {

    // without a substring there is nothing to search for:
    // report the mistake instead of silently succeeding
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <substring> [files...]\n", argc > 0 ? argv[0] : "fgrep");
        return 1;
    }

    if (argc == 2) {
        process_stream(stdin, "<stdin>", argv[1]);
    } else {
        for (int i = 2; i < argc; i++) {
            FILE *in = fopen(argv[i], "r");
            if (in == NULL) {
                // prefix with program and file name, perror adds the reason
                fprintf(stderr, "%s: %s: ", argv[0], argv[i]);
                perror("");
                return 1;
            }
            process_stream(in, argv[i], argv[1]);
            fclose(in);
        }
    }

    return 0;
}

Download fgrep.c



import sys


def process_stream(f, name, substring):
    """
    print each line of f which contains substring,
    prefixed with name and the line's number (counted from 1)
    """
    line_number = 0
    for line in f:
        line_number += 1
        if substring not in line:
            continue
        print(f'{name}:{line_number}:{line}', end='')


def main():
    """
    search for the substring given as the first argument

    files named by the remaining arguments are searched in order,
    if no files are named stdin is searched

    exits with status 1 and a usage message if the substring is missing
    """
    if len(sys.argv) < 2:
        # without a substring there is nothing to search for,
        # report the mistake instead of silently doing nothing
        program = sys.argv[0] if sys.argv else "fgrep.py"
        print(f"Usage: {program} <substring> [files...]", file=sys.stderr)
        sys.exit(1)

    substring = sys.argv[1]
    pathnames = sys.argv[2:]
    if not pathnames:
        process_stream(sys.stdin, "<stdin>", substring)
        return
    for pathname in pathnames:
        with open(pathname, 'r') as f:
            process_stream(f, pathname, substring)


if __name__ == '__main__':
    main()

Download fgrep.py


#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

// Count the lines, words and characters read from in, then print
// the three counts followed by name on a single line.
// A line is counted for each '\n' seen (Unix-like line separator);
// other platforms' separators are not handled,
// see https://en.wikipedia.org/wiki/Newline
// A word is a maximal run of characters for which isspace is false.

void process_stream(FILE *in, char *name) {
    int line_count = 0;
    int word_count = 0;
    int char_count = 0;
    int inside_word = 0;

    for (int ch = fgetc(in); ch != EOF; ch = fgetc(in)) {
        char_count++;
        line_count += (ch == '\n');

        if (isspace(ch)) {
            inside_word = 0;
        } else {
            // the first character after whitespace starts a new word
            word_count += !inside_word;
            inside_word = 1;
        }
    }

    printf("%d %d %d %s\n", line_count, word_count, char_count, name);
}

// Print counts for each file named on the command line, in order.
// With no arguments, print counts for stdin (labelled "stdin").
// Stops with exit status 1 at the first file that cannot be opened.
int main(int argc, char *argv[]) {
    if (argc == 1) {
        process_stream(stdin, "stdin");
        return 0;
    }

    for (int arg = 1; arg < argc; arg++) {
        char *pathname = argv[arg];
        FILE *stream = fopen(pathname, "r");
        if (stream == NULL) {
            // prefix with program and file name, perror adds the reason
            fprintf(stderr, "%s: %s: ", argv[0], pathname);
            perror("");
            return 1;
        }
        process_stream(stream, pathname);
        fclose(stream);
    }
    return 0;
}

Download wc.c



import sys
import os


def process_stream(stream):
    """
    count lines, words, chars in stream

    prints the three counts, each right-aligned in 6 columns,
    without a trailing newline so the caller can append a name

    a line is counted only if it ends with a newline character,
    so a final unterminated line adds words and chars but no line
    """
    lines = 0
    words = 0
    characters = 0
    for line in stream:
        # compare with "\n" rather than os.linesep: text streams opened
        # with the default newline handling present every line ending
        # as "\n", so os.linesep ("\r\n" on Windows) would never match
        lines += line.endswith("\n")
        words += len(line.split())
        characters += len(line)
    print(f"{lines:>6} {words:>6} {characters:>6}", end="")


def main():
    """
    process files given as arguments, if no arguments process stdin

    the counts for each file are followed by its pathname,
    the counts for stdin are printed with no name
    """
    pathnames = sys.argv[1:]
    if not pathnames:
        process_stream(sys.stdin)
        # process_stream leaves its output line unfinished so a name can
        # follow, there is no name for stdin so just end the line
        print()
        return
    for pathname in pathnames:
        with open(pathname, "r") as f:
            process_stream(f)
            print(f" {pathname}")


if __name__ == "__main__":
    main()

Download wc.py


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

// Copy stream to stdout, leaving out any line that is
// identical to the line immediately before it.
void process_stream(FILE *stream) {
    // buffer filled (and grown as needed) by getline
    char *current = NULL;
    size_t current_size = 0;
    // our own copy of the line read on the previous iteration
    char *previous = NULL;
    size_t previous_size = 0;

    while (getline(&current, &current_size, stream) > 0) {
        int repeated = previous != NULL && strcmp(current, previous) == 0;
        if (!repeated) {
            fputs(current, stdout);
        }

        // keep previous's buffer the same size as getline's buffer
        // so the copy below always fits
        if (previous_size != current_size) {
            previous = realloc(previous, current_size);
            assert(previous != NULL);
            previous_size = current_size;
        }

        strncpy(previous, current, current_size);
    }

    free(current);
    free(previous);
}

// Filter the file named by the first argument,
// or stdin if there are no arguments.
// Arguments after the first are not used.
// Returns 1 if the file cannot be opened, otherwise 0.
int main(int argc, char *argv[]) {
    FILE *in = stdin;

    if (argc != 1) {
        in = fopen(argv[1], "r");
        if (in == NULL) {
            // prefix with program and file name, perror adds the reason
            fprintf(stderr, "%s: %s: ", argv[0], argv[1]);
            perror("");
            return 1;
        }
    }

    process_stream(in);

    // only close what was opened here, stdin is left open
    if (argc != 1) {
        fclose(in);
    }

    return 0;
}

Download uniq.c



import sys


def process_stream(stream):
    """
    write the lines of stream to stdout, leaving out any line
    identical to the line immediately before it
    """
    previous = None
    for current in stream:
        # previous is None only before the first line,
        # and a line read from the stream never equals None
        if current != previous:
            print(current, end='')
        previous = current


def main():
    """
    filter each file named on the command line in turn,
    or filter stdin when no files are named
    """
    pathnames = sys.argv[1:]
    if not pathnames:
        process_stream(sys.stdin)
        return
    for pathname in pathnames:
        with open(pathname, 'r') as source:
            process_stream(source)


if __name__ == '__main__':
    main()

Download uniq.py



import sys


def process_stream(f):
    """
    print lines of stream in sorted order

    every line printed ends with a newline: one is added to a final
    line that lacks it, so that line cannot be joined onto whichever
    line is sorted after it
    """
    lines = [line if line.endswith("\n") else line + "\n" for line in f]
    lines.sort()
    print("".join(lines), end="")


def main():
    """
    sort the file named by the first argument,
    or stdin if there are no arguments

    arguments after the first are not used
    """
    if len(sys.argv) == 1:
        process_stream(sys.stdin)
        return
    pathname = sys.argv[1]
    with open(pathname, 'r') as source:
        process_stream(source)


if __name__ == '__main__':
    main()

Download sort.py



import subprocess
import sys

# the real xargs runs the command multiple times if input is large
# the real xargs treats quotes specially


def main():
    """
    run the command given as arguments once, with every
    whitespace-separated word read from stdin appended as extra arguments

    if no command is given echo is run, as the real xargs does
    """
    input_words = [w for line in sys.stdin for w in line.split()]
    # without this default the first input word would be run as the command
    command = sys.argv[1:] or ["echo"]
    subprocess.run(command + input_words)


if __name__ == "__main__":
    main()

Download xargs.py

This file contains examples of the use of the most common Unix filter programs (grep, wc, head, etc.). It also contains solutions to the exercises discussed in lectures.
  1. Consider a file course_codes.tsv containing UNSW course codes and names.
    ls -l course_codes.tsv
    -rw-r--r-- 1 cs2041 cs2041 137255 Jul 14  2022 course_codes.tsv
    
    wc course_codes.tsv
      4159  17806 137255 course_codes.tsv
    
    head course_codes.tsv
    ACCT1501	Accounting & Financial Mgt 1A
    ACCT1511	Accounting & Financial Mgt 1B
    ACCT2101	Industry Placement 1
    ACCT2511	Financial Acct Fundamentals
    ACCT2522	Management Accounting 1
    ACCT2542	Corporate Financial Reporting
    ACCT2672	Accounting analytics
    ACCT3202	Industry Placement 2
    ACCT3303	Industry Placement 3
    ACCT3563	Issues in Financial Reporting
    
    It looks like the code is separated from the title by a number of spaces. We can check this via cat -A:
    head -5 course_codes.tsv | cat -A
    ACCT1501^IAccounting & Financial Mgt 1A$
    ACCT1511^IAccounting & Financial Mgt 1B$
    ACCT2101^IIndustry Placement 1$
    ACCT2511^IFinancial Acct Fundamentals$
    ACCT2522^IManagement Accounting 1$
    
    This shows us that our initial guess was wrong, and there's actually a tab character between the course code and title (shown as ^I by cat -A). Also, the location of the end-of-line marker ($) indicates that there are no trailing spaces or tabs.

    If we need to know what COMP courses there are:

    grep -E -c COMP course_codes.tsv
    115
    
    grep -E COMP course_codes.tsv
    COMP1010	The Art of Computing
    COMP1511	Programming Fundamentals
    COMP1521	Computer Systems Fundamentals
    COMP1531	Software Eng Fundamentals
    COMP1911	Computing 1A
    COMP2041	Software Construction
    COMP2111	System Modelling and Design
    COMP2511	O-O Design & Programming
    COMP2521	Data Structures and Algorithms
    COMP3121	Algorithms & Programming Tech
    COMP3131	Programming Languages & Compil
    COMP3141	Software Sys Des&Implementat'n
    COMP3151	Foundations of Concurrency
    COMP3153	Algorithmic Verification
    COMP3161	Concepts of Programming Lang.
    COMP3211	Computer Architecture
    COMP3222	Digital Circuits and Systems
    COMP3231	Operating Systems
    COMP3311	Database Systems
    COMP3331	Computer Networks&Applications
    COMP3411	Artificial Intelligence
    COMP3421	Computer Graphics
    COMP3431	Robotic Software Architecture
    COMP3511	Human Computer Interaction
    COMP3601	Design Project A
    COMP3821	Ext Algorithms&Prog Techniques
    COMP3891	Ext Operating Systems
    COMP3900	Computer Science Project
    COMP3901	Special Project A
    COMP3902	Special Project B
    COMP4121	Advanced Algorithms
    COMP4128	Programming Challenges
    COMP4141	Theory of Computation
    COMP4161	Advanced Verification
    COMP4336	Mobile Data Networking
    COMP4337	Securing Fixed & Wireless Netw
    COMP4418	Knowledge Representation
    COMP4511	User Interface Design & Constr
    COMP4601	Design Project B
    COMP4920	Professional Issues and Ethics
    COMP4951	Research Thesis A
    COMP4952	Research Thesis B
    COMP4953	Research Thesis C
    COMP4961	Computer Science Thesis A
    COMP4962	Computer Science Thesis B
    COMP4963	Computer Science Thesis C
    COMP6080	Web Front-End Programming
    COMP6441	Security Engineering
    COMP6443	Web Application Security
    COMP6445	Digital Forensics
    COMP6447	Security Assessment
    COMP6448	Security Masterclass
    COMP6451	Cryptocurrency and DLT
    COMP6452	Blockchain App Architecture
    COMP6714	Info Retrieval and Web Search
    COMP6721	(In-)Formal Methods
    COMP6733	Internet of Things
    COMP6741	Algorithms for Intractable Pbs
    COMP6752	Modelling Concurrent Systems
    COMP6771	Advanced C++ Programming
    COMP6841	Extended Security Engineering
    COMP6843	Extended WebApp Security
    COMP6845	Extended Digital Forensics
    COMP9020	Foundations of Comp. Science
    COMP9021	Principles of Programming
    COMP9024	Data Structures & Algorithms
    COMP9032	Microprocessors & Interfacing
    COMP9044	Software Construction
    COMP9101	Design &Analysis of Algorithms
    COMP9102	Programming Lang & Compilers
    COMP9153	Algorithmic Verification
    COMP9154	Foundations of Concurrency
    COMP9164	Concepts of Programming Lang.
    COMP9201	Operating Systems
    COMP9211	Computer Architecture
    COMP9222	Digital Circuits and Systems
    COMP9242	Advanced Operating Systems
    COMP9243	Distributed Systems
    COMP9283	Ext Operating Systems
    COMP9301	Cyber Security Project
    COMP9302	Cyber Security Project B
    COMP9311	Database Systems
    COMP9312	Data Analytics for Graphs
    COMP9313	Big Data Management
    COMP9315	Database Systems Implementat'n
    COMP9319	Web Data Compression & Search
    COMP9321	Data Services Engineering
    COMP9322	Software Service Design & Eng
    COMP9323	e-Enterprise Project
    COMP9331	Computer Networks&Applications
    COMP9332	Network Routing and Switching
    COMP9334	Systems Capacity Planning
    COMP9336	Mobile Data Networking
    COMP9337	Securing Fixed & Wireless Netw
    COMP9414	Artificial Intelligence
    COMP9415	Computer Graphics
    COMP9417	Machine Learning & Data Mining
    COMP9418	Advanced Machine Learning
    COMP9434	Robotic Software Architecture
    COMP9444	Neural Networks, Deep Learning
    COMP9447	Security Engineering Workshop
    COMP9491	Applied AI
    COMP9511	Human Computer Interaction
    COMP9517	Computer Vision
    COMP9596	Research Project
    COMP9727	Recommender Systems
    COMP9801	Ext Design&Analysis of Algo
    COMP9814	Ext Artificial Intelligence
    COMP9900	Info Tech Project
    COMP9901	P/T Res. Thesis Comp Sci & Eng
    COMP9902	Res. Thesis Comp Sci & Eng F/T
    COMP9945	Research Project
    COMP9991	Research Project A
    COMP9992	Research Project B
    COMP9993	Research Project C
    
    Either of the two commands below tells us which courses have "comp" in their name or code (in upper or lower case).
    tr A-Z a-z <course_codes.tsv | grep -E comp
    benv7503	geocomputation
    binf3020	computational bioinformatics
    binf9020	computational bioinformatics
    biom9332	biocompatibility
    ceic6711	complex fluids
    chem3031	inorg chem:trans metals & comp
    code1110	computational design theory 1
    code1210	computational design theory 2
    code2121	advanced computational design
    code2132	computational design studio iv
    comp1010	the art of computing
    comp1511	programming fundamentals
    comp1521	computer systems fundamentals
    comp1531	software eng fundamentals
    comp1911	computing 1a
    comp2041	software construction
    comp2111	system modelling and design
    comp2511	o-o design & programming
    comp2521	data structures and algorithms
    comp3121	algorithms & programming tech
    comp3131	programming languages & compil
    comp3141	software sys des&implementat'n
    comp3151	foundations of concurrency
    comp3153	algorithmic verification
    comp3161	concepts of programming lang.
    comp3211	computer architecture
    comp3222	digital circuits and systems
    comp3231	operating systems
    comp3311	database systems
    comp3331	computer networks&applications
    comp3411	artificial intelligence
    comp3421	computer graphics
    comp3431	robotic software architecture
    comp3511	human computer interaction
    comp3601	design project a
    comp3821	ext algorithms&prog techniques
    comp3891	ext operating systems
    comp3900	computer science project
    comp3901	special project a
    comp3902	special project b
    comp4121	advanced algorithms
    comp4128	programming challenges
    comp4141	theory of computation
    comp4161	advanced verification
    comp4336	mobile data networking
    comp4337	securing fixed & wireless netw
    comp4418	knowledge representation
    comp4511	user interface design & constr
    comp4601	design project b
    comp4920	professional issues and ethics
    comp4951	research thesis a
    comp4952	research thesis b
    comp4953	research thesis c
    comp4961	computer science thesis a
    comp4962	computer science thesis b
    comp4963	computer science thesis c
    comp6080	web front-end programming
    comp6441	security engineering
    comp6443	web application security
    comp6445	digital forensics
    comp6447	security assessment
    comp6448	security masterclass
    comp6451	cryptocurrency and dlt
    comp6452	blockchain app architecture
    comp6714	info retrieval and web search
    comp6721	(in-)formal methods
    comp6733	internet of things
    comp6741	algorithms for intractable pbs
    comp6752	modelling concurrent systems
    comp6771	advanced c++ programming
    comp6841	extended security engineering
    comp6843	extended webapp security
    comp6845	extended digital forensics
    comp9020	foundations of comp. science
    comp9021	principles of programming
    comp9024	data structures & algorithms
    comp9032	microprocessors & interfacing
    comp9044	software construction
    comp9101	design &analysis of algorithms
    comp9102	programming lang & compilers
    comp9153	algorithmic verification
    comp9154	foundations of concurrency
    comp9164	concepts of programming lang.
    comp9201	operating systems
    comp9211	computer architecture
    comp9222	digital circuits and systems
    comp9242	advanced operating systems
    comp9243	distributed systems
    comp9283	ext operating systems
    comp9301	cyber security project
    comp9302	cyber security project b
    comp9311	database systems
    comp9312	data analytics for graphs
    comp9313	big data management
    comp9315	database systems implementat'n
    comp9319	web data compression & search
    comp9321	data services engineering
    comp9322	software service design & eng
    comp9323	e-enterprise project
    comp9331	computer networks&applications
    comp9332	network routing and switching
    comp9334	systems capacity planning
    comp9336	mobile data networking
    comp9337	securing fixed & wireless netw
    comp9414	artificial intelligence
    comp9415	computer graphics
    comp9417	machine learning & data mining
    comp9418	advanced machine learning
    comp9434	robotic software architecture
    comp9444	neural networks, deep learning
    comp9447	security engineering workshop
    comp9491	applied ai
    comp9511	human computer interaction
    comp9517	computer vision
    comp9596	research project
    comp9727	recommender systems
    comp9801	ext design&analysis of algo
    comp9814	ext artificial intelligence
    comp9900	info tech project
    comp9901	p/t res. thesis comp sci & eng
    comp9902	res. thesis comp sci & eng f/t
    comp9945	research project
    comp9991	research project a
    comp9992	research project b
    comp9993	research project c
    cven2002	engineering computations
    cven9820	computational struct mechanics
    cven9822	steel & composite structures
    dpst1092	computer systems fundamentals
    elec4605	quantum devices and computers
    elec4632	computer control systems
    engg1811	computing for engineers
    fins3649	compliance & regulatory tech
    fins5549	regtech and compliance
    fndn0301	computing studies
    fndn0303	computing for design
    fndn0304	computing for academic purpose
    fndn0305	computing for acad purpose h
    fndn0306	computing for acad purposes s
    fndn0311	computing studies - t
    fndn0312	computing academic purpose - t
    fndn0314	computing for design - t
    fndn0315	computing for acad purpose th
    fndn0316	computing for acad purp - ts
    gmat2500	surveying computations a
    gmat2550	surveying computations b
    hdat9300	computing 4 hdat
    ides3231	adv computer aided product des
    infs4858	managing complex projects
    infs5848	managing complex projects
    inta1002	interior arch composition 2
    inta1003	interior arch composition 3
    jurd7468	aust legal system comp perspec
    jurd7522	competition law
    jurd7549	child rights comp clinic
    jurd7603	global issues in comp policy
    jurd7610	mediation competition
    jurd7616	international & comparative ip
    laws3022	competition law
    laws3368	aust legal system comp perspec
    laws3510	mediation competition
    laws3549	child rights comp clinic
    laws8016	international & comparative ip
    laws8168	aust legal system comp perspec
    laws8203	global issues in comp policy
    laws8349	child rights comp clinic
    manf9543	comp aided design/manufacture
    math2301	mathematical computing
    math2521	complex analysis
    math2621	higher complex analysis
    math3101	comp maths science & eng
    math3311	comp mathematics for finance
    math3821	stat modelling & computing
    math3871	bayesian inference and comp
    math5305	comp maths science & eng
    math5335	comp mathematics for finance
    math5856	intro to stats and stat comput
    math5960	bayesian inference & comput'n
    mats4005	composites and functional mats
    mats6110	computational materials
    mech4620	computational fluid dynamics
    mech9420	composite materials and mechan
    mgmt2106	comparative management systems
    mtrn2500	comp for mtrn
    mtrn3500	comp appl in mechatonic sys
    phcm9471	comparative h'lth care systems
    ptrl5016	well completions & stimulation
    ptrl6016	well completions & stimulation
    socf5112	complex practice issues
    tabl5544	comparative tax systems
    tabl5545	principles of reg, comp&enfor
    tabl5546	intro to regtech in comp&enfor
    tabl5547	building effec. comp. sys
    ymed3006	comparative health systems
    zeit9100	computer science research f/t
    zeit9101	computer science research p/t
    
    grep -E -i comp course_codes.tsv
    BENV7503	Geocomputation
    BINF3020	Computational Bioinformatics
    BINF9020	Computational Bioinformatics
    BIOM9332	Biocompatibility
    CEIC6711	Complex Fluids
    CHEM3031	Inorg Chem:Trans Metals & Comp
    CODE1110	Computational Design Theory 1
    CODE1210	Computational Design Theory 2
    CODE2121	Advanced Computational Design
    CODE2132	Computational Design Studio IV
    COMP1010	The Art of Computing
    COMP1511	Programming Fundamentals
    COMP1521	Computer Systems Fundamentals
    COMP1531	Software Eng Fundamentals
    COMP1911	Computing 1A
    COMP2041	Software Construction
    COMP2111	System Modelling and Design
    COMP2511	O-O Design & Programming
    COMP2521	Data Structures and Algorithms
    COMP3121	Algorithms & Programming Tech
    COMP3131	Programming Languages & Compil
    COMP3141	Software Sys Des&Implementat'n
    COMP3151	Foundations of Concurrency
    COMP3153	Algorithmic Verification
    COMP3161	Concepts of Programming Lang.
    COMP3211	Computer Architecture
    COMP3222	Digital Circuits and Systems
    COMP3231	Operating Systems
    COMP3311	Database Systems
    COMP3331	Computer Networks&Applications
    COMP3411	Artificial Intelligence
    COMP3421	Computer Graphics
    COMP3431	Robotic Software Architecture
    COMP3511	Human Computer Interaction
    COMP3601	Design Project A
    COMP3821	Ext Algorithms&Prog Techniques
    COMP3891	Ext Operating Systems
    COMP3900	Computer Science Project
    COMP3901	Special Project A
    COMP3902	Special Project B
    COMP4121	Advanced Algorithms
    COMP4128	Programming Challenges
    COMP4141	Theory of Computation
    COMP4161	Advanced Verification
    COMP4336	Mobile Data Networking
    COMP4337	Securing Fixed & Wireless Netw
    COMP4418	Knowledge Representation
    COMP4511	User Interface Design & Constr
    COMP4601	Design Project B
    COMP4920	Professional Issues and Ethics
    COMP4951	Research Thesis A
    COMP4952	Research Thesis B
    COMP4953	Research Thesis C
    COMP4961	Computer Science Thesis A
    COMP4962	Computer Science Thesis B
    COMP4963	Computer Science Thesis C
    COMP6080	Web Front-End Programming
    COMP6441	Security Engineering
    COMP6443	Web Application Security
    COMP6445	Digital Forensics
    COMP6447	Security Assessment
    COMP6448	Security Masterclass
    COMP6451	Cryptocurrency and DLT
    COMP6452	Blockchain App Architecture
    COMP6714	Info Retrieval and Web Search
    COMP6721	(In-)Formal Methods
    COMP6733	Internet of Things
    COMP6741	Algorithms for Intractable Pbs
    COMP6752	Modelling Concurrent Systems
    COMP6771	Advanced C++ Programming
    COMP6841	Extended Security Engineering
    COMP6843	Extended WebApp Security
    COMP6845	Extended Digital Forensics
    COMP9020	Foundations of Comp. Science
    COMP9021	Principles of Programming
    COMP9024	Data Structures & Algorithms
    COMP9032	Microprocessors & Interfacing
    COMP9044	Software Construction
    COMP9101	Design &Analysis of Algorithms
    COMP9102	Programming Lang & Compilers
    COMP9153	Algorithmic Verification
    COMP9154	Foundations of Concurrency
    COMP9164	Concepts of Programming Lang.
    COMP9201	Operating Systems
    COMP9211	Computer Architecture
    COMP9222	Digital Circuits and Systems
    COMP9242	Advanced Operating Systems
    COMP9243	Distributed Systems
    COMP9283	Ext Operating Systems
    COMP9301	Cyber Security Project
    COMP9302	Cyber Security Project B
    COMP9311	Database Systems
    COMP9312	Data Analytics for Graphs
    COMP9313	Big Data Management
    COMP9315	Database Systems Implementat'n
    COMP9319	Web Data Compression & Search
    COMP9321	Data Services Engineering
    COMP9322	Software Service Design & Eng
    COMP9323	e-Enterprise Project
    COMP9331	Computer Networks&Applications
    COMP9332	Network Routing and Switching
    COMP9334	Systems Capacity Planning
    COMP9336	Mobile Data Networking
    COMP9337	Securing Fixed & Wireless Netw
    COMP9414	Artificial Intelligence
    COMP9415	Computer Graphics
    COMP9417	Machine Learning & Data Mining
    COMP9418	Advanced Machine Learning
    COMP9434	Robotic Software Architecture
    COMP9444	Neural Networks, Deep Learning
    COMP9447	Security Engineering Workshop
    COMP9491	Applied AI
    COMP9511	Human Computer Interaction
    COMP9517	Computer Vision
    COMP9596	Research Project
    COMP9727	Recommender Systems
    COMP9801	Ext Design&Analysis of Algo
    COMP9814	Ext Artificial Intelligence
    COMP9900	Info Tech Project
    COMP9901	P/T Res. Thesis Comp Sci & Eng
    COMP9902	Res. Thesis Comp Sci & Eng F/T
    COMP9945	Research Project
    COMP9991	Research Project A
    COMP9992	Research Project B
    COMP9993	Research Project C
    CVEN2002	Engineering Computations
    CVEN9820	Computational Struct Mechanics
    CVEN9822	Steel & Composite Structures
    DPST1092	Computer Systems Fundamentals
    ELEC4605	Quantum Devices and Computers
    ELEC4632	Computer Control Systems
    ENGG1811	Computing for Engineers
    FINS3649	Compliance & Regulatory Tech
    FINS5549	RegTech and Compliance
    FNDN0301	Computing Studies
    FNDN0303	Computing for Design
    FNDN0304	Computing for Academic Purpose
    FNDN0305	Computing for Acad Purpose H
    FNDN0306	Computing for Acad Purposes S
    FNDN0311	Computing Studies - T
    FNDN0312	Computing Academic Purpose - T
    FNDN0314	Computing for Design - T
    FNDN0315	Computing for Acad Purpose TH
    FNDN0316	Computing for Acad Purp - TS
    GMAT2500	Surveying Computations A
    GMAT2550	Surveying Computations B
    HDAT9300	Computing 4 HDAT
    IDES3231	Adv Computer Aided Product Des
    INFS4858	Managing Complex Projects
    INFS5848	Managing Complex Projects
    INTA1002	Interior Arch Composition 2
    INTA1003	Interior Arch Composition 3
    JURD7468	Aust Legal System Comp Perspec
    JURD7522	Competition Law
    JURD7549	Child Rights Comp Clinic
    JURD7603	Global Issues in Comp Policy
    JURD7610	Mediation Competition
    JURD7616	International & Comparative IP
    LAWS3022	Competition Law
    LAWS3368	Aust Legal System Comp Perspec
    LAWS3510	Mediation Competition
    LAWS3549	Child Rights Comp Clinic
    LAWS8016	International & Comparative IP
    LAWS8168	Aust Legal System Comp Perspec
    LAWS8203	Global Issues in Comp Policy
    LAWS8349	Child Rights Comp Clinic
    MANF9543	Comp Aided Design/Manufacture
    MATH2301	Mathematical Computing
    MATH2521	Complex Analysis
    MATH2621	Higher Complex Analysis
    MATH3101	Comp Maths Science & Eng
    MATH3311	Comp Mathematics for Finance
    MATH3821	Stat Modelling & Computing
    MATH3871	Bayesian Inference and Comp
    MATH5305	Comp Maths Science & Eng
    MATH5335	Comp Mathematics for Finance
    MATH5856	Intro to Stats and Stat Comput
    MATH5960	Bayesian Inference & Comput'n
    MATS4005	Composites and Functional Mats
    MATS6110	Computational Materials
    MECH4620	Computational Fluid Dynamics
    MECH9420	Composite Materials and Mechan
    MGMT2106	Comparative Management Systems
    MTRN2500	Comp for MTRN
    MTRN3500	Comp Appl in Mechatonic Sys
    PHCM9471	Comparative H'lth Care Systems
    PTRL5016	Well Completions & Stimulation
    PTRL6016	Well Completions & Stimulation
    SOCF5112	Complex Practice Issues
    TABL5544	Comparative Tax Systems
    TABL5545	Principles of Reg, Comp&Enfor
    TABL5546	Intro to RegTech in Comp&Enfor
    TABL5547	Building Effec. Comp. Sys
    YMED3006	Comparative Health Systems
    ZEIT9100	Computer Science Research F/T
    ZEIT9101	Computer Science Research P/T
    
    The second one looks better because the data itself isn't transformed, only the internal comparisons.

    If we want to know how many courses have "computing" or "computer" in their title, we have to use grep -E, which recognises the alternative operator "|", and wc to count the number of matches. There are a couple of ways to construct the regexp:

    grep -E -i 'computer|computing' course_codes.tsv | wc
         36     159    1236
    
    grep -E -i 'comput(er|ing)' course_codes.tsv | wc
         36     159    1236
    
    If you don't like the irrelevant word and character counts, use wc -l.

    Some of these 36 matches were CSE offerings, whose course codes begin with COMP, SENG or BINF. Which of the matches were courses offered by other schools?

    Think about it for a moment.... There's no "but not" regexp operator, so instead we construct a composite filter with an extra step to deal with eliminating the CSE courses:

    grep -E -i 'computer|computing' course_codes.tsv | grep -E -v '^(COMP|SENG|BINF)'
    DPST1092	Computer Systems Fundamentals
    ELEC4605	Quantum Devices and Computers
    ELEC4632	Computer Control Systems
    ENGG1811	Computing for Engineers
    FNDN0301	Computing Studies
    FNDN0303	Computing for Design
    FNDN0304	Computing for Academic Purpose
    FNDN0305	Computing for Acad Purpose H
    FNDN0306	Computing for Acad Purposes S
    FNDN0311	Computing Studies - T
    FNDN0312	Computing Academic Purpose - T
    FNDN0314	Computing for Design - T
    FNDN0315	Computing for Acad Purpose TH
    FNDN0316	Computing for Acad Purp - TS
    HDAT9300	Computing 4 HDAT
    IDES3231	Adv Computer Aided Product Des
    MATH2301	Mathematical Computing
    MATH3821	Stat Modelling & Computing
    ZEIT9100	Computer Science Research F/T
    ZEIT9101	Computer Science Research P/T
    
    The last ones are from the Computer Science school at ADFA.
  2. Consider a file called enrollments.txt which contains data about student enrollment in courses. There is one line for each student enrolled in a course:
    ls -l enrollments.txt
    -rw-r--r-- 1 cs2041 cs2041 685532 Jul 14  2022 enrollments.txt
    
    wc enrollments.txt
      9832  24651 685532 enrollments.txt
    
    head enrollments.txt
    COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M
    COMP4952|5200422|Ji, Aidan Han|3778/2|SENGAH|083.357|22T2|19960203|M
    COMP1511|5296066|Xue, Marko|8543|COMPZ1|080.000|22T2|19980306|M
    COMP1511|5294960|Zhou, Yvonne|3784/3|COMPA1|085.583|22T2|20001009|F
    COMP9511|5242743|Luong, Zihan|3707/1|BINFB1|068.000|22T2|20060117|F
    COMP1911|5259200|Wang, Jiaqi Joanne|8543|COMPBH|081.125|22T2|19950826|F
    COMP9902|5241298|Li, Laura|8543|COMPI1|084.000|22T2|20000330|F
    COMP9024|5245288|Thomas, George|3789/2|SENGAH|079.923|22T2|19970427|M
    COMP2041|5298688|Ng, Henry|8543|COMPA1|076.000|22T2|20031212|M
    COMP3141|5298688|Ng, Henry|8543|COMPA1|076.000|22T2|20031212|M
    
    The following commands count how many students are enrolled in COMP2041 or COMP9044. The two course codes differ in more than one character, so alternation is used rather than a character class.

    The first version below is often preferred because initially you may want to know "how many xxx", then having found that out the next question might be, "well give me a sample of 10 or so of them". Then it's a simple matter of replacing wc by head.

    grep -E '^COMP(2041|9044)' enrollments.txt | wc -l
    802
    
    grep -E -c '^COMP(2041|9044)' enrollments.txt
    802
    
    The last field in the enrollment file records the student's gender. This command counts the number of female students enrolled in the courses.
    grep -E '^COMP(2041|9044)' enrollments.txt | grep -E 'F$' | wc -l
    193
    
    Not a very good gender balance, is it?

    By the way, the two grep -Es could have been combined into one. How?

    This command will give a sorted list of course codes:

    cut -d'|' -f1 enrollments.txt | sort | uniq
    COMP1010
    COMP1511
    COMP1521
    COMP1531
    COMP1911
    COMP2041
    COMP2511
    COMP2521
    COMP3121
    COMP3141
    COMP3151
    COMP3153
    COMP3331
    COMP3511
    COMP3900
    COMP3901
    COMP4336
    COMP4601
    COMP4951
    COMP4952
    COMP4953
    COMP4961
    COMP4962
    COMP4963
    COMP6443
    COMP6447
    COMP6452
    COMP6721
    COMP6741
    COMP6771
    COMP6843
    COMP9020
    COMP9021
    COMP9024
    COMP9044
    COMP9101
    COMP9153
    COMP9154
    COMP9242
    COMP9311
    COMP9312
    COMP9313
    COMP9319
    COMP9323
    COMP9331
    COMP9336
    COMP9414
    COMP9417
    COMP9444
    COMP9447
    COMP9491
    COMP9511
    COMP9517
    COMP9727
    COMP9900
    COMP9901
    COMP9902
    COMP9991
    COMP9992
    COMP9993
    
    The student records system known to users as myUNSW is built on top of a large US product known as PeopleSoft (the company was taken over by Oracle in 2004). On a scale of 1 to 10 the quality of the design of this product is about 3. One of its many flaws is its insistence that everybody must have two names, a "Last Name" and a "First Name", neither of which can be empty. To signify that a person has only a single name (common in Sri Lanka, for example), the system stores a dot character in the "First Name" field. The enrollments file shows the data as stored in the system, with a comma and space separating the component names. It has some single-named people (note that the names themselves have been disguised):
    grep -E ', \.' enrollments.txt
    COMP1511|5230122|So, .|8543|COMPAS|075.286|22T2|19970908|F
    COMP2511|5240102|Lu, .|1650|COMPCS|083.929|22T2|19951024|M
    COMP3331|5240102|Lu, .|1650|COMPCS|083.929|22T2|19951024|M
    COMP2511|5224053|Tran, .|3785/3|COMPY1|080.033|22T2|20031206|F
    COMP3121|5224053|Tran, .|3785/3|COMPY1|080.033|22T2|20031206|F
    COMP3331|5224053|Tran, .|3785/3|COMPY1|080.033|22T2|20031206|F
    COMP3511|5281400|Park, .|8543|COMPA1|073.900|22T2|19940714|F
    COMP9900|5280116|Lim, .|3778/2|ELECAH|072.563|22T2|20030705|M
    COMP1531|5258398|Mai, .|8543|COMPCS|076.133|22T2|19950918|F
    COMP9313|5279423|Patel, .|3782/3|COMPA1|073.737|22T2|19990814|M
    COMP9414|5279423|Patel, .|3782/3|COMPA1|073.737|22T2|19990814|M
    COMP6771|5287674|Bui, .|3706/3|COMPA1|076.000|22T2|20001026|M
    COMP9331|5287674|Bui, .|3706/3|COMPA1|076.000|22T2|20001026|M
    COMP1531|5229312|So, .|8543|COMPA1|070.889|22T2|19971005|F
    COMP1511|5236671|Tan, .|3764/5|COMPA1|065.333|22T2|20030519|F
    COMP2041|5254501|Guo, .|8543|COMPFR|056.400|22T2|20000617|F
    COMP2521|5254501|Guo, .|8543|COMPFR|056.400|22T2|20000617|F
    COMP2511|5282825|Xie, .|3778/2|COMPA1 FINSA1|076.333|22T2|20030117|F
    COMP2521|5289353|Zheng, .|8543|COMPFR|073.952|22T2|20041116|F
    COMP1511|5240422|Yuan, .|8543|BINFAH|034.667|22T2|19950529|F
    COMP1521|5279246|Lam, .|3778/3|SENGAH|087.344|22T2|19941123|F
    COMP3121|5280766|Tian, .|3785/2|COMPA1 MATHP1|000.000|22T2|19990117|F
    COMP6721|5280766|Tian, .|3785/2|COMPA1 MATHP1|000.000|22T2|19990117|F
    COMP6771|5280766|Tian, .|3785/2|COMPA1 MATHP1|000.000|22T2|19990117|F
    COMP9313|5274705|Rahman, .|8543|COMPA1|092.167|22T2|20010611|M
    COMP3900|5291673|Qin, .|3778/3|COMPIS|079.375|22T2|20060512|M
    COMP9020|5205426|Zhao, .|3784/4|SENGAH|074.692|22T2|19941106|F
    COMP3511|5260150|Feng, .|3764/1|COMPAS|067.455|22T2|20000108|F
    COMP3331|5232121|Lim, .|3707/2|ECONA1 MATHK1|072.000|22T2|20001229|F
    COMP6452|5232121|Lim, .|3707/2|ECONA1 MATHK1|072.000|22T2|20001229|F
    COMP9417|5281669|Lu, .|3961/1|COMPAS|077.500|22T2|20000612|F
    COMP9517|5281669|Lu, .|3961/1|COMPAS|077.500|22T2|20000612|F
    COMP9902|5267614|Jiang, .|3789/1|COMPA1|078.667|22T2|20040623|F
    COMP9902|5277612|Cai, .|3707/4|COMPA1|071.125|22T2|19971207|F
    COMP1521|5224424|Luong, .|3784/3|SENGAH|070.857|22T2|20050628|F
    COMP1531|5282863|Zhou, .|8543|SENGAH|079.000|22T2|19980225|M
    COMP1531|5299583|Lewis, .|8543|MTRNAH|064.429|22T2|19960408|M
    COMP2521|5299583|Lewis, .|8543|MTRNAH|064.429|22T2|19960408|M
    COMP9444|5278776|Zhong, .|3778/3|COMPA1 SOLABH|070.667|22T2|19970416|M
    COMP9517|5278776|Zhong, .|3778/3|COMPA1 SOLABH|070.667|22T2|19970416|M
    COMP9021|5246511|Huang, .|3674/4|COMPAS|063.727|22T2|20030526|M
    COMP9024|5246511|Huang, .|3674/4|COMPAS|063.727|22T2|20030526|M
    COMP9311|5246511|Huang, .|3674/4|COMPAS|063.727|22T2|20030526|M
    COMP3331|5201179|Han, .|3707/3|MTRNAH|068.429|22T2|20000519|M
    COMP6771|5201179|Han, .|3707/3|MTRNAH|068.429|22T2|20000519|M
    COMP1531|5224314|Choi, .|3778/1|COMPAS|071.091|22T2|19971129|M
    COMP9313|5203989|Xue, .|3673/1|ENGGAH|076.167|22T2|19980706|M
    COMP3121|5237499|Lai, .|3784/1|SENGAH|078.833|22T2|19941005|F
    COMP9901|5219299|Wu, .|3778/1|COMPAS|082.821|22T2|19980321|F
    COMP1521|5213563|Xu, .|3707/2|COMPSS|056.529|22T2|20020212|F
    COMP1521|5265834|Shi, .|3791/1|COMPA1|070.846|22T2|19990129|F
    COMP6452|5243392|Fan, .|3785/1|BINFB1|076.000|22T2|20020105|F
    COMP9331|5243392|Fan, .|3785/1|BINFB1|076.000|22T2|20020105|F
    COMP9517|5243392|Fan, .|3785/1|BINFB1|076.000|22T2|20020105|F
    
    What would have happened if we forgot the backslash?

    If we wanted to know how many different students there were of this type rather than all enrollments, just cut out the second field (student ID) and use uniq. It's not necessary to sort the data in this case only because the data is clustered, that is, all equal values are adjacent although they're not necessarily sorted.

    grep -E ', \.' enrollments.txt | cut -d'|' -f2 | uniq | wc
         37      37     296
    
  3. Now let us turn our attention from students and courses to programs. The enrollments file, as well as linking a student to the courses they're taking, also links them to the program (degree) that they are currently enrolled in. Consider that we want to find out the program codes of the students taking COMP2041. The following pipeline will do this:
    # field 4 is "program/stage": keep the program code only, then list each code once
    grep -E 'COMP(2041|9044)' enrollments.txt | cut -d'|' -f4 | cut -d/ -f1  | sort | uniq
    
    If we want to know how many students come from each program, ordered from most common program to least common program, try this:
    grep -E 'COMP(2041|9044)' enrollments.txt | cut -d'|' -f4 | cut -d/ -f1 | sort | uniq -c | sort -nr
        223 8543
        153 3778
        109 3707
         62 3784
         40 3785
         29 3674
         16 3789
         15 3959
         14 3764
         13 3768
         12 3783
         12 3781
         11 3791
         11 1650
         10 7543
         10 3782
          7 3970
          5 3762
          4 3767
          4 3706
          3 4515
          3 3736
          3 3673
          3 2645
          2 8959
          2 3991
          2 3956
          2 3786
          2 3761
          2 3409
          1 8750
          1 8338
          1 8161
          1 7021
          1 4822
          1 4500
          1 3987
          1 3978
          1 3925
          1 3738
          1 3632
          1 3584
          1 3529
          1 3523
          1 3155
          1 3154
          1 1710
          1 1550
    
    Note that a tab is usually inserted between the count and the data, but not all implementations of the uniq command ensure this.
  4. Consider a file called program_codes.tsv that contains the code and name of each program offered at UNSW (excluding research programs):

    wc program_codes.tsv
     250 1001 7295 program_codes.tsv
    
    head program_codes.tsv
    1004 	Joint PhD
    1292 	PhD Art, Design and Media
    1400 	Psychology
    1540 	Economics
    1545 	Actuarial Studies
    1550 	Marketing
    1561 	Banking and Finance
    1630 	Civil & Environmental Eng
    1640 	Electrical Engineering
    1650 	Computer Science and Eng
    
    We can use this file to give more details of the programs that COMP2041 students are taking, if some users don't want to deal with just program codes.
    grep -E 'COMP(2041|9044)' enrollments.txt | cut -d'|' -f4 | cut -d/ -f1 | sort | uniq | join - program_codes.tsv
    1550 Marketing
    1650 Computer Science and Eng
    1710 Biomedical Engineering
    2645 Engineering (MPhil)
    3154 Actuarial Studies / Science
    3155 Actuarial Studies / Commerce
    3409 Arts
    3523 Advanced Maths (Hons)/Commerce
    3529 Commerce/Science
    3584 Commerce / Information Systems
    3632 Advanced Science (Honours)
    3673 Economics / Computer Science
    3674 Actuarial Stu / Computer Sci
    3706 Engineering Science
    3707 Engineering (Honours)
    3736 BE (Hons) ME Elec Eng
    3738 Engineering Science / CompSc
    3761 Adv Math (Hons) / Eng (Hons)
    3762 AdvSci(Hons)/Engineering(Hons)
    3764 Engineering (Hons)/Commerce
    3767 Engineering (Hons) / Science
    3768 Eng (Hons) / MBiomedE
    3778 Computer Science
    3781 Adv Maths (Hons) / Comp Sci
    3782 Adv Science (Hons) / Comp Sci
    3783 Computer Science / Arts
    3784 Commerce / Computer Science
    3785 Engineering (Hons) / Comp Sci
    3786 Computer Science / Law
    3789 Science / Computer Science
    3791 Computer Science / Media Arts
    3925 Science and Business
    3956 Advanced Mathematics (Honours)
    3959 Data Science and Decisions
    3970 Science
    3978 Computer Science
    3987 Advanced Science (Honours)
    3991 Medical Science
    4500 Science (Honours)
    4515 Comp Sci & Eng (Honours)
    4822 Design
    7543 Computing
    8161 Financial Mathematics
    8338 Engineering Science
    8543 Information Technology
    8750 Statistics
    8959 Data Science and Decisions
    
    We can combine the enrollment counts (for both courses) with the program titles to produce a self-descriptive tally. It's even better if it's in decreasing order of popularity, so after joining the tallies with the program titles, re-sort the composite data:
    grep -E 'COMP(2041|9044)' enrollments.txt | cut -d'|' -f4 | cut -d/ -f1 | sort | uniq -c | join -1 2 -a 1 - program_codes.tsv  | sort -k2rn
    8543 223 Information Technology
    3778 153 Computer Science
    3707 109 Engineering (Honours)
    3784 62 Commerce / Computer Science
    3785 40 Engineering (Hons) / Comp Sci
    3674 29 Actuarial Stu / Computer Sci
    3789 16 Science / Computer Science
    3959 15 Data Science and Decisions
    3764 14 Engineering (Hons)/Commerce
    3768 13 Eng (Hons) / MBiomedE
    3781 12 Adv Maths (Hons) / Comp Sci
    3783 12 Computer Science / Arts
    1650 11 Computer Science and Eng
    3791 11 Computer Science / Media Arts
    3782 10 Adv Science (Hons) / Comp Sci
    7543 10 Computing
    3970 7 Science
    3762 5 AdvSci(Hons)/Engineering(Hons)
    3706 4 Engineering Science
    3767 4 Engineering (Hons) / Science
    2645 3 Engineering (MPhil)
    3673 3 Economics / Computer Science
    3736 3 BE (Hons) ME Elec Eng
    4515 3 Comp Sci & Eng (Honours)
    3409 2 Arts
    3761 2 Adv Math (Hons) / Eng (Hons)
    3786 2 Computer Science / Law
    3956 2 Advanced Mathematics (Honours)
    3991 2 Medical Science
    8959 2 Data Science and Decisions
    1550 1 Marketing
    1710 1 Biomedical Engineering
    3154 1 Actuarial Studies / Science
    3155 1 Actuarial Studies / Commerce
    3523 1 Advanced Maths (Hons)/Commerce
    3529 1 Commerce/Science
    3584 1 Commerce / Information Systems
    3632 1 Advanced Science (Honours)
    3738 1 Engineering Science / CompSc
    3925 1 Science and Business
    3978 1 Computer Science
    3987 1 Advanced Science (Honours)
    4500 1 Science (Honours)
    4822 1 Design
    7021 1
    8161 1 Financial Mathematics
    8338 1 Engineering Science
    8750 1 Statistics
    
    Note the curious extra space before the title of some programs. It took me a while to work it out, can you? (Hint: how are the programs shown in the enrollment file?) Suggest an appropriate change to the pipeline.
  5. Lecture exercises on wc:
    1. how many different programs does UNSW offer?
      wc -l program_codes.tsv
      250 program_codes.tsv
      
    2. how many times was WebCMS accessed?
      wc -l access_log.txt
      59779 access_log.txt
      
    3. how many students are studying in CSE?
      wc -l enrollments.txt
      9832 enrollments.txt
      

      The above solutions assume that we're talking about total enrollments. If the question actually meant how many distinct individuals are studying courses offered by CSE, then we'd answer it as:

      cut -d'|' -f2 enrollments.txt | sort | uniq | wc -l
      6350
      
    4. how many words are there in the book?
      wc -w book.txt
      60428 book.txt
      
    5. how many lines are there in the story?
      wc -l story.txt
      87 story.txt
      
Shell
Shell commands for power users.

#include <stdio.h>

// echo: write the command-line arguments to stdout,
// separated by single spaces and followed by a newline
int main(int argc, char *argv[]) {

    // nothing precedes the first argument, a space precedes every later one
    const char *separator = "";

    for (int arg = 1; arg < argc; arg++) {
        fputs(separator, stdout);
        fputs(argv[arg], stdout);
        separator = " ";
    }
    putchar('\n');

    return 0;
}

Download echo.c



import sys


def main():
    """
    write the command-line arguments to stdout separated by single spaces
    """
    arguments = sys.argv[1:]
    # print's default separator is a single space, its default end a newline
    print(*arguments)


if __name__ == '__main__':
    main()

Download echo.py


A simple shell script demonstrating access to arguments.
# $0 is the name the script was invoked with
echo My name is "$0"
# $$ is the process ID of the shell running this script
echo My process number is $$
# $# is the number of command-line arguments
echo I have $# arguments

# you're not going to see any difference between these four
# unless you use them in a loop, because echo joins its arguments with spaces
echo My arguments separately are $*
echo My arguments together are "$*"
echo My arguments separately are $@
echo My arguments as quoted are "$@"

# positional parameters above 9 need braces: $10 would be parsed as ${1}0
echo My 5th argument is "'$5'"
echo My 10th argument is "'${10}'"
echo My 255th argument is "'${255}'"

Download args.sh


$ ./accessing_args.sh one two "three four"
Using $*: one two three four
Using "$*": one two three four
Using "$@": one two three four
# unquoted $* is word-split, so an argument containing spaces becomes several words
echo 'Using $*:'
for a in $*
do
    echo "$a"
done

# "$*" is a single word: all the arguments joined together
echo 'Using "$*":'
for a in "$*"
do
    echo "$a"
done

# This is the way to loop over command-line arguments
# "$@" yields one word per argument, preserving whitespace inside arguments
echo 'Using "$@":'
for a in "$@"
do
  echo "$a"
done

Download accessing_args.sh


l [file|directories...] - list files


Short shell scripts can be used for convenience.

It is common to put these scripts in a directory such as /home/z1234567/scripts then add this directory to PATH e.g in .bash_login
PATH=$PATH:/home/z1234567/scripts

Note: "$@" like $* expands to the arguments to the script, but preserves whitespace in arguments.
# pass every argument through to ls unchanged; -l long format, -a include dot files, -s show sizes
ls -las "$@"

Download l

simple emulation of /usr/bin/seq for a COMP(2041|9044) example

Print the integers 1..n or m..n
# Work out the range from the number of arguments:
#   one argument  -> 1..<last>
#   two arguments -> <first>..<last>
if test $# = 1
then
    first=1
    last=$1
elif test $# = 2
then
    first=$1
    last=$2
else
    echo "Usage: $0 <last> or  $0 <first> <last>" 1>&2
    # stop here: without a range the loop below would run with empty operands
    exit 1
fi

# print each integer in the range on its own line
number=$first
while test "$number" -le "$last"
do
    echo "$number"
    number=$((number + 1))
done

Download seq.v1.sh

simple emulation of /usr/bin/seq for a COMP(2041|9044) example
Print the integers 1..n or m..n
# Same logic as the test-based version, written with the [ ] form of test.
# Work out the range from the number of arguments:
#   one argument  -> 1..<last>
#   two arguments -> <first>..<last>
if [ $# = 1 ]
then
    first=1
    last=$1
elif [ $# = 2 ]
then
    first=$1
    last=$2
else
    echo "Usage: $0 <last> or  $0 <first> <last>" 1>&2
    # stop here: without a range the loop below would run with empty operands
    exit 1
fi

# print each integer in the range on its own line
number=$first
while [ "$number" -le "$last" ]
do
    echo "$number"
    number=$((number + 1))
done

Download seq.v2.sh


Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.

For example:
repeat_seconds=300  #check every 5 minutes

# exactly three arguments are required: the page, the pattern and who to notify
if test $# = 3
then
    url=$1
    regexp=$2
    email_address=$3
else
    echo "Usage: $0 <url> <regex> <email-address>" 1>&2
    exit 1
fi

# poll until the page source matches, then notify once and stop
while true
do
    # only grep's exit status matters here, so its output is discarded
    if curl --silent "$url"|grep -E "$regexp" >/dev/null
    then
        # the 2nd echo is for testing, remove to really send email
        # (with it in place the mail command line is printed, not executed)
        echo "Generated by $0" |
        echo mail -s "website '$url' now matches regex '$regexp'" "$email_address"
        exit 0
    fi
    sleep $repeat_seconds
done

Download watch_website.sh



Change the names of the specified files to lower case. (simple version of the perl utility rename)

Note use of test to check if the new filename is unchanged.

Note the double quotes around $filename so filenames containing spaces are not broken into multiple words

Note the use of mv -- to stop mv interpreting a filename beginning with - as an option

Note files named -n or -e still break the script because echo will treat them as an option.
# at least one filename is required
if test $# = 0
then
    echo "Usage $0: <files>" 1>&2
    exit 1
fi

for filename in "$@"
do
    new_filename=$(echo "$filename" | tr '[:upper:]' '[:lower:]')

    # nothing to do if the name is already all lower case
    test "$filename" = "$new_filename" &&
        continue

    # -e (exists) rather than -r (readable): an existing but unreadable
    # file at the target name must still block the rename,
    # otherwise mv would silently overwrite it
    if test -e "$new_filename"
    then
        echo "$0: $new_filename exists" 1>&2
    elif test -e "$filename"
    then
        # -- stops mv treating a filename beginning with - as an option
        mv -- "$filename" "$new_filename"
    else
        echo "$0: $filename not found" 1>&2
    fi

done

Download tolower.sh

create 1001 C files, compile and run them
this program creates 1000 files file0.c .. file999.c; file$i.c contains function f$i which returns $i, for example file42.c contains function f42 which returns 42. main.c is created with code to call all 1000 functions and print the sum of their return values
first add the initial lines to main.c note the use of quotes on eof to disable variable interpolation in the here document
# start main.c; the quoted 'eof' delimiter means the here document is copied literally
cat >main.c <<'eof'
#include <stdio.h>

int main(void) {
    int v = 0 ;
eof

i=0
while test $i -lt 1000
do
    # add a line to main.c to call the function f$i
    # (unquoted eof delimiter, so $i is expanded inside the here document)

    cat >>main.c <<eof
    int f$i(void);
    v += f$i();
eof

    # create file$i.c containing function f$i

    cat >file$i.c <<eof
int f$i(void) {
    return $i;
}
eof

    i=$((i + 1))
done

# finish main.c: print the accumulated sum and close the function
cat >>main.c <<'eof'
    printf("%d\n", v);
    return 0;
}
eof

# compile and run the 1001 C files
# time clang main.c file*.c
# ./a.out

Download create_1001_file_C_program.sh


# written by andrewt@unsw.edu.au for COMP(2041|9044)
#
# Run as plagiarism_detection.simple_diff.sh <files>
# Report if any of the files are copies of each other
#
# Note use of diff -iw so changes in white-space or case
# are ignored
#
# (shell tracing via set -x is not enabled: it would write every one of the
# pairwise test/diff commands to stderr and bury the report)

for file1 in "$@"
do
    for file2 in "$@"
    do
        # the inner loop only visits files listed before file1,
        # so each unordered pair is compared exactly once
        test "$file1" = "$file2" &&
            break # avoid comparing pairs of assignments twice

        # diff exits 0 when it finds no differences; its output is not needed
        if diff -i -w "$file1" "$file2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi

    done
done

Download plagiarism_detection.simple_diff.sh



Improved version of plagiarism_detection.simple_diff.sh

The substitution s/\/\/.*// removes // style C comments.
This means changes in comments won't affect comparisons.

Note use of temporary files
# $$ (this shell's process ID) is appended to the temporary file names
TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$


for file1 in "$@"
do
    for file2 in "$@"
    do
        test "$file1" = "$file2" &&
            break # avoid comparing pairs of assignments twice

        # strip // comments from both files before comparing them
        sed 's/\/\/.*//' "$file1" >$TMP_FILE1
        sed 's/\/\/.*//' "$file2" >$TMP_FILE2

        # diff exits 0 when the stripped files match (ignoring case and white-space)
        if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi

    done
done

# clean up; this line is only reached if the loops run to completion
rm -f $TMP_FILE1 $TMP_FILE2

Download plagiarism_detection.comments.sh



Improved version of plagiarism_detection.comments.sh
change all C strings to the letter 's' and change all identifiers to the letter 'v'.
Hence changes in strings & identifiers will be ignored.
TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$

# s/"[^"]*"/s/g changes strings to the letter 's'
# The * is essential: without it only one-character strings would match.
# It won't match a few C strings (e.g. those containing \")
# which is OK for our purposes

# s/[a-zA-Z_][a-zA-Z0-9_]*/v/g changes variable names to 'v'
# It will also change function names, keywords etc.
# which is OK for our purposes.

substitutions='
    s/\/\/.*//
    s/"[^"]*"/s/g
    s/[a-zA-Z_][a-zA-Z0-9_]*/v/g'

for file1 in "$@"
do
    for file2 in "$@"
    do
        test "$file1" = "$file2" &&
            break # avoid comparing pairs of assignments twice

        # normalise both files, then compare the normalised copies
        sed "$substitutions" "$file1" >$TMP_FILE1
        sed "$substitutions" "$file2" >$TMP_FILE2

        if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done
rm -f $TMP_FILE1 $TMP_FILE2

Download plagiarism_detection.identifiers.sh



Improved version of plagiarism_detection.identifiers.sh

Note the use of sort so line reordering won't prevent detection of plagiarism.
TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$

# remove // comments, replace string literals with 's' and identifiers with 'v'
# note "[^"]*" - the * lets the pattern match strings of any length
substitutions='
    s/\/\/.*//
    s/"[^"]*"/s/g
    s/[a-zA-Z_][a-zA-Z0-9_]*/v/g'

for file1 in "$@"
do
    for file2 in "$@"
    do
        test "$file1" = "$file2" &&
            break # avoid comparing pairs of assignments twice

        # sort the normalised lines so reordered code still compares equal
        sed "$substitutions" "$file1"|sort >$TMP_FILE1
        sed "$substitutions" "$file2"|sort >$TMP_FILE2

        if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done
rm -f $TMP_FILE1 $TMP_FILE2

Download plagiarism_detection.reordering.sh



Improved version of plagiarism_detection.reordering.sh with robust creation and removal of temporary files
# mktemp creates each temporary file and prints its name
TMP_FILE1=$(mktemp)
TMP_FILE2=$(mktemp)
# remove the temporary files however the script ends:
# normal exit (EXIT), ctrl-C (INT) or kill (TERM)
trap 'rm -f $TMP_FILE1 $TMP_FILE2;exit' INT TERM EXIT

# remove // comments, replace string literals with 's' and identifiers with 'v'
# note "[^"]*" - the * lets the pattern match strings of any length
substitutions='
    s/\/\/.*//
    s/"[^"]*"/s/g
    s/[a-zA-Z_][a-zA-Z0-9_]*/v/g'

for file1 in "$@"
do
    for file2 in "$@"
    do
        test "$file1" = "$file2" &&
            break # avoid comparing pairs of assignments twice

        # sort the normalised lines so reordered code still compares equal
        sed "$substitutions" "$file1"|sort >$TMP_FILE1
        sed "$substitutions" "$file2"|sort >$TMP_FILE2

        if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done

Download plagiarism_detection.mktemp.sh


print prime numbers < 1000
note use of the local Shell builtin to scope a variable: without the local declaration the variable i in the function would be global and would break the bottom while loop
local is not (yet) POSIX but is widely supported
# is_prime <n>
# exit status 0 if the integer n is prime, 1 otherwise
is_prime() {
    # local keeps n and i private to the function so the caller's i is untouched
    local n i
    n=$1

    # 0 and 1 are not prime; without this check the loop below
    # never runs for them and they would be reported as prime
    test "$n" -lt 2 &&
        return 1

    # trial division by every integer from 2 to n-1
    i=2
    while test $i -lt $n
    do
        test $((n % i)) -eq 0 &&
            return 1
        i=$((i + 1))
    done
    return 0
}

# print every prime below 1000, one per line
i=0
while test $i -lt 1000
do
    is_prime $i && echo $i
    i=$((i + 1))
done

Download local.sh

print positive integers for one second real time
# remember this script's process ID for the background sub-shell to use
my_process_id=$$

# launch an asynchronous sub-shell that will kill
# this process in a second
(sleep 1; kill $my_process_id) &

# count upwards until the kill arrives
i=0
while true
do
    echo $i
    i=$((i + 1))
done

Download async.v0.sh

count slowly and laugh at interrupts (ctrl-C)
catch signal SIGINT and print message
# on SIGINT (ctrl-C) run the handler instead of terminating
trap 'echo ha ha' INT

# print one number per second, forever
n=0
while true
do
    echo "$n"
    sleep 1
    n=$((n + 1))
done

Download laugh.sh

print positive integers for one second real time

catch signal SIGTERM, print message and exit
# the handler is single-quoted, so $n is expanded when the signal arrives,
# not when the trap is installed
trap 'echo loop executed $n times in 1 second; exit 0' TERM

# launch a sub-shell that will terminate
# this process in 1 second
my_process_id=$$
(sleep 1; kill $my_process_id) &

# count iterations until the TERM signal triggers the handler above
n=0
while true
do
    n=$((n + 1))
done

Download async.v1.sh


compile the files of a multi-file C program in parallel; use create_1001_file_C_program.sh to create suitable test data

On a CPU with n cores this can be (nearly) n times faster

If there are large number of C files we may exhaust memory or operating system resources

# start one background compile per source file, all running at once
for f in "$@"
do
    clang -c "$f" &
done

# wait for the incremental compiles to finish
# and then compile .o files into single binary
wait
clang -o binary -- *.o

Download parallel_compile.v0.sh


compile the files of a multi-file C program in parallel; use create_1001_file_C_program.sh to create suitable test data
on Linux getconf will tell us how many cores the machine has otherwise assume 8
# use the number of online processors, falling back to 8 if getconf fails
max_processes=$(getconf _NPROCESSORS_ONLN 2>/dev/null) ||
    max_processes=8

# NOTE: this breaks if a filename contains whitespace or quotes

# run one clang per file, with at most $max_processes running at a time
echo "$@"|
xargs --max-procs=$max_processes --max-args=1 clang -c

# link the object files into a single binary
clang -o binary -- *.o

Download parallel_compile.v1.sh


compile the files of a multi-file C program in parallel use create_1001_file_C_program.sh to create suitable test data
find's -print0 option terminates pathnames with a '\0' xargs's --null option expects '\0' terminated input as '\0' can not appear in file names this can handle any filename
on Linux getconf will tell us how many cores the machine has; if getconf fails assume 8
# use the number of online processors, falling back to 8 if getconf fails
max_processes=$(getconf _NPROCESSORS_ONLN 2>/dev/null) ||
    max_processes=8

# pass pathnames '\0'-terminated from find to xargs so any filename is handled;
# run one clang per file, with at most $max_processes running at a time
find "$@" -print0|
xargs --max-procs=$max_processes --max-args=1  --null clang -c

# link the object files into a single binary
clang -o binary -- *.o

Download parallel_compile.v2.sh


compile the files of a multi-file C program in parallel; use create_1001_file_C_program.sh to create suitable test data
# run clang -c once per argument; parallel substitutes each filename for {}
parallel clang -c '{}' ::: "$@"

# link the object files into a single binary
clang -o binary -- *.o

Download parallel_compile.v3.sh


print prime numbers < 1000

Rewritten to use bash arithmetic extension (())
This makes the program more readable but less portable.
# is_prime <n>
# exit status 0 if the integer n is prime, 1 otherwise
is_prime() {
    # local keeps n and i private to the function so the caller's i is untouched
    local n i
    n=$1

    # 0 and 1 are not prime; without this check the loop below
    # never runs for them and they would be reported as prime
    if ((n < 2))
    then
        return 1
    fi

    # trial division by every integer from 2 to n-1
    i=2
    while ((i < n))
    do
        if ((n % i == 0))
        then
            return 1
        fi
        i=$((i + 1))
    done
    return 0
}

# print every prime below 1000, one per line
i=0
while ((i < 1000))
do
    is_prime $i && echo $i
    i=$((i + 1))
done

Download bash_arithmetic.sh

Python Intro

"""
Compute Pythagoras' Theorem

written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
translated from perl written by andrewt@cse.unsw.edu.au
"""

import math

# read the two shorter sides; float() raises ValueError on non-numeric input
side_x = float(input("Enter x: "))
side_y = float(input("Enter y: "))

# hypotenuse = square root of the sum of the squares
sum_of_squares = side_x**2 + side_y**2
hypotenuse = math.sqrt(sum_of_squares)

print(f"Square root of {side_x} squared + {side_y} squared is {hypotenuse}")

Download pythagoras.py


"""
Read numbers until end of input (or a non-number) is reached
Then print the sum of the numbers

written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
translated from perl written by andrewt@cse.unsw.edu.au
"""

from sys import stdin

# running total of every line that parsed as an integer
total = 0

for raw_line in stdin:
    text = raw_line.strip()
    try:
        value = int(text)
    except ValueError as error:
        # report the unparsable line and carry on with the next one
        print(error)
    else:
        total += value

print(f"Sum of the numbers is {total}")

Download sum_stdin.py


"""
Simple example reading a line of input and examining characters
written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
"""

# input() raises EOFError when there is nothing left to read
try:
    text = input("Enter some input: ")
except EOFError:
    print("could not read any characters")
    exit(1)

length = len(text)
print(f"That line contained {length} characters")

# an empty line has no first or last character to show
if text:
    print(f"The first character was '{text[0]}'")
    print(f"The last character was '{text[-1]}'")

Download line_chars.py


"""
Reads lines of input until end-of-input
Print "snap!" if two consecutive lines are identical

written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
translated from perl written by andrewt@cse.unsw.edu.au
"""

# None never equals a string, so the first line can never "snap"
previous_line = None

while True:
    try:
        current_line = input("Enter line: ")
    except EOFError:
        # end of input: finish the prompt line, then stop
        print()
        break

    if current_line == previous_line:
        print("Snap!")
        break

    previous_line = current_line

Download snap_consecutive.py


"""
Create a string of size 2^n by concatenation
written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
"""

import sys

# exactly one argument is expected: the number of doublings
if len(sys.argv) != 2:
    print(f"Usage: {sys.argv[0]}: <n>")
    exit(1)

doublings = int(sys.argv[1])

n = 0
text = "@"

# each pass doubles the string, so it ends up 2^n characters long
while n < doublings:
    text = text + text
    n += 1

print(f"String of 2^{n} = {len(text)} characters created")

Download exponential_concatenation.py

Python More

Python implementation of /bin/echo
using indexing & while, not pythonesque
import sys

# walk the arguments by index, writing a space before all but the first
position = 1
argument_count = len(sys.argv)
while position < argument_count:
    separator = "" if position == 1 else " "
    print(separator, sys.argv[position], sep="", end="")
    position += 1
# finish with a newline
print()

Download echo.0.py


Python implementation of /bin/echo
using indexing & range, not pythonesque
import sys

# index over the arguments, prefixing every one except the first with a space
for index in range(1, len(sys.argv)):
    prefix = '' if index == 1 else ' '
    print(prefix + sys.argv[index], end='')
# finish with a newline
print()

Download echo.1.py


Python implementation of /bin/echo
import sys

# the slices are simply empty when the arguments are missing
leading, remaining = sys.argv[1:2], sys.argv[2:]
for word in leading:
    print(word, end='')
# every later argument is preceded by one space
for word in remaining:
    print(' ' + word, end='')
print()

Download echo.2.py


Python implementation of /bin/echo
import sys

# join the arguments with single spaces and emit them as one line
arguments = sys.argv[1:]
sys.stdout.write(' '.join(arguments) + '\n')

Download echo.3.py


Python implementation of /bin/echo
import sys

# unpack the arguments into print, which separates them with single spaces
# (argv lives in the sys module: only `import sys` is in scope here)
print(*sys.argv[1:])

Download echo.4.py


sum integers supplied as command line arguments no check that arguments are integers
import sys

# int() raises ValueError on the first argument that is not an integer
total = sum(int(argument) for argument in sys.argv[1:])
print("Sum of the numbers is", total)

Download sum_arguments.0.py


sum integers supplied as command line arguments
import sys

total = 0
for argument in sys.argv[1:]:
    try:
        value = int(argument)
    except ValueError:
        # give up at the first argument that is not an integer
        print(f"error: '{argument}' is not an integer", file=sys.stderr)
        sys.exit(1)
    total += value
print("Sum of the numbers is", total)

Download sum_arguments.1.py



Count the number of lines on standard input.
import sys

# consume stdin one line at a time, counting without storing the lines
line_count = sum(1 for _ in sys.stdin)
print(line_count, "lines")

Download line_count.0.py



Count the number of lines on standard input.
import sys

# readlines() reads all of stdin into a list, one element per line
line_count = len(sys.stdin.readlines())
print(line_count, "lines")

Download line_count.1.py



Count the number of lines on standard input.
import sys

# iterating a file yields its lines, so list() collects every line of stdin
line_count = len(list(sys.stdin))
print(line_count, "lines")

Download line_count.2.py



Simple cp implementation for text files using line-based I/O. Explicit close is used below; a with statement would be better. No error handling.
import sys

# exactly two arguments are required: source and destination
if len(sys.argv) != 3:
    print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
    sys.exit(1)

source_path, destination_path = sys.argv[1], sys.argv[2]

infile = open(source_path, "r", encoding="utf-8")
outfile = open(destination_path, "w", encoding="utf-8")
# each line keeps its own newline, so it is written back unchanged
for line in infile:
    outfile.write(line)
infile.close()
outfile.close()

Download cp.0.py



Simple cp implementation for text files using line-based I/O and with statement, but no error handling
import sys

# exactly two arguments are required: the source and the destination
try:
    _, source_path, destination_path = sys.argv
except ValueError:
    print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
    sys.exit(1)

# both files are closed automatically when the with block is left
with open(source_path) as source, open(destination_path, "w") as destination:
    for text_line in source:
        destination.write(text_line)

Download cp.1.py



Simple cp implementation for text files using line-based I/O and with statement and error handling
import sys

# exactly two arguments are required: the source and the destination
if len(sys.argv[1:]) != 2:
    print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
    sys.exit(1)
source_path, destination_path = sys.argv[1:]

try:
    with open(source_path) as source, open(destination_path, "w") as destination:
        for text_line in source:
            destination.write(text_line)
except OSError as error:
    # covers failures to open, read or write either file
    print(sys.argv[0], "error:", error, file=sys.stderr)
    sys.exit(1)

Download cp.2.py



Simple cp implementation for text files using line-based I/O, reading all lines into a list (not advisable for large files)
import sys

# exactly two arguments are required: the source and the destination
try:
    _, source_path, destination_path = sys.argv
except ValueError:
    print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
    sys.exit(1)

try:
    with open(source_path) as source, open(destination_path, "w") as destination:
        # the whole file is held in memory as a list of lines before being written
        destination.writelines(source.readlines())
except OSError as error:
    print(sys.argv[0], "error:", error, file=sys.stderr)
    sys.exit(1)

Download cp.3.py



Simple cp implementation using shutil.copyfile
import sys
from shutil import copyfile

# exactly two arguments are required: the source and the destination
try:
    _, source_path, destination_path = sys.argv
except ValueError:
    print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
    sys.exit(1)

# let the standard library do the copy; report any OS-level failure
try:
    copyfile(source_path, destination_path)
except OSError as error:
    print(sys.argv[0], "error:", error, file=sys.stderr)
    sys.exit(1)

Download cp.4.py



Simple cp implementation by running /bin/cp
import subprocess
import sys

# exactly two arguments are required: the source and the destination
try:
    _, source_path, destination_path = sys.argv
except ValueError:
    print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
    sys.exit(1)

# run the external cp command (argument list, so no shell is involved)
# and exit with whatever status it returned
completed = subprocess.run(['cp', source_path, destination_path])
sys.exit(completed.returncode)

Download cp.5.py


Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.

For example:
repeat_seconds=300  # check every 5 minutes

# exactly three arguments are required
if test $# != 3
then
    echo "Usage: $0 <url> <regex> <email-address>" 1>&2
    exit 1
fi
url=$1
regexp=$2
email_address=$3

# keep polling until the downloaded page matches the regex
until curl --silent "$url"|grep -E "$regexp" >/dev/null
do
    sleep $repeat_seconds
done

# the 2nd echo is for testing, remove to really send email
echo "Generated by $0" |
echo mail -s "website '$url' now matches regex '$regexp'" "$email_address"
exit 0

Download watch_website.sh



Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.
implemented using subprocess
import re
import subprocess
import sys
import time

REPEAT_SECONDS = 300  # check every 5 minutes

# exactly three arguments are required
if len(sys.argv) == 4:
    url = sys.argv[1]
    regexp = sys.argv[2]
    email_address = sys.argv[3]
else:
    print(f"Usage: {sys.argv[0]} <url> <regex> <email-address>", file=sys.stderr)
    sys.exit(1)

while True:
    # download the page with curl, capturing its output as text
    p = subprocess.run(
        ["curl", "--silent", url], text=True, stdout=subprocess.PIPE
    )
    webpage = p.stdout
    if not re.search(regexp, webpage):
        # no match yet - wait and try again
        time.sleep(REPEAT_SECONDS)
        continue

    mail_body = f"Generated by {sys.argv[0]}"
    subject = f"website '{url}' now matches regex '{regexp}'"
    # the echo is for testing, remove to really send email
    # the recipient must be given to mail, exactly as the shell version does
    subprocess.run(
        ["echo", "mail", "-s", subject, email_address], text=True, input=mail_body
    )
    sys.exit(0)

Download watch_website.0.py



Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.
implemented using urllib.request
import re
import sys
import subprocess
import time
import urllib.request

REPEAT_SECONDS = 300  # check every 5 minutes

# exactly three arguments are required
if len(sys.argv) == 4:
    url = sys.argv[1]
    regexp = sys.argv[2]
    email_address = sys.argv[3]
else:
    print(f"Usage: {sys.argv[0]} <url> <regex> <email-address>", file=sys.stderr)
    sys.exit(1)

while True:
    # the with statement closes the HTTP response after each poll
    with urllib.request.urlopen(url) as response:
        webpage = response.read().decode()
    if not re.search(regexp, webpage):
        # no match yet - wait and try again
        time.sleep(REPEAT_SECONDS)
        continue

    mail_body = f"Generated by {sys.argv[0]}"
    subject = f"website '{url}' now matches regex '{regexp}'"
    # the echo is for testing, remove to really send email
    # the recipient must be given to mail, exactly as the shell version does
    subprocess.run(
        ["echo", "mail", "-s", subject, email_address], text=True, input=mail_body
    )
    sys.exit(0)

Download watch_website.1.py


fetch and print the text of a web page using HTML parser BeautifulSoup
import re
import sys
import urllib.request
import bs4 as BeautifulSoup

# text whose parent is one of these is not part of the visible page
IGNORE_WEBPAGE_ELEMENTS = set("[document] head meta style script title".split())

for url in sys.argv[1:]:
    # the with statement closes the HTTP response once the page is read
    with urllib.request.urlopen(url) as response:
        webpage = response.read().decode()
    soup = BeautifulSoup.BeautifulSoup(webpage, "lxml")
    # visit every text node in the parsed document
    for element in soup.findAll(text=True):
        parent = element.parent.name.lower()
        if parent in IGNORE_WEBPAGE_ELEMENTS:
            continue
        text = element.getText()
        # remove empty lines and leading whitespace
        # (operate on the extracted text - previously it was computed then discarded)
        text = re.sub(r"\n\s+", "\n", text)
        text = text.strip()
        if text:
            print(text)

Download fetch_website_text.py

Python Functions

def a():
	"""Print x, y and z as seen from inside a().

	x is a local of a(); y and z are not assigned here, so they are looked
	up as globals (NameError if they do not exist when a() is called).
	"""
	x = 1
	print('a', x, y, z)

def b():
	"""Call a(), then print x, y and z as seen from inside b().

	x and y are locals of b(), so a() does not see these values;
	z is not assigned here and is looked up as a global.
	"""
	x = 2
	y = 2
	a()
	print('b', x, y, z)

def c():
	"""Set the global z, call b(), then print x, y and z as seen from c().

	x and y are locals of c(); the global statement makes the assignment
	to z bind the module-level name, which is the z that a() and b() read.
	"""
	x = 3
	y = 3
	global z
	z = 3
	b()
	print('c', x, y, z)

Download scope.py

"""
approximate implementations of some Python builtin functions
written by andrewt@unsw.edu.au for COMP(2041|9044)
"""


def my_enumerate(sequence, start=0):
    """Return a list of (number, element) tuples, numbering the elements
    of sequence consecutively from start - the list equivalent of the
    iterator returned by the builtin enumerate.
    """
    pairs = []
    for element in sequence:
        # the number of pairs built so far is this element's offset from start
        pairs.append((start + len(pairs), element))
    return pairs


def my_zip2(sequence1, sequence2):
    """Return a list pairing up the elements of two sequences, stopping at
    the end of the shorter one - the list equivalent of the iterator
    returned by the builtin zip given 2 sequences.
    Note: zip itself accepts any number of sequences."""
    pair_count = min(len(sequence1), len(sequence2))
    return [(sequence1[i], sequence2[i]) for i in range(pair_count)]


def my_map1(function, sequence):
    """Return a list of function applied to each element of sequence -
    the list equivalent of the iterator returned by the builtin map
    given 1 sequence.
    Note: map itself accepts more than 1 sequence."""
    return [function(value) for value in sequence]


def my_filter(function, sequence):
    """Return a list of the elements of sequence for which function
    returns a true value - the list equivalent of the iterator returned
    by the builtin filter.

    As with the builtin, function may be None, in which case the
    elements which are themselves true are kept."""
    if function is None:
        # filter(None, ...) tests the truth of each element itself
        function = bool
    return [value for value in sequence if function(value)]


if __name__ == "__main__":
    # small demonstration of each function when run as a script
    print(my_enumerate("abcde"))
    print(my_zip2("Hello", "Andrew"))
    print(my_map1(lambda x: x**3, range(10)))
    print(my_filter(lambda x: x % 2 == 0, range(10)))

Download builtin.py

"""
calculate Dot Product https://en.wikipedia.org/wiki/Dot_product
of 2 lists - list are assumed to be the same length
written by andrewt@unsw.edu.au for COMP(2041|9044)
"""

import operator


def dot_product0(a, b):
    """Return the dot product of 2 lists, using a for loop over the
    indices of a and indexing both lists."""
    result = 0
    for position in range(len(a)):
        result = result + a[position] * b[position]
    return result


def dot_product1(a, b):
    """Return the dot product of 2 lists, using a for loop with enumerate
    to get each element of a together with its index into b."""
    result = 0
    for position, value in enumerate(a):
        result = result + value * b[position]
    return result


def dot_product2(a, b):
    """Return the dot product of 2 lists, using a for loop with zip to
    step through both lists together."""
    result = 0
    for left, right in zip(a, b):
        result = result + left * right
    return result


def dot_product3(a, b):
    """Return the dot product of 2 lists, summing a generator expression
    over the zipped lists."""
    products = (left * right for left, right in zip(a, b))
    return sum(products)


def multiply(x, y):
    """Multiply 2 numbers - operator.mul does the same job."""
    product = x * y
    return product


def dot_product4(a, b):
    """Return the dot product of 2 lists, using map to apply multiply to
    corresponding elements."""
    products = map(multiply, a, b)
    return sum(products)


def dot_product5(a, b):
    """Return the dot product of 2 lists, using map with a lambda that
    multiplies corresponding elements."""
    products = map(lambda left, right: left * right, a, b)
    return sum(products)


def dot_product6(a, b):
    """Return the dot product of 2 lists, using map with operator.mul to
    multiply corresponding elements."""
    products = map(operator.mul, a, b)
    return sum(products)


if __name__ == "__main__":
    # every implementation should print the same value for the same inputs
    a = range(5, 10)
    b = range(11, 16)
    implementations = (
        dot_product0,
        dot_product1,
        dot_product2,
        dot_product3,
        dot_product4,
        dot_product5,
        dot_product6,
    )
    for dot_product in implementations:
        print(dot_product(a, b))

Download dot_product.py

"""
extract odd numbers from a list
written by andrewt@unsw.edu.au for COMP(2041|9044)
"""


def is_odd(number):
    """Return True if the integer number is odd.

    In Python the remainder of division by 2 is 0 or 1 (never 2),
    including for negative numbers, e.g. -3 % 2 == 1.
    """
    return number % 2 == 1


def odd0(numbers):
    """Return a list of the odd numbers in numbers, built with a for loop."""
    selected = []
    for candidate in numbers:
        if not is_odd(candidate):
            continue
        selected.append(candidate)
    return selected


def odd1(numbers):
    """Return a list of the odd numbers in numbers, built with a list
    comprehension."""
    selected = [candidate for candidate in numbers if is_odd(candidate)]
    return selected


def odd2(numbers):
    """Return an iterator over the odd numbers in numbers, using filter
    with is_odd as the test."""
    selected = filter(is_odd, numbers)
    return selected


def odd3(numbers):
    """Return an iterator over the odd numbers in numbers, using filter
    with a lambda as the test."""
    # the remainder of division by 2 is 0 or 1, so odd means == 1 (never == 2)
    return filter(lambda n: n % 2 == 1, numbers)


if __name__ == "__main__":
    numbers = range(1, 11)
    print(odd0(numbers))
    print(odd1(numbers))
    # odd2 and odd3 return filter iterators; convert them to lists so the
    # numbers are printed rather than "<filter object at ...>"
    print(list(odd2(numbers)))
    print(list(odd3(numbers)))

Download odd_numbers.py

"""
sorting a list based on the values in a dict
"""

import random

# names of the days in week order, starting from Sunday
DAY_LIST = "Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split()
# maps each day name to its position in DAY_LIST (Sunday is 0)
DAY_NUMBER = {day: number for number, day in enumerate(DAY_LIST)}


def random_day_of_week():
    """Return a day name chosen at random from DAY_LIST."""
    chosen_day = random.choice(DAY_LIST)
    return chosen_day


def sort_days0(day_list):
    """Return a new list of the day names in day_list in week order,
    using a key function which looks each day up in DAY_NUMBER."""

    def day_number(day):
        return DAY_NUMBER[day]

    return sorted(day_list, key=day_number)


def sort_days1(day_list):
    """Return a new list of the day names in day_list in week order,
    using the dict's own get method as the sort key."""
    ordered_days = list(day_list)
    ordered_days.sort(key=DAY_NUMBER.get)
    return ordered_days


if __name__ == "__main__":
    print(DAY_LIST)
    print(DAY_NUMBER)
    # 7 random days, shown unsorted, sorted alphabetically,
    # then sorted into week order by each implementation
    random_days = [random_day_of_week() for _ in range(7)]
    for days in (
        random_days,
        sorted(random_days),
        sort_days0(random_days),
        sort_days1(random_days),
    ):
        print(days)

Download sort_days.py

Python Regex

count people enrolled in courses implemented using regular expressions & dicts
import re

COURSE_CODES_FILE = "course_codes.tsv"
ENROLLMENTS_FILE = "enrollments.txt"

# course_codes.tsv contains tab separated UNSW course codes and names, e.g.
# ACCT1501  Accounting & Financial Mgt 1A

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# map each course code to its name
course_names = {}
with open(COURSE_CODES_FILE, encoding="utf-8") as f:
    for line in f:
        # first word is the code, the rest (without trailing whitespace) is the name
        if m := re.match(r"(\S+)\s+(.*\S)", line):
            course_names[m.group(1)] = m.group(2)

# count one enrollment per line, keyed by course code
enrollments_count = {}
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        # delete everything from the first | onwards; the newline is optional
        # so a final line which lacks one is handled correctly too
        course_code = re.sub(r"\|.*\n?", "", line)
        if course_code not in enrollments_count:
            enrollments_count[course_code] = 0
        enrollments_count[course_code] += 1

for (course_code, enrollment) in sorted(enrollments_count.items()):
    # if no name for course_code use ???
    name = course_names.get(course_code, "???")
    print(f"{enrollment:4} {course_code} {name}")

Download count_enrollments.0.py


count people enrolled in courses, implemented using string operations, a dict & a counter
import collections
import re

COURSE_CODES_FILE = "course_codes.tsv"
ENROLLMENTS_FILE = "enrollments.txt"

# course_codes.tsv contains tab separated UNSW course codes and names, e.g.
# ACCT1501  Accounting & Financial Mgt 1A

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# map each course code to its name
course_names = {}
with open(COURSE_CODES_FILE, encoding="utf-8") as f:
    for line in f:
        # remove the newline first so it does not become part of the name
        fields = line.rstrip("\n").split("\t", maxsplit=1)
        if len(fields) != 2:
            # blank or malformed line with no tab - nothing to record
            continue
        course_code, course_name = fields
        course_names[course_code] = course_name

# count one enrollment per line, keyed by course code
enrollments_count = collections.Counter()
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        # the course code is the text before the first |
        course_code = line.rstrip("\n").split("|", maxsplit=1)[0]
        enrollments_count[course_code] += 1

for (course_code, enrollment) in sorted(enrollments_count.items()):
    # if no name for course_code use ???
    name = course_names.get(course_code, "???")
    print(f"{enrollment:4} {course_code} {name}")

Download count_enrollments.1.py


count how many people enrolled have each first name implemented using regular expressions, a set & counters
import collections
import re

ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# a student enrolled in several courses appears on several lines,
# so remember which student numbers have been seen
seen_student_numbers = set()
first_name_count = collections.Counter()
with open(ENROLLMENTS_FILE, encoding="utf-8") as enrollments:
    for record in enrollments:
        _, student_number, full_name = record.split("|")[:3]

        if student_number not in seen_student_numbers:
            seen_student_numbers.add(student_number)

            # the first name is the first word after the comma
            name_match = re.match(r".*,\s+(\S+)", full_name)
            if name_match:
                first_name_count[name_match.group(1)] += 1

# tuples are (count, name) so sorting orders on count before name;
# reverse gives decreasing order of popularity
by_popularity = sorted(
    ((count, first_name) for first_name, count in first_name_count.items()),
    reverse=True,
)
for count, first_name in by_popularity:
    print(f"{count:4} {first_name}")

Download count_first_names.py



Report cases where there are 5 or more people of the same first name enrolled in a course implemented using a dict of dicts
import re
import sys

REPORT_MORE_THAN_STUDENTS = 5
ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# outer dict: course code -> inner dict: first name -> number of students
course_first_name_count = {}
with open(ENROLLMENTS_FILE, encoding="utf-8") as enrollments:
    for record in enrollments:
        course_code, _, full_name = record.split("|")[:3]

        # the first name is the first word after the comma
        name_match = re.match(r".*,\s+(\S+)", full_name)
        if not name_match:
            print("Warning could not parse line", record.strip(), file=sys.stderr)
            continue
        first_name = name_match.group(1)

        # create the course's inner dict on first use, then bump the count
        name_counts = course_first_name_count.setdefault(course_code, {})
        name_counts[first_name] = name_counts.get(first_name, 0) + 1


for course in sorted(course_first_name_count):
    for first_name, count in course_first_name_count[course].items():
        if count < REPORT_MORE_THAN_STUDENTS:
            continue
        print(course, "has", count, "students named", first_name)

Download duplicate_first_names.0.py



Report cases where there are more than 5 people of the same first name enrolled in a course implemented using a defaultdict & counter from collections and string operations
import collections
import sys

REPORT_MORE_THAN_STUDENTS = 5
ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# missing courses get an empty Counter and missing names a count of 0,
# so no explicit initialisation is needed
course_first_name_count = collections.defaultdict(collections.Counter)
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        course_code, _, full_name = line.split("|")[0:3]

        # names are "Family, Given names" - the first name is the
        # first word after the comma
        name_parts = full_name.split(",")
        first_name = ""
        if len(name_parts) > 1:
            given_names = name_parts[1].strip()
            first_name = given_names.split(" ")[0]
        if not first_name:
            # no comma or nothing after it: warn and skip rather than crash
            print("Warning could not parse line", line.strip(), file=sys.stderr)
            continue

        course_first_name_count[course_code][first_name] += 1


for course in sorted(course_first_name_count.keys()):
    for (first_name, count) in course_first_name_count[course].items():
        if count > REPORT_MORE_THAN_STUDENTS:
            print(course, "has", count, "students named", first_name)

Download duplicate_first_names.1.py



Print the last number (real or integer) on every line
Note: regexp to match number: -?\d+\.?\d*
Note: use of assignment operator :=
import re, sys

for line in sys.stdin:
    # a number followed only by non-digits up to the end of the line
    # must be the last number on the line
    if not (m := re.search(r'(-?\d+\.?\d*)\D*$', line)):
        continue
    print(m.group(1))

Download print_last_number.py

written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

Find the positive integers among input text print their sum and mean

Note regexp to split on non-digits
Note check to handle empty string from split
Only positive integers handled
import re, sys

input_as_string = sys.stdin.read()

# splitting on runs of non-digits leaves the digit strings, plus an empty
# string at either end if the input starts or ends with a non-digit
numbers = re.split(r"\D+", input_as_string)
print(numbers)

total = 0
n = 0
for number in numbers:
    # skip the empty strings split can produce
    if number:
        total += int(number)
        n += 1

# test n, not numbers: re.split never returns an empty list, so testing
# numbers would divide by zero when the input contains no digits
if n:
    print(n, "numbers: total", total, "with mean", total / n)

Download find_numbers.0.py



Find the numbers (integer or floating-point, possibly negative) among input text, print their sum and mean

Note the regexp to match a number, -?\d+\.?\d*, matches positive & negative integers & floating-point numbers
import re, sys

# findall returns every non-overlapping match, so no empty strings to skip
matches = re.findall(r"-?\d+\.?\d*", sys.stdin.read())
print(matches)

# only report when at least one number was found (avoids dividing by zero)
if matches:
    values = [float(text) for text in matches]
    total = sum(values)
    print(len(values), "numbers: total", total, "with mean", total / len(values))

Download find_numbers.1.py



Change the names of the specified files to lower case. (simple version of the perl utility rename)

Note use of test to check if the new filename is unchanged.

Note the double quotes around $filename so filenames containing spaces are not broken into multiple words

Note the use of mv -- to stop mv interpreting a filename beginning with - as an option

Note files named -n or -e still break the script because echo will treat them as an option.
# at least one filename is required
if test $# = 0
then
    echo "Usage $0: <files>" 1>&2
    exit 1
fi

for filename in "$@"
do
    new_filename=$(echo "$filename" | tr '[:upper:]' '[:lower:]')

    # nothing to do if the name is already lower case
    test "$filename" = "$new_filename" &&
        continue

    # -e rather than -r: an existing file must never be overwritten,
    # even one we do not have permission to read
    if test -e "$new_filename"
    then
        echo "$0: $new_filename exists" 1>&2
    elif test -e "$filename"
    then
        mv -- "$filename" "$new_filename"
    else
        echo "$0: $filename not found" 1>&2
    fi

done

Download tolower.sh




Change the names of the specified files to lower case. (simple version of the perl utility rename)
import os
import sys

for original in sys.argv[1:]:
    lowered = original.lower()
    if lowered == original:
        # already lower case - nothing to do
        pass
    elif os.path.exists(lowered):
        # never overwrite an existing file
        print(f"{sys.argv[0]}: '{lowered}' exists", file=sys.stderr)
    else:
        try:
            os.rename(original, lowered)
        except OSError as error:
            print(f"{sys.argv[0]}: '{lowered}' {error}", file=sys.stderr)

Download rename_lower_case.py




Change the names of the specified files by substituting occurrences of regex with replacement (simple version of the perl utility rename)
import os
import re
import sys

# the regex and the replacement are required, filenames are optional
if len(sys.argv) < 3:
    print(f"Usage: {sys.argv[0]} <regex> <replacement> [files]", file=sys.stderr)
    sys.exit(1)

regex, replacement, *pathnames = sys.argv[1:]

for original in pathnames:
    # only the first match in each pathname is replaced
    renamed = re.sub(regex, replacement, original, count=1)
    if renamed == original:
        # the regex did not match (or the replacement changed nothing)
        pass
    elif os.path.exists(renamed):
        # never overwrite an existing file
        print(f"{sys.argv[0]}: '{renamed}' exists", file=sys.stderr)
    else:
        try:
            os.rename(original, renamed)
        except OSError as error:
            print(f"{sys.argv[0]}: '{renamed}' {error}", file=sys.stderr)

Download rename_regex.py




Change the names of the specified files by substituting occurrences of regex with replacement (simple version of the perl utility rename)
also demonstrating argument processing and use of eval
beware: eval can allow arbitrary code execution, it should not be used where security is important

import argparse
import os
import re
import sys

parser = argparse.ArgumentParser()

# required positional arguments, followed by any number of filenames
for name, description in (
    ("regex", "match against filenames"),
    ("replacement", "replaces matches with this"),
):
    parser.add_argument(name, type=str, help=description)
parser.add_argument("filenames", nargs="*", help="filenames to be changed")

# optional boolean flags - each is False unless given on the command line
for short_flag, long_flag, description in (
    ("-d", "--dryrun", "show changes but don't make them"),
    ("-v", "--verbose", "print more information"),
    ("-e", "--eval", "evaluate replacement as python expression, match available as _"),
):
    parser.add_argument(short_flag, long_flag, action="store_true", help=description)

# optional integer argument which defaults to 1
parser.add_argument(
    "-n",
    "--replace_n_matches",
    type=int,
    default=1,
    help="replace n matches (0 for all matches)",
)

args = parser.parse_args()


def eval_replacement(match):
    """Return the replacement text for one regex match.

    Without --eval this is the replacement argument unchanged.
    With --eval the replacement argument is evaluated as a Python
    expression, with the variable _ set to the matched part of the
    filename, and the result is converted to a string.
    """
    if args.eval:
        # eval() can see this function's locals, which is how _ reaches
        # the expression.  eval runs arbitrary code - never use it on
        # input from an untrusted source.
        _ = match.group(0)
        return str(eval(args.replacement))
    return args.replacement


for old_pathname in args.filenames:
    # Compute the new name.  An invalid regex raises re.error, and with --eval
    # the user-supplied expression can raise anything at all, so every
    # exception is reported here and the file skipped - no OSError can
    # occur at this point, as no file is touched yet.
    try:
        new_pathname = re.sub(
            args.regex, eval_replacement, old_pathname, count=args.replace_n_matches
        )
    except Exception as e:
        print(
            f"{sys.argv[0]}: '{old_pathname}': '{args.replacement}'  {e}",
            file=sys.stderr,
        )
        continue

    if new_pathname == old_pathname:
        if args.verbose:
            print("no change:", old_pathname)
        continue

    # never overwrite an existing file
    if os.path.exists(new_pathname):
        print(f"{sys.argv[0]}: '{new_pathname}' exists", file=sys.stderr)
        continue

    if args.dryrun:
        print(old_pathname, "would be renamed to", new_pathname)
        continue

    if args.verbose:
        print("renaming", old_pathname, "to", new_pathname)
    try:
        os.rename(old_pathname, new_pathname)
    except OSError as e:
        print(f"{sys.argv[0]}: '{new_pathname}' {e}", file=sys.stderr)

Download rename_regex_eval.py

written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
import re, sys, os

for filename in sys.argv[1:]:
    # the changed text is written to a new file next to the original,
    # which then replaces the original
    tmp_filename = filename + ".new"
    if os.path.exists(tmp_filename):
        print(f"{sys.argv[0]}: {tmp_filename} already exists\n", file=sys.stderr)
        sys.exit(1)
    with open(filename) as original, open(tmp_filename, "w") as replacement:
        for line in original:
            # park Hermione (and misspellings) as Zaphod so the two names
            # can be swapped without the second step undoing the first
            swapped = re.sub(r"Herm[io]+ne", "Zaphod", line)
            swapped = swapped.replace("Harry", "Hermione").replace("Zaphod", "Harry")
            replacement.write(swapped)
    os.rename(tmp_filename, filename)

Download change_names.0.py

written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
import re, sys, os, shutil, tempfile

for filename in sys.argv[1:]:
    # delete=False keeps the temporary file after it is closed,
    # so that it can be moved over the original
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp, open(filename) as original:
        for line in original:
            # park Hermione (and misspellings) as Zaphod so the two names
            # can be swapped without the second step undoing the first
            swapped = re.sub(r"Herm[io]+ne", "Zaphod", line)
            swapped = swapped.replace("Harry", "Hermione").replace("Zaphod", "Harry")
            tmp.write(swapped)
    shutil.move(tmp.name, filename)

Download change_names.1.py

written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
modified text is stored in a list then file over-written
import re, sys, os

for filename in sys.argv[1:]:
    # read and change every line before the file is reopened for writing
    with open(filename) as original:
        # park Hermione (and misspellings) as Zaphod so the two names
        # can be swapped without the second step undoing the first
        changed_lines = [
            re.sub(r"Herm[io]+ne", "Zaphod", line)
            .replace("Harry", "Hermione")
            .replace("Zaphod", "Harry")
            for line in original
        ]
    with open(filename, "w") as rewritten:
        rewritten.writelines(changed_lines)

Download change_names.2.py

written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example

For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
modified text is stored in a single string then file over-written
import re, sys, os

for filename in sys.argv[1:]:
    # read the entire file into a single string
    with open(filename) as f:
        text = f.read()
    # park Hermione (and misspellings) as Zaphod so the two names
    # can be swapped without the second step undoing the first
    changed_text = re.sub(r"Herm[io]+ne", "Zaphod", text)
    changed_text = changed_text.replace("Harry", "Hermione")
    changed_text = changed_text.replace("Zaphod", "Harry")
    # over-write the file with the changed text
    with open(filename, "w") as g:
        g.write(changed_text)

Download change_names.3.py

Python Modules
Make

Simple makefile
# link the three object files into the executable "game"
game : main.o graphics.o world.o 
	gcc -o game main.o graphics.o world.o

# each object file depends on its .c file and the headers listed after it
main.o : main.c graphics.h world.h
	gcc -c main.c

graphics.o : graphics.c world.h 
	gcc -c graphics.c

world.o : world.c world.h 
	gcc -c world.c

# remove the executable and the object files
clean:
	rm -f game main.o graphics.o world.o

Download Makefile.simple

"""
Simple Python implementation of "make".
Parses makefile rules and stores them in a dict
then builds targets with a recursive function.
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example
"""

import argparse
import collections
import os
import re
import sys
import subprocess


def main():
    """Parse the command line, read the makefile and build the requested
    targets (by default the first target in the makefile)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--makefile", default="Makefile")
    parser.add_argument("-n", "--dryrun", action="store_true")
    parser.add_argument("build_targets", nargs="*")
    args = parser.parse_args()

    rules = parse_makefile(args.makefile)

    if args.build_targets:
        targets = args.build_targets
    else:
        # no target specified: use the first target in the makefile (if any)
        targets = list(rules.keys())[:1]

    for target in targets:
        build(target, rules, args.dryrun)


def build(target, rules, dryrun=False):
    """Bring target up to date.

    Each dependency of target is built first (recursively).  The build
    commands for target are then run if target does not exist or is
    older than one of its dependencies.  With dryrun the commands are
    printed but not run.  Exits with status 1 if target does not exist
    and there are no commands to build it.
    """
    dependencies, build_commands = rules.get(target, ([], []))

    # a target which does not exist always needs building
    out_of_date = not os.path.exists(target)
    for dependency in dependencies:
        build(dependency, rules, dryrun)
        # modification times are only compared while the answer is undecided
        if not out_of_date:
            out_of_date = os.path.getmtime(dependency) > os.path.getmtime(target)
    if not out_of_date:
        return

    if not build_commands and not os.path.exists(target):
        print("*** No rule to make target", target)
        sys.exit(1)

    for command in build_commands:
        print(command)
        if dryrun:
            continue
        subprocess.run(command, shell=True)


def parse_makefile(makefile_name):
    """Return an ordered dict mapping each makefile target to a
    (dependencies, build commands) tuple.

    A rule is a "target : dependencies" line followed by zero or more
    command lines, each starting with a tab.  Any other line ends the
    current rule and is otherwise ignored.  A rule may directly follow
    the commands of the previous rule - no blank line is needed.
    """
    rules = collections.OrderedDict()
    # command list of the rule currently being read, None when outside a rule
    build_commands = None
    with open(makefile_name, encoding="utf-8") as f:
        for line in f:
            if line.startswith("\t"):
                # command line: belongs to the current rule, if there is one
                if build_commands is not None:
                    build_commands.append(line.strip())
                continue

            # any line not starting with a tab ends the current rule ...
            build_commands = None

            # ... and may itself start a new one
            if m := re.match(r"^(\S+)\s*:\s*(.*)", line):
                build_commands = []
                rules[m.group(1)] = (m.group(2).split(), build_commands)
    return rules


# run main() only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()

Download make0.py


Simple makefile with variables & a comment
# compiler and compiler options used by every rule below
CC=clang-11
CFLAGS=-O3 -Wall

# link the three object files into the executable "game"
game : main.o graphics.o world.o 
	$(CC) $(CFLAGS) -o game main.o graphics.o world.o

# each object file depends on its .c file and the headers listed after it
main.o : main.c graphics.h world.h
	$(CC) $(CFLAGS) -c main.c

graphics.o : graphics.c world.h 
	$(CC) $(CFLAGS) -c graphics.c

world.o : world.c world.h 
	$(CC) $(CFLAGS) -c world.c

# remove the executable and the object files
clean:
	rm -f game main.o graphics.o world.o

Download Makefile.variables

"""
Simple Python implementation of "make".
Parses makefile rules and stores them in a dict
then builds targets with a recursive function.
This version implements Makefile variables & comments.
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example
"""

import argparse
import collections
import os
import re
import sys
import subprocess


def main():
    """Parse the command line, read the makefile and build the requested
    targets (by default the first target in the makefile)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--makefile", default="Makefile")
    parser.add_argument("-n", "--dryrun", action="store_true")
    parser.add_argument("build_targets", nargs="*")
    args = parser.parse_args()

    rules = parse_makefile(args.makefile)

    if args.build_targets:
        targets = args.build_targets
    else:
        # no target specified: use the first target in the makefile (if any)
        targets = list(rules.keys())[:1]

    for target in targets:
        build(target, rules, args.dryrun)


def build(target, rules, dryrun=False):
    """Recursively check dependencies and run commands as needed to build target.

    Each dependency of target is built first.  The build commands for
    target are then run if target does not exist or is older than one
    of its dependencies.  With dryrun the commands are printed but not
    run - for the dependencies as well as for target itself.  Exits with
    status 1 if target does not exist and there are no commands to
    build it.
    """
    (dependencies, build_commands) = rules.get(target, ([], []))

    # a target which does not exist always needs building
    build_needed = not os.path.exists(target)
    for d in dependencies:
        # dryrun must be passed down, otherwise a dry run would really
        # execute the commands which build the dependencies
        build(d, rules, dryrun)
        # "or" short-circuits, so modification times are only
        # compared while the answer is still undecided
        build_needed = build_needed or os.path.getmtime(d) > os.path.getmtime(target)
    if not build_needed:
        return

    if not build_commands and not os.path.exists(target):
        print("*** No rule to make target", target)
        sys.exit(1)

    for command in build_commands:
        print(command)
        if not dryrun:
            subprocess.run(command, shell=True)


def parse_makefile(makefile_name):
    """Return an ordered dict mapping each makefile target to a
    (dependencies, build commands) tuple.

    Outside the commands of a rule, comments (# to end of line) are
    removed and "name = value" lines define variables.  $(name) is
    replaced by the variable's value in rule lines and command lines.
    A rule is a "target : dependencies" line followed by zero or more
    command lines, each starting with a tab.  A rule or variable
    definition may directly follow the commands of the previous rule -
    no blank line is needed.
    """
    rules = collections.OrderedDict()
    variables = {}
    # command list of the rule currently being read, None when outside a rule
    build_commands = None
    with open(makefile_name, encoding="utf-8") as f:
        for line in f:
            # a tab-indented line inside a rule is one of its commands
            if build_commands is not None and line.startswith("\t"):
                build_commands.append(replace_variables(line, variables).strip())
                continue

            # any other line ends the current rule
            build_commands = None

            # remove any comment
            line = re.sub(r"#.*", "", line)

            # check for variable definition
            if m := re.match(r"^\s*(\S+)\s*=\s*(.*)", line):
                variables[m.group(1)] = m.group(2)
                continue

            line = replace_variables(line, variables)

            # check for the start of a rule
            if m := re.match(r"^(\S+)\s*:\s*(.*)", line):
                build_commands = []
                rules[m.group(1)] = (m.group(2).split(), build_commands)
    return rules


def replace_variables(line, variables):
    """Return line with occurrences of $(variable) replaced by the variable's
    value; an undefined variable is replaced by the empty string."""
    return re.sub(r"\$\((.*?)\)", lambda m: variables.get(m.group(1), ""), line)


# run main() only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()

Download make1.py

Linux Tools
use Debian bookworm as our base image
# base image: Debian bookworm
FROM debian:bookworm

# install packages and create some directories and a file
# (a single RUN: the commands are chained with && so a failure stops the chain)
RUN \
	apt-get update &&\
	apt-get install -q -y apache2 curl &&\
	apt-get -q -y clean &&\
	echo ServerName localhost >/etc/apache2/conf-enabled/localhost.conf  &&\
	mkdir -p /var/run/apache2 /var/log/apache2 /var/lock &&\
	echo hello Andrew >/var/www/html/hello.html
	
# specify default command
# start apache, run a login shell, and stop apache once the shell exits successfully
ENTRYPOINT \
	apachectl start &&\
	bash -l &&\
	apachectl stop

Download Dockerfile

Exam