Software Construction
Course Resources
Administrivia: | Course Outline | Course Timetable | Help Sessions |
Administrivia: | COMP2041 Handbook | COMP9044 Handbook |
Shell: | Shell & Regex Quick Reference Card |
Python: | Python Quick Reference Card | Python Documentation |
Meet the Team: | Our Team |
Platforms: | Lectures (via YouTube) | Tut-Labs (via BbCollaborate (via Moodle)) | Course Forum |
Assessment: | Autotests, Submissions, Marks | Give online: submission | Give online: sturec |
Assignment Specifications: | Assignment 1 | Assignment 2 |
Course Content Week-by-Week
- Tutorial
- Laboratory
- Monday Week 1 Lecture Topics
- Thursday Week 1 Lecture Topics
- Tutorial
- Laboratory
- Weekly Test
- Monday Week 5 Lecture Topics
- Thursday Week 5 Lecture Topics
- Weekly Test
- Tutorial
- Laboratory
- Weekly Test
- Monday Week 7 Lecture Topics
- Thursday Week 7 Lecture Topics
- Tutorial
- Laboratory
- Weekly Test
- Monday Week 8 Lecture Topics
- Thursday Week 8 Lecture Topics
- Tutorial
- Laboratory
- Weekly Test
- Monday Week 9 Lecture Topics
- Thursday Week 9 Lecture Topics
- Tutorial
- Laboratory
- Weekly Test
- Monday Week 10 Lecture Topics
- Thursday Week 10 Lecture Topics
Course Content Topic-by-Topic
- Filters
#include <stdio.h>
#include <stdlib.h>
// Copy every byte of stream to stdout.
// If a write to stdout fails, the error is reported with perror
// and the program terminates with exit status 1.
void process_stream(FILE *stream) {
    int byte;
    while ((byte = fgetc(stream)) != EOF) {
        if (fputc(byte, stdout) == EOF) {
            // perror itself appends ": " and the error text, so the
            // prefix must not end in a colon of its own
            perror("cat");
            exit(1);
        }
    }
}
// Entry point: copy each file named on the command line to stdout,
// or copy stdin when no file names are given.
// Returns 1 as soon as a file cannot be opened (after printing the
// program name, the file name and the system error), otherwise 0.
int main(int argc, char *argv[]) {
    if (argc == 1) {
        process_stream(stdin);
        return 0;
    }

    int arg = 1;
    while (arg < argc) {
        FILE *input = fopen(argv[arg], "r");
        if (!input) {
            fprintf(stderr, "%s: %s: ", argv[0], argv[arg]);
            perror("");
            return 1;
        }
        process_stream(input);
        fclose(input);
        arg++;
    }
    return 0;
}
import sys
def process_stream(stream):
    """
    Copy the lines of the text stream to stdout unchanged.

    stream is iterated line by line (an open text file or sys.stdin).
    No newline is added, so a final line without one is printed as is.
    """
    for line in stream:
        print(line, end="")
def main():
    """
    Copy each file named in sys.argv[1:] to stdout.

    If no file names are given, stdin is copied instead.
    """
    pathnames = sys.argv[1:]
    if not pathnames:
        process_stream(sys.stdin)
    else:
        for pathname in pathnames:
            with open(pathname, "r") as f:
                process_stream(f)


if __name__ == "__main__":
    main()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Read stream line by line and print every line in which substring
// occurs, prefixed with name and the line number (counted from 1).
void process_stream(FILE *stream, char *name, char *substring) {
    char *buffer = NULL;   // grown by getline as needed
    size_t capacity = 0;

    for (int n = 1; getline(&buffer, &capacity, stream) > 0; n++) {
        if (strstr(buffer, substring)) {
            printf("%s:%d:%s", name, n, buffer);
        }
    }

    free(buffer);
}
// Entry point: argv[1] is the substring to search for.
// Any further arguments are files to search; if there are none,
// stdin is searched under the name "<stdin>".
// Returns 1 as soon as a file cannot be opened, otherwise 0.
int main(int argc, char *argv[]) {
    if (argc == 2) {
        process_stream(stdin, "<stdin>", argv[1]);
        return 0;
    }

    int i = 2;
    while (i < argc) {
        char *pathname = argv[i];
        FILE *in = fopen(pathname, "r");
        if (!in) {
            fprintf(stderr, "%s: %s: ", argv[0], pathname);
            perror("");
            return 1;
        }
        process_stream(in, pathname, argv[1]);
        fclose(in);
        i++;
    }
    return 0;
}
import sys
def process_stream(f, name, substring):
    """
    Print the lines of f that contain substring.

    Each matching line is written as name:line_number:line, where
    line_number counts from 1.  The line keeps its own newline, so
    none is added.
    """
    for line_number, line in enumerate(f, start=1):
        if substring in line:
            print(f"{name}:{line_number}:{line}", end="")
def main():
    """
    Search for the substring sys.argv[1].

    The files named in sys.argv[2:] are searched; if none are given,
    stdin is searched under the name "<stdin>".  When no substring is
    given either, nothing is done.
    """
    if len(sys.argv) == 2:
        process_stream(sys.stdin, "<stdin>", sys.argv[1])
    elif len(sys.argv) > 2:
        for pathname in sys.argv[2:]:
            with open(pathname, 'r') as f:
                process_stream(f, pathname, sys.argv[1])


if __name__ == '__main__':
    main()
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
// Count lines, words and chars in stream `in`, then print the three
// totals followed by name on one line.
// A line is counted for each '\n'; a word is a maximal run of
// characters for which isspace() is false.
// assumes Unix-like line separator '\n'
// breaks on other platforms, see https://en.wikipedia.org/wiki/Newline
void process_stream(FILE *in, char *name) {
    // long long rather than int: with int, input larger than INT_MAX
    // bytes overflows the counters, which is undefined behaviour
    long long n_lines = 0;
    long long n_words = 0;
    long long n_chars = 0;
    int in_word = 0;    // non-zero while the scan is inside a word
    int c;
    while ((c = fgetc(in)) != EOF) {
        n_chars++;
        if (c == '\n') {
            n_lines++;
        }
        if (isspace(c)) {
            in_word = 0;
        } else if (!in_word) {
            // first non-space character after whitespace starts a word
            in_word = 1;
            n_words++;
        }
    }
    printf("%lld %lld %lld %s\n", n_lines, n_words, n_chars, name);
}
// Entry point: print counts for each file named on the command line,
// or for stdin (labelled "stdin") when no file names are given.
// Returns 1 as soon as a file cannot be opened, otherwise 0.
int main(int argc, char *argv[]) {
    if (argc == 1) {
        process_stream(stdin, "stdin");
        return 0;
    }

    int k = 1;
    while (k < argc) {
        char *pathname = argv[k];
        FILE *fp = fopen(pathname, "r");
        if (!fp) {
            fprintf(stderr, "%s: %s: ", argv[0], pathname);
            perror("");
            return 1;
        }
        process_stream(fp, pathname);
        fclose(fp);
        k++;
    }
    return 0;
}
import sys
import os
def process_stream(stream):
    """
    Count lines, words and characters in stream and print the totals.

    The three counts are printed right-aligned in 6-character columns
    with no trailing newline, so the caller can append more to the line.
    A line is counted only if it ends with a newline; words are the
    whitespace-separated fields returned by str.split().
    """
    lines = 0
    words = 0
    characters = 0
    for line in stream:
        # Files opened in default text mode (and sys.stdin) present every
        # line ending as "\n".  os.linesep is "\r\n" on Windows and would
        # never match there, so compare with "\n" directly.
        lines += line.endswith("\n")
        words += len(line.split())
        characters += len(line)
    print(f"{lines:>6} {words:>6} {characters:>6}", end="")
def main():
    """
    Print line, word and character counts.

    Each file named in sys.argv[1:] gets one output line: its counts
    followed by its pathname.  If no file names are given, stdin is
    counted and its counts are printed on a line of their own.
    """
    if not sys.argv[1:]:
        process_stream(sys.stdin)
        # process_stream prints the counts without a newline so that a
        # name can follow; stdin has no name, so just end the line
        print()
    else:
        for pathname in sys.argv[1:]:
            with open(pathname, "r") as f:
                process_stream(f)
            print(f" {pathname}")


if __name__ == "__main__":
    main()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
// copy stream to stdout except for repeated lines
// (a line is dropped when it is identical to the line just before it)
void process_stream(FILE *stream) {
    char *line = NULL;
    size_t line_size = 0;
    char *last_line = NULL;
    size_t last_line_size = 0;

    while (getline(&line, &line_size, stream) > 0) {
        if (last_line == NULL || strcmp(line, last_line) != 0) {
            fputs(line, stdout);
        }

        // Remember this line by exchanging the two buffers instead of
        // copying it.  This needs no allocation of its own, so there is
        // no allocation failure to check for (the check used to be an
        // assert, which disappears when compiled with NDEBUG).
        // getline reuses or (re)allocates whichever buffer it is given.
        char *spare = last_line;
        size_t spare_size = last_line_size;
        last_line = line;
        last_line_size = line_size;
        line = spare;
        line_size = spare_size;
    }

    free(line);
    free(last_line);
}
// Entry point: filter the file named by the first argument,
// or stdin when no arguments are given.
// Only argv[1] is opened; any further arguments are not used.
// Returns 1 if the file cannot be opened, otherwise 0.
int main(int argc, char *argv[]) {
    if (argc == 1) {
        process_stream(stdin);
        return 0;
    }

    char *pathname = argv[1];
    FILE *input = fopen(pathname, "r");
    if (!input) {
        fprintf(stderr, "%s: %s: ", argv[0], pathname);
        perror("");
        return 1;
    }
    process_stream(input);
    fclose(input);
    return 0;
}
import sys
def process_stream(stream):
    """
    Copy stream to stdout except for repeated lines.

    A line is skipped when it is equal to the line immediately before
    it; lines that are equal but not adjacent are all printed.
    """
    last_line = None
    for line in stream:
        # before the first line last_line is None, which never equals a
        # str, so the first line is always printed
        if line != last_line:
            print(line, end="")
        last_line = line
def main():
    """
    Filter each file named in sys.argv[1:] in turn.

    If no file names are given, stdin is filtered instead.
    """
    pathnames = sys.argv[1:]
    if not pathnames:
        process_stream(sys.stdin)
    else:
        for pathname in pathnames:
            with open(pathname, 'r') as f:
                process_stream(f)


if __name__ == '__main__':
    main()
import sys
def process_stream(f):
    """
    Print the lines of f in sorted order.

    All lines are read before anything is printed.  Every output line
    ends with a newline, even if the last input line did not.
    """
    # A final line without "\n" would otherwise be glued to whichever
    # line follows it after sorting, so terminate every line first.
    lines = [line if line.endswith("\n") else line + "\n" for line in f]
    lines.sort()
    print("".join(lines), end="")
def main():
    """
    Sort the file named by the first argument, or stdin if there are
    no arguments.

    Only sys.argv[1] is opened; any further arguments are not used.
    """
    if len(sys.argv) == 1:
        process_stream(sys.stdin)
    else:
        with open(sys.argv[1], 'r') as f:
            process_stream(f)


if __name__ == '__main__':
    main()
import subprocess
import sys
# the real xargs runs the command multiple times if input is large
# the real xargs treats quotes specially
def main():
    """
    Run the command given in sys.argv[1:] once, with every
    whitespace-separated word read from stdin appended to its arguments.

    As with the real xargs, echo is run when no command is given.
    """
    input_words = [w for line in sys.stdin for w in line.split()]
    # without a default, the first input word would be run as the command
    command = sys.argv[1:] or ["echo"]
    subprocess.run(command + input_words)


if __name__ == "__main__":
    main()
grep
, wc
, head
, etc.)
It also contains solutions to the exercises discussed in lectures.
-
Consider a file course_codes.tsv
containing UNSW course codes and names.
ls -l course_codes.tsv -rw-r--r-- 1 cs2041 cs2041 137255 Jul 14 2022 course_codes.tsv
wc course_codes.tsv 4159 17806 137255 course_codes.tsv
head course_codes.tsv ACCT1501 Accounting & Financial Mgt 1A ACCT1511 Accounting & Financial Mgt 1B ACCT2101 Industry Placement 1 ACCT2511 Financial Acct Fundamentals ACCT2522 Management Accounting 1 ACCT2542 Corporate Financial Reporting ACCT2672 Accounting analytics ACCT3202 Industry Placement 2 ACCT3303 Industry Placement 3 ACCT3563 Issues in Financial Reporting
It looks like the code is separated from the title by a number of spaces. We can check this via cat -A
:head -5 course_codes.tsv | cat -A ACCT1501^IAccounting & Financial Mgt 1A$ ACCT1511^IAccounting & Financial Mgt 1B$ ACCT2101^IIndustry Placement 1$ ACCT2511^IFinancial Acct Fundamentals$ ACCT2522^IManagement Accounting 1$
This shows us that our initial guess was wrong, and there's actually a tab character between the course code and title (shown as ^I
by cat -A
). Also, the location of the end-of-line marker ($
) indicates that there are no trailing spaces or tabs. If we need to know what COMP courses there are:
grep -E -c COMP course_codes.tsv 115
grep -E COMP course_codes.tsv COMP1010 The Art of Computing COMP1511 Programming Fundamentals COMP1521 Computer Systems Fundamentals COMP1531 Software Eng Fundamentals COMP1911 Computing 1A COMP2041 Software Construction COMP2111 System Modelling and Design COMP2511 O-O Design & Programming COMP2521 Data Structures and Algorithms COMP3121 Algorithms & Programming Tech COMP3131 Programming Languages & Compil COMP3141 Software Sys Des&Implementat'n COMP3151 Foundations of Concurrency COMP3153 Algorithmic Verification COMP3161 Concepts of Programming Lang. COMP3211 Computer Architecture COMP3222 Digital Circuits and Systems COMP3231 Operating Systems COMP3311 Database Systems COMP3331 Computer Networks&Applications COMP3411 Artificial Intelligence COMP3421 Computer Graphics COMP3431 Robotic Software Architecture COMP3511 Human Computer Interaction COMP3601 Design Project A COMP3821 Ext Algorithms&Prog Techniques COMP3891 Ext Operating Systems COMP3900 Computer Science Project COMP3901 Special Project A COMP3902 Special Project B COMP4121 Advanced Algorithms COMP4128 Programming Challenges COMP4141 Theory of Computation COMP4161 Advanced Verification COMP4336 Mobile Data Networking COMP4337 Securing Fixed & Wireless Netw COMP4418 Knowledge Representation COMP4511 User Interface Design & Constr COMP4601 Design Project B COMP4920 Professional Issues and Ethics COMP4951 Research Thesis A COMP4952 Research Thesis B COMP4953 Research Thesis C COMP4961 Computer Science Thesis A COMP4962 Computer Science Thesis B COMP4963 Computer Science Thesis C COMP6080 Web Front-End Programming COMP6441 Security Engineering COMP6443 Web Application Security COMP6445 Digital Forensics COMP6447 Security Assessment COMP6448 Security Masterclass COMP6451 Cryptocurrency and DLT COMP6452 Blockchain App Architecture COMP6714 Info Retrieval and Web Search COMP6721 (In-)Formal Methods COMP6733 Internet of Things COMP6741 Algorithms for Intractable Pbs COMP6752 Modelling Concurrent Systems COMP6771 
Advanced C++ Programming COMP6841 Extended Security Engineering COMP6843 Extended WebApp Security COMP6845 Extended Digital Forensics COMP9020 Foundations of Comp. Science COMP9021 Principles of Programming COMP9024 Data Structures & Algorithms COMP9032 Microprocessors & Interfacing COMP9044 Software Construction COMP9101 Design &Analysis of Algorithms COMP9102 Programming Lang & Compilers COMP9153 Algorithmic Verification COMP9154 Foundations of Concurrency COMP9164 Concepts of Programming Lang. COMP9201 Operating Systems COMP9211 Computer Architecture COMP9222 Digital Circuits and Systems COMP9242 Advanced Operating Systems COMP9243 Distributed Systems COMP9283 Ext Operating Systems COMP9301 Cyber Security Project COMP9302 Cyber Security Project B COMP9311 Database Systems COMP9312 Data Analytics for Graphs COMP9313 Big Data Management COMP9315 Database Systems Implementat'n COMP9319 Web Data Compression & Search COMP9321 Data Services Engineering COMP9322 Software Service Design & Eng COMP9323 e-Enterprise Project COMP9331 Computer Networks&Applications COMP9332 Network Routing and Switching COMP9334 Systems Capacity Planning COMP9336 Mobile Data Networking COMP9337 Securing Fixed & Wireless Netw COMP9414 Artificial Intelligence COMP9415 Computer Graphics COMP9417 Machine Learning & Data Mining COMP9418 Advanced Machine Learning COMP9434 Robotic Software Architecture COMP9444 Neural Networks, Deep Learning COMP9447 Security Engineering Workshop COMP9491 Applied AI COMP9511 Human Computer Interaction COMP9517 Computer Vision COMP9596 Research Project COMP9727 Recommender Systems COMP9801 Ext Design&Analysis of Algo COMP9814 Ext Artificial Intelligence COMP9900 Info Tech Project COMP9901 P/T Res. Thesis Comp Sci & Eng COMP9902 Res. Thesis Comp Sci & Eng F/T COMP9945 Research Project COMP9991 Research Project A COMP9992 Research Project B COMP9993 Research Project C
Either of the two commands below tell us which courses have "comp" in their name or code (in upper or lower case).tr A-Z a-z <course_codes.tsv | grep -E comp benv7503 geocomputation binf3020 computational bioinformatics binf9020 computational bioinformatics biom9332 biocompatibility ceic6711 complex fluids chem3031 inorg chem:trans metals & comp code1110 computational design theory 1 code1210 computational design theory 2 code2121 advanced computational design code2132 computational design studio iv comp1010 the art of computing comp1511 programming fundamentals comp1521 computer systems fundamentals comp1531 software eng fundamentals comp1911 computing 1a comp2041 software construction comp2111 system modelling and design comp2511 o-o design & programming comp2521 data structures and algorithms comp3121 algorithms & programming tech comp3131 programming languages & compil comp3141 software sys des&implementat'n comp3151 foundations of concurrency comp3153 algorithmic verification comp3161 concepts of programming lang. 
comp3211 computer architecture comp3222 digital circuits and systems comp3231 operating systems comp3311 database systems comp3331 computer networks&applications comp3411 artificial intelligence comp3421 computer graphics comp3431 robotic software architecture comp3511 human computer interaction comp3601 design project a comp3821 ext algorithms&prog techniques comp3891 ext operating systems comp3900 computer science project comp3901 special project a comp3902 special project b comp4121 advanced algorithms comp4128 programming challenges comp4141 theory of computation comp4161 advanced verification comp4336 mobile data networking comp4337 securing fixed & wireless netw comp4418 knowledge representation comp4511 user interface design & constr comp4601 design project b comp4920 professional issues and ethics comp4951 research thesis a comp4952 research thesis b comp4953 research thesis c comp4961 computer science thesis a comp4962 computer science thesis b comp4963 computer science thesis c comp6080 web front-end programming comp6441 security engineering comp6443 web application security comp6445 digital forensics comp6447 security assessment comp6448 security masterclass comp6451 cryptocurrency and dlt comp6452 blockchain app architecture comp6714 info retrieval and web search comp6721 (in-)formal methods comp6733 internet of things comp6741 algorithms for intractable pbs comp6752 modelling concurrent systems comp6771 advanced c++ programming comp6841 extended security engineering comp6843 extended webapp security comp6845 extended digital forensics comp9020 foundations of comp. science comp9021 principles of programming comp9024 data structures & algorithms comp9032 microprocessors & interfacing comp9044 software construction comp9101 design &analysis of algorithms comp9102 programming lang & compilers comp9153 algorithmic verification comp9154 foundations of concurrency comp9164 concepts of programming lang. 
comp9201 operating systems comp9211 computer architecture comp9222 digital circuits and systems comp9242 advanced operating systems comp9243 distributed systems comp9283 ext operating systems comp9301 cyber security project comp9302 cyber security project b comp9311 database systems comp9312 data analytics for graphs comp9313 big data management comp9315 database systems implementat'n comp9319 web data compression & search comp9321 data services engineering comp9322 software service design & eng comp9323 e-enterprise project comp9331 computer networks&applications comp9332 network routing and switching comp9334 systems capacity planning comp9336 mobile data networking comp9337 securing fixed & wireless netw comp9414 artificial intelligence comp9415 computer graphics comp9417 machine learning & data mining comp9418 advanced machine learning comp9434 robotic software architecture comp9444 neural networks, deep learning comp9447 security engineering workshop comp9491 applied ai comp9511 human computer interaction comp9517 computer vision comp9596 research project comp9727 recommender systems comp9801 ext design&analysis of algo comp9814 ext artificial intelligence comp9900 info tech project comp9901 p/t res. thesis comp sci & eng comp9902 res. 
thesis comp sci & eng f/t comp9945 research project comp9991 research project a comp9992 research project b comp9993 research project c cven2002 engineering computations cven9820 computational struct mechanics cven9822 steel & composite structures dpst1092 computer systems fundamentals elec4605 quantum devices and computers elec4632 computer control systems engg1811 computing for engineers fins3649 compliance & regulatory tech fins5549 regtech and compliance fndn0301 computing studies fndn0303 computing for design fndn0304 computing for academic purpose fndn0305 computing for acad purpose h fndn0306 computing for acad purposes s fndn0311 computing studies - t fndn0312 computing academic purpose - t fndn0314 computing for design - t fndn0315 computing for acad purpose th fndn0316 computing for acad purp - ts gmat2500 surveying computations a gmat2550 surveying computations b hdat9300 computing 4 hdat ides3231 adv computer aided product des infs4858 managing complex projects infs5848 managing complex projects inta1002 interior arch composition 2 inta1003 interior arch composition 3 jurd7468 aust legal system comp perspec jurd7522 competition law jurd7549 child rights comp clinic jurd7603 global issues in comp policy jurd7610 mediation competition jurd7616 international & comparative ip laws3022 competition law laws3368 aust legal system comp perspec laws3510 mediation competition laws3549 child rights comp clinic laws8016 international & comparative ip laws8168 aust legal system comp perspec laws8203 global issues in comp policy laws8349 child rights comp clinic manf9543 comp aided design/manufacture math2301 mathematical computing math2521 complex analysis math2621 higher complex analysis math3101 comp maths science & eng math3311 comp mathematics for finance math3821 stat modelling & computing math3871 bayesian inference and comp math5305 comp maths science & eng math5335 comp mathematics for finance math5856 intro to stats and stat comput math5960 bayesian 
inference & comput'n mats4005 composites and functional mats mats6110 computational materials mech4620 computational fluid dynamics mech9420 composite materials and mechan mgmt2106 comparative management systems mtrn2500 comp for mtrn mtrn3500 comp appl in mechatonic sys phcm9471 comparative h'lth care systems ptrl5016 well completions & stimulation ptrl6016 well completions & stimulation socf5112 complex practice issues tabl5544 comparative tax systems tabl5545 principles of reg, comp&enfor tabl5546 intro to regtech in comp&enfor tabl5547 building effec. comp. sys ymed3006 comparative health systems zeit9100 computer science research f/t zeit9101 computer science research p/t
grep -E -i comp course_codes.tsv BENV7503 Geocomputation BINF3020 Computational Bioinformatics BINF9020 Computational Bioinformatics BIOM9332 Biocompatibility CEIC6711 Complex Fluids CHEM3031 Inorg Chem:Trans Metals & Comp CODE1110 Computational Design Theory 1 CODE1210 Computational Design Theory 2 CODE2121 Advanced Computational Design CODE2132 Computational Design Studio IV COMP1010 The Art of Computing COMP1511 Programming Fundamentals COMP1521 Computer Systems Fundamentals COMP1531 Software Eng Fundamentals COMP1911 Computing 1A COMP2041 Software Construction COMP2111 System Modelling and Design COMP2511 O-O Design & Programming COMP2521 Data Structures and Algorithms COMP3121 Algorithms & Programming Tech COMP3131 Programming Languages & Compil COMP3141 Software Sys Des&Implementat'n COMP3151 Foundations of Concurrency COMP3153 Algorithmic Verification COMP3161 Concepts of Programming Lang. COMP3211 Computer Architecture COMP3222 Digital Circuits and Systems COMP3231 Operating Systems COMP3311 Database Systems COMP3331 Computer Networks&Applications COMP3411 Artificial Intelligence COMP3421 Computer Graphics COMP3431 Robotic Software Architecture COMP3511 Human Computer Interaction COMP3601 Design Project A COMP3821 Ext Algorithms&Prog Techniques COMP3891 Ext Operating Systems COMP3900 Computer Science Project COMP3901 Special Project A COMP3902 Special Project B COMP4121 Advanced Algorithms COMP4128 Programming Challenges COMP4141 Theory of Computation COMP4161 Advanced Verification COMP4336 Mobile Data Networking COMP4337 Securing Fixed & Wireless Netw COMP4418 Knowledge Representation COMP4511 User Interface Design & Constr COMP4601 Design Project B COMP4920 Professional Issues and Ethics COMP4951 Research Thesis A COMP4952 Research Thesis B COMP4953 Research Thesis C COMP4961 Computer Science Thesis A COMP4962 Computer Science Thesis B COMP4963 Computer Science Thesis C COMP6080 Web Front-End Programming COMP6441 Security Engineering COMP6443 Web 
Application Security COMP6445 Digital Forensics COMP6447 Security Assessment COMP6448 Security Masterclass COMP6451 Cryptocurrency and DLT COMP6452 Blockchain App Architecture COMP6714 Info Retrieval and Web Search COMP6721 (In-)Formal Methods COMP6733 Internet of Things COMP6741 Algorithms for Intractable Pbs COMP6752 Modelling Concurrent Systems COMP6771 Advanced C++ Programming COMP6841 Extended Security Engineering COMP6843 Extended WebApp Security COMP6845 Extended Digital Forensics COMP9020 Foundations of Comp. Science COMP9021 Principles of Programming COMP9024 Data Structures & Algorithms COMP9032 Microprocessors & Interfacing COMP9044 Software Construction COMP9101 Design &Analysis of Algorithms COMP9102 Programming Lang & Compilers COMP9153 Algorithmic Verification COMP9154 Foundations of Concurrency COMP9164 Concepts of Programming Lang. COMP9201 Operating Systems COMP9211 Computer Architecture COMP9222 Digital Circuits and Systems COMP9242 Advanced Operating Systems COMP9243 Distributed Systems COMP9283 Ext Operating Systems COMP9301 Cyber Security Project COMP9302 Cyber Security Project B COMP9311 Database Systems COMP9312 Data Analytics for Graphs COMP9313 Big Data Management COMP9315 Database Systems Implementat'n COMP9319 Web Data Compression & Search COMP9321 Data Services Engineering COMP9322 Software Service Design & Eng COMP9323 e-Enterprise Project COMP9331 Computer Networks&Applications COMP9332 Network Routing and Switching COMP9334 Systems Capacity Planning COMP9336 Mobile Data Networking COMP9337 Securing Fixed & Wireless Netw COMP9414 Artificial Intelligence COMP9415 Computer Graphics COMP9417 Machine Learning & Data Mining COMP9418 Advanced Machine Learning COMP9434 Robotic Software Architecture COMP9444 Neural Networks, Deep Learning COMP9447 Security Engineering Workshop COMP9491 Applied AI COMP9511 Human Computer Interaction COMP9517 Computer Vision COMP9596 Research Project COMP9727 Recommender Systems COMP9801 Ext Design&Analysis of 
Algo COMP9814 Ext Artificial Intelligence COMP9900 Info Tech Project COMP9901 P/T Res. Thesis Comp Sci & Eng COMP9902 Res. Thesis Comp Sci & Eng F/T COMP9945 Research Project COMP9991 Research Project A COMP9992 Research Project B COMP9993 Research Project C CVEN2002 Engineering Computations CVEN9820 Computational Struct Mechanics CVEN9822 Steel & Composite Structures DPST1092 Computer Systems Fundamentals ELEC4605 Quantum Devices and Computers ELEC4632 Computer Control Systems ENGG1811 Computing for Engineers FINS3649 Compliance & Regulatory Tech FINS5549 RegTech and Compliance FNDN0301 Computing Studies FNDN0303 Computing for Design FNDN0304 Computing for Academic Purpose FNDN0305 Computing for Acad Purpose H FNDN0306 Computing for Acad Purposes S FNDN0311 Computing Studies - T FNDN0312 Computing Academic Purpose - T FNDN0314 Computing for Design - T FNDN0315 Computing for Acad Purpose TH FNDN0316 Computing for Acad Purp - TS GMAT2500 Surveying Computations A GMAT2550 Surveying Computations B HDAT9300 Computing 4 HDAT IDES3231 Adv Computer Aided Product Des INFS4858 Managing Complex Projects INFS5848 Managing Complex Projects INTA1002 Interior Arch Composition 2 INTA1003 Interior Arch Composition 3 JURD7468 Aust Legal System Comp Perspec JURD7522 Competition Law JURD7549 Child Rights Comp Clinic JURD7603 Global Issues in Comp Policy JURD7610 Mediation Competition JURD7616 International & Comparative IP LAWS3022 Competition Law LAWS3368 Aust Legal System Comp Perspec LAWS3510 Mediation Competition LAWS3549 Child Rights Comp Clinic LAWS8016 International & Comparative IP LAWS8168 Aust Legal System Comp Perspec LAWS8203 Global Issues in Comp Policy LAWS8349 Child Rights Comp Clinic MANF9543 Comp Aided Design/Manufacture MATH2301 Mathematical Computing MATH2521 Complex Analysis MATH2621 Higher Complex Analysis MATH3101 Comp Maths Science & Eng MATH3311 Comp Mathematics for Finance MATH3821 Stat Modelling & Computing MATH3871 Bayesian Inference and Comp MATH5305 Comp 
Maths Science & Eng MATH5335 Comp Mathematics for Finance MATH5856 Intro to Stats and Stat Comput MATH5960 Bayesian Inference & Comput'n MATS4005 Composites and Functional Mats MATS6110 Computational Materials MECH4620 Computational Fluid Dynamics MECH9420 Composite Materials and Mechan MGMT2106 Comparative Management Systems MTRN2500 Comp for MTRN MTRN3500 Comp Appl in Mechatonic Sys PHCM9471 Comparative H'lth Care Systems PTRL5016 Well Completions & Stimulation PTRL6016 Well Completions & Stimulation SOCF5112 Complex Practice Issues TABL5544 Comparative Tax Systems TABL5545 Principles of Reg, Comp&Enfor TABL5546 Intro to RegTech in Comp&Enfor TABL5547 Building Effec. Comp. Sys YMED3006 Comparative Health Systems ZEIT9100 Computer Science Research F/T ZEIT9101 Computer Science Research P/T
The second one looks better because the data itself isn't transformed, only the internal comparisons. If we want to know how many courses have "computing" or "computer" in their title, we have to use
grep -E
, which recognises the alternation operator "|", and wc
to count the number of matches. There are a couple of ways to construct the regexp:
grep -E -i 'computer|computing' course_codes.tsv | wc 36 159 1236
grep -E -i 'comput(er|ing)' course_codes.tsv | wc 36 159 1236
If you don't like the irrelevant word and character counts, use wc -l.
Many of these 36 matches were CSE offerings, whose course codes begin with COMP, SENG or BINF. Which of the matches were courses offered by other schools?
Think about it for a moment.... There's no "but not" regexp operator, so instead we construct a composite filter with an extra step to deal with eliminating the CSE courses:
grep -E -i 'computer|computing' course_codes.tsv | grep -E -v '^(COMP|SENG|BINF)' DPST1092 Computer Systems Fundamentals ELEC4605 Quantum Devices and Computers ELEC4632 Computer Control Systems ENGG1811 Computing for Engineers FNDN0301 Computing Studies FNDN0303 Computing for Design FNDN0304 Computing for Academic Purpose FNDN0305 Computing for Acad Purpose H FNDN0306 Computing for Acad Purposes S FNDN0311 Computing Studies - T FNDN0312 Computing Academic Purpose - T FNDN0314 Computing for Design - T FNDN0315 Computing for Acad Purpose TH FNDN0316 Computing for Acad Purp - TS HDAT9300 Computing 4 HDAT IDES3231 Adv Computer Aided Product Des MATH2301 Mathematical Computing MATH3821 Stat Modelling & Computing ZEIT9100 Computer Science Research F/T ZEIT9101 Computer Science Research P/T
The last ones are from the Computer Science school at ADFA. -
Consider a file called enrollments.txt
which contains data about student enrollment in courses.
There is one line for each student enrolled in a course:
ls -l enrollments.txt -rw-r--r-- 1 cs2041 cs2041 685532 Jul 14 2022 enrollments.txt
wc enrollments.txt 9832 24651 685532 enrollments.txt
head enrollments.txt COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M COMP4952|5200422|Ji, Aidan Han|3778/2|SENGAH|083.357|22T2|19960203|M COMP1511|5296066|Xue, Marko|8543|COMPZ1|080.000|22T2|19980306|M COMP1511|5294960|Zhou, Yvonne|3784/3|COMPA1|085.583|22T2|20001009|F COMP9511|5242743|Luong, Zihan|3707/1|BINFB1|068.000|22T2|20060117|F COMP1911|5259200|Wang, Jiaqi Joanne|8543|COMPBH|081.125|22T2|19950826|F COMP9902|5241298|Li, Laura|8543|COMPI1|084.000|22T2|20000330|F COMP9024|5245288|Thomas, George|3789/2|SENGAH|079.923|22T2|19970427|M COMP2041|5298688|Ng, Henry|8543|COMPA1|076.000|22T2|20031212|M COMP3141|5298688|Ng, Henry|8543|COMPA1|076.000|22T2|20031212|M
The following commands count how many students are enrolled in COMP2041 or COMP9044. The course IDs differ in more than one character, so alternation is used rather than a character class. The first version below is often preferred because initially you may want to know "how many xxx", then having found that out the next question might be, "well give me a sample of 10 or so of them". Then it's a simple matter of replacing
wc
by head.
grep -E '^COMP(2041|9044)' enrollments.txt | wc -l 802
grep -E -c '^COMP(2041|9044)' enrollments.txt 802
The last field in the enrollment file records the student's gender. This command counts the number of female students enrolled in the courses.
grep -E '^COMP(2041|9044)' enrollments.txt | grep -E 'F$' | wc -l 193
Not a very good gender balance, is it? By the way, the two
grep -E
s could have been combined into one. How? This command will give a sorted list of course codes:
cut -d'|' -f1 enrollments.txt | sort | uniq COMP1010 COMP1511 COMP1521 COMP1531 COMP1911 COMP2041 COMP2511 COMP2521 COMP3121 COMP3141 COMP3151 COMP3153 COMP3331 COMP3511 COMP3900 COMP3901 COMP4336 COMP4601 COMP4951 COMP4952 COMP4953 COMP4961 COMP4962 COMP4963 COMP6443 COMP6447 COMP6452 COMP6721 COMP6741 COMP6771 COMP6843 COMP9020 COMP9021 COMP9024 COMP9044 COMP9101 COMP9153 COMP9154 COMP9242 COMP9311 COMP9312 COMP9313 COMP9319 COMP9323 COMP9331 COMP9336 COMP9414 COMP9417 COMP9444 COMP9447 COMP9491 COMP9511 COMP9517 COMP9727 COMP9900 COMP9901 COMP9902 COMP9991 COMP9992 COMP9993
The student records system known to users as myUNSW is built on top of a large US product known as PeopleSoft (the company was taken over by Oracle in 2004). On a scale of 1 to 10 the quality of the design of this product is about 3. One of its many flaws is its insistence that everybody must have two names, a "Last Name" and a "First Name", neither of which can be empty. To signify that a person has only a single name (common in Sri Lanka, for example), the system stores a dot character in the "First Name" field. The enrollments file shows the data as stored in the system, with a comma and space separating the component names. It has some single-named people (note that the names themselves have been disguised):grep -E ', \.' enrollments.txt COMP1511|5230122|So, .|8543|COMPAS|075.286|22T2|19970908|F COMP2511|5240102|Lu, .|1650|COMPCS|083.929|22T2|19951024|M COMP3331|5240102|Lu, .|1650|COMPCS|083.929|22T2|19951024|M COMP2511|5224053|Tran, .|3785/3|COMPY1|080.033|22T2|20031206|F COMP3121|5224053|Tran, .|3785/3|COMPY1|080.033|22T2|20031206|F COMP3331|5224053|Tran, .|3785/3|COMPY1|080.033|22T2|20031206|F COMP3511|5281400|Park, .|8543|COMPA1|073.900|22T2|19940714|F COMP9900|5280116|Lim, .|3778/2|ELECAH|072.563|22T2|20030705|M COMP1531|5258398|Mai, .|8543|COMPCS|076.133|22T2|19950918|F COMP9313|5279423|Patel, .|3782/3|COMPA1|073.737|22T2|19990814|M COMP9414|5279423|Patel, .|3782/3|COMPA1|073.737|22T2|19990814|M COMP6771|5287674|Bui, .|3706/3|COMPA1|076.000|22T2|20001026|M COMP9331|5287674|Bui, .|3706/3|COMPA1|076.000|22T2|20001026|M COMP1531|5229312|So, .|8543|COMPA1|070.889|22T2|19971005|F COMP1511|5236671|Tan, .|3764/5|COMPA1|065.333|22T2|20030519|F COMP2041|5254501|Guo, .|8543|COMPFR|056.400|22T2|20000617|F COMP2521|5254501|Guo, .|8543|COMPFR|056.400|22T2|20000617|F COMP2511|5282825|Xie, .|3778/2|COMPA1 FINSA1|076.333|22T2|20030117|F COMP2521|5289353|Zheng, .|8543|COMPFR|073.952|22T2|20041116|F COMP1511|5240422|Yuan, .|8543|BINFAH|034.667|22T2|19950529|F 
COMP1521|5279246|Lam, .|3778/3|SENGAH|087.344|22T2|19941123|F COMP3121|5280766|Tian, .|3785/2|COMPA1 MATHP1|000.000|22T2|19990117|F COMP6721|5280766|Tian, .|3785/2|COMPA1 MATHP1|000.000|22T2|19990117|F COMP6771|5280766|Tian, .|3785/2|COMPA1 MATHP1|000.000|22T2|19990117|F COMP9313|5274705|Rahman, .|8543|COMPA1|092.167|22T2|20010611|M COMP3900|5291673|Qin, .|3778/3|COMPIS|079.375|22T2|20060512|M COMP9020|5205426|Zhao, .|3784/4|SENGAH|074.692|22T2|19941106|F COMP3511|5260150|Feng, .|3764/1|COMPAS|067.455|22T2|20000108|F COMP3331|5232121|Lim, .|3707/2|ECONA1 MATHK1|072.000|22T2|20001229|F COMP6452|5232121|Lim, .|3707/2|ECONA1 MATHK1|072.000|22T2|20001229|F COMP9417|5281669|Lu, .|3961/1|COMPAS|077.500|22T2|20000612|F COMP9517|5281669|Lu, .|3961/1|COMPAS|077.500|22T2|20000612|F COMP9902|5267614|Jiang, .|3789/1|COMPA1|078.667|22T2|20040623|F COMP9902|5277612|Cai, .|3707/4|COMPA1|071.125|22T2|19971207|F COMP1521|5224424|Luong, .|3784/3|SENGAH|070.857|22T2|20050628|F COMP1531|5282863|Zhou, .|8543|SENGAH|079.000|22T2|19980225|M COMP1531|5299583|Lewis, .|8543|MTRNAH|064.429|22T2|19960408|M COMP2521|5299583|Lewis, .|8543|MTRNAH|064.429|22T2|19960408|M COMP9444|5278776|Zhong, .|3778/3|COMPA1 SOLABH|070.667|22T2|19970416|M COMP9517|5278776|Zhong, .|3778/3|COMPA1 SOLABH|070.667|22T2|19970416|M COMP9021|5246511|Huang, .|3674/4|COMPAS|063.727|22T2|20030526|M COMP9024|5246511|Huang, .|3674/4|COMPAS|063.727|22T2|20030526|M COMP9311|5246511|Huang, .|3674/4|COMPAS|063.727|22T2|20030526|M COMP3331|5201179|Han, .|3707/3|MTRNAH|068.429|22T2|20000519|M COMP6771|5201179|Han, .|3707/3|MTRNAH|068.429|22T2|20000519|M COMP1531|5224314|Choi, .|3778/1|COMPAS|071.091|22T2|19971129|M COMP9313|5203989|Xue, .|3673/1|ENGGAH|076.167|22T2|19980706|M COMP3121|5237499|Lai, .|3784/1|SENGAH|078.833|22T2|19941005|F COMP9901|5219299|Wu, .|3778/1|COMPAS|082.821|22T2|19980321|F COMP1521|5213563|Xu, .|3707/2|COMPSS|056.529|22T2|20020212|F COMP1521|5265834|Shi, .|3791/1|COMPA1|070.846|22T2|19990129|F 
COMP6452|5243392|Fan, .|3785/1|BINFB1|076.000|22T2|20020105|F COMP9331|5243392|Fan, .|3785/1|BINFB1|076.000|22T2|20020105|F COMP9517|5243392|Fan, .|3785/1|BINFB1|076.000|22T2|20020105|F
What would have happened if we forgot the backslash? If we wanted to know how many different students there were of this type rather than all enrollments, just cut out the second field (student ID) and use
uniq
. It's not necessary to sort the data in this case only because the data is clustered, that is, all equal values are adjacent although they're not necessarily sorted.grep -E ', \.' enrollments.txt | cut -d'|' -f2 | uniq | wc 37 37 296
-
Now let us turn our attention from students and courses to programs.
The enrollments file, as well as linking
a student to the courses they're taking, also links them to the
program (degree) that they are currently enrolled in.
Consider that we want to find out the program codes of the students
taking COMP2041.
The following pipeline will do this:
# list the distinct program codes of students enrolled in COMP2041 or COMP9044
grep -E 'COMP(2041|9044)' enrollments.txt | cut -d'|' -f4 | cut -d/ -f1 | sort | uniq
If we want to know how many students come from each program, ordered from most common program to least common program, try this:grep -E 'COMP(2041|9044)' enrollments.txt | cut -d'|' -f4 | cut -d/ -f1 | sort | uniq -c | sort -nr 223 8543 153 3778 109 3707 62 3784 40 3785 29 3674 16 3789 15 3959 14 3764 13 3768 12 3783 12 3781 11 3791 11 1650 10 7543 10 3782 7 3970 5 3762 4 3767 4 3706 3 4515 3 3736 3 3673 3 2645 2 8959 2 3991 2 3956 2 3786 2 3761 2 3409 1 8750 1 8338 1 8161 1 7021 1 4822 1 4500 1 3987 1 3978 1 3925 1 3738 1 3632 1 3584 1 3529 1 3523 1 3155 1 3154 1 1710 1 1550
Note that a tab is usually inserted between the count and the data, but not all implementations of the uniq
command ensure this. -
Consider a file called program_codes.tsv that contains the code and name of each program offered at UNSW (excluding research programs):
wc program_codes.tsv 250 1001 7295 program_codes.tsv
head program_codes.tsv 1004 Joint PhD 1292 PhD Art, Design and Media 1400 Psychology 1540 Economics 1545 Actuarial Studies 1550 Marketing 1561 Banking and Finance 1630 Civil & Environmental Eng 1640 Electrical Engineering 1650 Computer Science and Eng
We can use this file to give more details of the programs that COMP2041 students are taking, if some users don't want to deal with just course codes.grep -E 'COMP(2041|9044)' enrollments.txt | cut -d'|' -f4 | cut -d/ -f1 | sort | uniq | join - program_codes.tsv 1550 Marketing 1650 Computer Science and Eng 1710 Biomedical Engineering 2645 Engineering (MPhil) 3154 Actuarial Studies / Science 3155 Actuarial Studies / Commerce 3409 Arts 3523 Advanced Maths (Hons)/Commerce 3529 Commerce/Science 3584 Commerce / Information Systems 3632 Advanced Science (Honours) 3673 Economics / Computer Science 3674 Actuarial Stu / Computer Sci 3706 Engineering Science 3707 Engineering (Honours) 3736 BE (Hons) ME Elec Eng 3738 Engineering Science / CompSc 3761 Adv Math (Hons) / Eng (Hons) 3762 AdvSci(Hons)/Engineering(Hons) 3764 Engineering (Hons)/Commerce 3767 Engineering (Hons) / Science 3768 Eng (Hons) / MBiomedE 3778 Computer Science 3781 Adv Maths (Hons) / Comp Sci 3782 Adv Science (Hons) / Comp Sci 3783 Computer Science / Arts 3784 Commerce / Computer Science 3785 Engineering (Hons) / Comp Sci 3786 Computer Science / Law 3789 Science / Computer Science 3791 Computer Science / Media Arts 3925 Science and Business 3956 Advanced Mathematics (Honours) 3959 Data Science and Decisions 3970 Science 3978 Computer Science 3987 Advanced Science (Honours) 3991 Medical Science 4500 Science (Honours) 4515 Comp Sci & Eng (Honours) 4822 Design 7543 Computing 8161 Financial Mathematics 8338 Engineering Science 8543 Information Technology 8750 Statistics 8959 Data Science and Decisions
We can combine the enrollment counts (for both courses) with the program titles to produce a self-descriptive tally. It's even better if it's in decreasing order of popularity, so after joining the tallies with the program titles, re-sort the composite data:grep -E 'COMP(2041|9044)' enrollments.txt | cut -d'|' -f4 | cut -d/ -f1 | sort | uniq -c | join -1 2 -a 1 - program_codes.tsv | sort -k2rn 8543 223 Information Technology 3778 153 Computer Science 3707 109 Engineering (Honours) 3784 62 Commerce / Computer Science 3785 40 Engineering (Hons) / Comp Sci 3674 29 Actuarial Stu / Computer Sci 3789 16 Science / Computer Science 3959 15 Data Science and Decisions 3764 14 Engineering (Hons)/Commerce 3768 13 Eng (Hons) / MBiomedE 3781 12 Adv Maths (Hons) / Comp Sci 3783 12 Computer Science / Arts 1650 11 Computer Science and Eng 3791 11 Computer Science / Media Arts 3782 10 Adv Science (Hons) / Comp Sci 7543 10 Computing 3970 7 Science 3762 5 AdvSci(Hons)/Engineering(Hons) 3706 4 Engineering Science 3767 4 Engineering (Hons) / Science 2645 3 Engineering (MPhil) 3673 3 Economics / Computer Science 3736 3 BE (Hons) ME Elec Eng 4515 3 Comp Sci & Eng (Honours) 3409 2 Arts 3761 2 Adv Math (Hons) / Eng (Hons) 3786 2 Computer Science / Law 3956 2 Advanced Mathematics (Honours) 3991 2 Medical Science 8959 2 Data Science and Decisions 1550 1 Marketing 1710 1 Biomedical Engineering 3154 1 Actuarial Studies / Science 3155 1 Actuarial Studies / Commerce 3523 1 Advanced Maths (Hons)/Commerce 3529 1 Commerce/Science 3584 1 Commerce / Information Systems 3632 1 Advanced Science (Honours) 3738 1 Engineering Science / CompSc 3925 1 Science and Business 3978 1 Computer Science 3987 1 Advanced Science (Honours) 4500 1 Science (Honours) 4822 1 Design 7021 1 8161 1 Financial Mathematics 8338 1 Engineering Science 8750 1 Statistics
Note the curious extra space before the title of some programs. It took me a while to work it out, can you? (Hint: how are the programs shown in the enrollment file?) Suggest an appropriate change to the pipeline. -
Lecture exercises on
wc
:- how many different programs does UNSW offer?
wc -l program_codes.tsv 250 program_codes.tsv
- how many times was WebCMS accessed?
wc -l access_log.txt 59779 access_log.txt
- how many students are studying in CSE?
wc -l enrollments.txt 9832 enrollments.txt
The above solutions assume that we're talking about total enrollments. If the question actually meant how many distinct individuals are studying courses offered by CSE, then we'd answer it as:
cut -d'|' -f2 enrollments.txt | sort | uniq | wc -l 6350
- how many words are there in the book?
wc -w book.txt 60428 book.txt
- how many lines are there in the story?
wc -l story.txt 87 story.txt
- how many different programs does UNSW offer?
#include <stdio.h>
// print arguments to stdout
int main(int argc, char *argv[]) {
for (int i = 1; i < argc; i++) {
if (i > 1) {
fputc(' ', stdout);
}
fputs(argv[i], stdout);
}
fputc('\n', stdout);
return 0;
}
import sys
def main():
"""
print arguments to stdout
"""
print(' '.join(sys.argv[1:]))
if __name__ == '__main__':
main()
A simple shell script demonstrating access to arguments.
# Print the special parameters a shell script can use to inspect
# how it was invoked: $0, $$, $#, $*, $@ and positional parameters.
echo My name is "$0"
echo My process number is $$
echo I have $# arguments
# you're not going to see any difference unless you use these in a loop
echo My arguments separately are $*
echo My arguments together are "$*"
echo My arguments separately are $@
echo My arguments as quoted are "$@"
echo My 5th argument is "'$5'"
# positional parameters above 9 need braces
echo My 10th argument is "'${10}'"
echo My 255th argument is "'${255}'"
$ ./accessing_args.sh one two "three four"
Using $*: one two three four
Using "$*": one two three four
Using "$@": one two three four
echo 'Using $*:'
for a in $*
do
echo "$a"
done
echo 'Using "$*":'
for a in "$*"
do
echo "$a"
done
# This is the way to loop over command-line arguments
echo 'Using "$@":'
for a in "$@"
do
echo "$a"
done
l [file|directories...] - list files
Short shell scripts can be used for convenience.
It is common to put these scripts in a directory such as /home/z1234567/scripts then add this directory to PATH e.g in .bash_login
PATH=$PATH:/home/z1234567/scripts
Note: "$@" like $* expands to the arguments to the script, but preserves whitespace in arguments.
ls -las "$@"
Print the integers 1..n or m..n
# One argument: count from 1 to <last>.
# Two arguments: count from <first> to <last>.
if test $# = 1
then
    first=1
    last=$1
elif test $# = 2
then
    first=$1
    last=$2
else
    echo "Usage: $0 <last> or $0 <first> <last>" 1>&2
    # stop here - otherwise the loop below runs with first and last unset
    exit 1
fi

number=$first
while test "$number" -le "$last"
do
    echo "$number"
    number=$((number + 1))
done
Print the integers 1..n or m..n
# One argument: count from 1 to <last>.
# Two arguments: count from <first> to <last>.
if [ $# = 1 ]
then
    first=1
    last=$1
elif [ $# = 2 ]
then
    first=$1
    last=$2
else
    echo "Usage: $0 <last> or $0 <first> <last>" 1>&2
    # stop here - otherwise the loop below runs with first and last unset
    exit 1
fi

number=$first
while [ "$number" -le "$last" ]
do
    echo "$number"
    number=$((number + 1))
done
Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.
For example:
repeat_seconds=300 #check every 5 minutes
if test $# = 3
then
url=$1
regexp=$2
email_address=$3
else
echo "Usage: $0 <url> <regex> <email-address>" 1>&2
exit 1
fi
while true
do
if curl --silent "$url"|grep -E "$regexp" >/dev/null
then
# the 2nd echo is for testing, remove to really send email
echo "Generated by $0" |
echo mail -s "website '$url' now matches regex '$regexp'" "$email_address"
exit 0
fi
sleep $repeat_seconds
done
Change the names of the specified files to lower case. (simple version of the perl utility rename)
Note use of test to check if the new filename is unchanged.
Note the double quotes around $filename so filenames containing spaces are not broken into multiple words
Note the use of mv -- to stop mv interpreting a filename beginning with - as an option
Note files named -n or -e still break the script because echo will treat them as an option,
if test $# = 0
then
echo "Usage $0: <files>" 1>&2
exit 1
fi
for filename in "$@"
do
new_filename=$(echo "$filename" | tr '[:upper:]' '[:lower:]')
test "$filename" = "$new_filename" &&
continue
if test -r "$new_filename"
then
echo "$0: $new_filename exists" 1>&2
elif test -e "$filename"
then
mv -- "$filename" "$new_filename"
else
echo "$0: $filename not found" 1>&2
fi
done
This program creates 1000 files file0.c .. file999.c. File file$i.c contains function f$i which returns $i; for example, file42.c contains function f42 which returns 42. main.c is created with code to call all 1000 functions and print the sum of their return values.
first add the initial lines to main.c note the use of quotes on eof to disable variable interpolation in the here document
cat >main.c <<'eof'
#include <stdio.h>
int main(void) {
int v = 0 ;
eof
i=0
while test $i -lt 1000
do
# add a line to main.c to call the function f$i
cat >>main.c <<eof
int f$i(void);
v += f$i();
eof
# create file$i.c containing function f$i
cat >file$i.c <<eof
int f$i(void) {
return $i;
}
eof
i=$((i + 1))
done
cat >>main.c <<'eof'
printf("%d\n", v);
return 0;
}
eof
# compile and run the 1001 C files
# time clang main.c file*.c
# ./a.out
set -x
# written by andrewt@unsw.edu.au for COMP(2041|9044)
#
# Run as plagiarism_detection.simple_diff.sh <files>
# Report if any of the files are copies of each other
#
# Note use of diff -iw so changes in white-space or case
# are ignored
for file1 in "$@"
do
for file2 in "$@"
do
test "$file1" = "$file2" &&
break # avoid comparing pairs of assignments twice
if diff -i -w "$file1" "$file2" >/dev/null
then
echo "$file1 is a copy of $file2"
fi
done
done
Improved version of plagiarism_detection.simple_diff.sh
The substitution s/\/\/.*// removes // style C comments.
This means changes in comments won't affect comparisons.
Note use of temporary files
TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$
for file1 in "$@"
do
for file2 in "$@"
do
test "$file1" = "$file2" &&
break # avoid comparing pairs of assignments twice
sed 's/\/\/.*//' "$file1" >$TMP_FILE1
sed 's/\/\/.*//' "$file2" >$TMP_FILE2
if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
then
echo "$file1 is a copy of $file2"
fi
done
done
rm -f $TMP_FILE1 $TMP_FILE2
Improved version of plagiarism_detection.comments.sh
change all C strings to the letter 's' and change all identifiers to the letter 'v'.
Hence changes in strings & identifiers will be ignored.
TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$
# s/\/\/.*// removes // style C comments
# s/"[^"]*"/s/g changes strings to the letter 's'
# It won't match a few C strings (e.g. those containing an escaped
# double quote) which is OK for our purposes
# s/[a-zA-Z_][a-zA-Z0-9_]*/v/g changes variable names to 'v'
# It will also change function names, keywords etc.
# which is OK for our purposes.
substitutions='
s/\/\/.*//
s/"[^"]*"/s/g
s/[a-zA-Z_][a-zA-Z0-9_]*/v/g'
for file1 in "$@"
do
for file2 in "$@"
do
test "$file1" = "$file2" &&
break # avoid comparing pairs of assignments twice
sed "$substitutions" "$file1" >$TMP_FILE1
sed "$substitutions" "$file2" >$TMP_FILE2
if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
then
echo "$file1 is a copy of $file2"
fi
done
done
rm -f $TMP_FILE1 $TMP_FILE2
Improved version of plagiarism_detection.identifiers.sh
Note the use of sort so line reordering won't prevent detection of plagiarism.
TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$
# sed script: strip // comments, replace each string literal with 's'
# (the * lets the pattern match strings of any length)
# and replace each identifier with 'v'
substitutions='
s/\/\/.*//
s/"[^"]*"/s/g
s/[a-zA-Z_][a-zA-Z0-9_]*/v/g'
for file1 in "$@"
do
for file2 in "$@"
do
test "$file1" = "$file2" &&
break # avoid comparing pairs of assignments twice
sed "$substitutions" "$file1"|sort >$TMP_FILE1
sed "$substitutions" "$file2"|sort >$TMP_FILE2
if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
then
echo "$file1 is a copy of $file2"
fi
done
done
rm -f $TMP_FILE1 $TMP_FILE2
Improved version of plagiarism_detection.reordering.sh with robust creation and removal of temporary files
TMP_FILE1=$(mktemp)
TMP_FILE2=$(mktemp)
trap 'rm -f $TMP_FILE1 $TMP_FILE2;exit' INT TERM EXIT
# sed script: strip // comments, replace each string literal with 's'
# (the * lets the pattern match strings of any length)
# and replace each identifier with 'v'
substitutions='
s/\/\/.*//
s/"[^"]*"/s/g
s/[a-zA-Z_][a-zA-Z0-9_]*/v/g'
for file1 in "$@"
do
for file2 in "$@"
do
test "$file1" = "$file2" &&
break # avoid comparing pairs of assignments twice
sed "$substitutions" "$file1"|sort >$TMP_FILE1
sed "$substitutions" "$file2"|sort >$TMP_FILE2
if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
then
echo "$file1 is a copy of $file2"
fi
done
done
print prime numbers < 1000
note use of local Shell builtin to scope a variable without the local declaration the variable i in the function would be global and would break the bottom while loop
local is not (yet) POSIX but is widely supported
# is_prime <n>
# exit status 0 if the integer n is prime, 1 otherwise
is_prime() {
    # local keeps n and i from clobbering the caller's variables
    local n i
    n=$1
    # numbers below 2 are not prime - without this check the loop
    # below never runs and 0 and 1 would be reported as prime
    test "$n" -lt 2 &&
        return 1
    i=2
    # trial division by every integer in 2..n-1
    while test "$i" -lt "$n"
    do
        test $((n % i)) -eq 0 &&
            return 1
        i=$((i + 1))
    done
    return 0
}
i=0
while test $i -lt 1000
do
is_prime $i && echo $i
i=$((i + 1))
done
my_process_id=$$
# launch a asynchronous sub-shell that will kill
# this process in a second
(sleep 1; kill $my_process_id) &
i=0
while true
do
echo $i
i=$((i + 1))
done
catch signal SIGINT and print message
trap 'echo ha ha' INT
n=0
while true
do
echo "$n"
sleep 1
n=$((n + 1))
done
catch signal SIGTERM, print message and exit
trap 'echo loop executed $n times in 1 second; exit 0' TERM
# launch a sub-shell that will terminate
# this process in 1 second
my_process_id=$$
(sleep 1; kill $my_process_id) &
n=0
while true
do
n=$((n + 1))
done
Compile the files of a multi-file C program in parallel. Use create_1001_file_C_program.sh to create suitable test data.
On a CPU with n cores this can be (nearly) n times faster
If there are large number of C files we may exhaust memory or operating system resources
for f in "$@"
do
clang -c "$f" &
done
# wait for the incremental compiles to finish
# and then compile .o files into single binary
wait
clang -o binary -- *.o
Compile the files of a multi-file C program in parallel. Use create_1001_file_C_program.sh to create suitable test data.
on Linux getconf will tell us how many cores the machine has otherwise assume 8
max_processes=$(getconf _NPROCESSORS_ONLN 2>/dev/null) ||
max_processes=8
# NOTE: this breaks if a filename contains whitespace or quotes
echo "$@"|
xargs --max-procs=$max_processes --max-args=1 clang -c
clang -o binary -- *.o
compile the files of a multi-file C program in parallel use create_1001_file_C_program.sh to create suitable test data
find's -print0 option terminates pathnames with a '\0' xargs's --null option expects '\0' terminated input as '\0' can not appear in file names this can handle any filename
On Linux getconf will tell us how many cores the machine has; if getconf fails, assume 8.
max_processes=$(getconf _NPROCESSORS_ONLN 2>/dev/null) ||
max_processes=8
find "$@" -print0|
xargs --max-procs=$max_processes --max-args=1 --null clang -c
clang -o binary -- *.o
Compile the files of a multi-file C program in parallel. Use create_1001_file_C_program.sh to create suitable test data.
parallel clang -c '{}' ::: "$@"
clang -o binary -- *.o
print prime numbers < 1000
Rewritten to use bash arithmetic extension (())
This makes the program more readable but less portable.
# is_prime <n>
# exit status 0 if the integer n is prime, 1 otherwise
is_prime() {
    # local keeps n and i from clobbering the caller's variables
    local n i
    n=$1
    # numbers below 2 are not prime - without this check the loop
    # below never runs and 0 and 1 would be reported as prime
    if ((n < 2))
    then
        return 1
    fi
    i=2
    # trial division by every integer in 2..n-1
    while ((i < n))
    do
        if ((n % i == 0))
        then
            return 1
        fi
        i=$((i + 1))
    done
    return 0
}
i=0
while ((i < 1000))
do
is_prime $i && echo $i
i=$((i + 1))
done
"""
Compute Pythagoras' Theorem
written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
translated from perl written by andrewt@cse.unsw.edu.au
"""
import math
x = float(input("Enter x: "))
y = float(input("Enter y: "))
pythagoras = math.sqrt(x**2 + y**2)
print(f"Square root of {x} squared + {y} squared is {pythagoras}")
"""
Read numbers until end of input (or a non-number) is reached
Then print the sum of the numbers
written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
translated from perl written by andrewt@cse.unsw.edu.au
"""
from sys import stdin
sum = 0
for line in stdin:
line = line.strip()
try:
sum += int(line)
except ValueError as e:
print(e)
print(f"Sum of the numbers is {sum}")
"""
Simple example reading a line of input and examining characters
written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
"""
try:
line = input("Enter some input: ")
except EOFError:
print("could not read any characters")
exit(1)
n_chars = len(line)
print(f"That line contained {n_chars} characters")
if n_chars > 0:
first_char = line[0]
last_char = line[-1]
print(f"The first character was '{first_char}'")
print(f"The last character was '{last_char}'")
"""
Reads lines of input until end-of-input
Print "snap!" if two consecutive lines are identical
written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
translated from perl written by andrewt@cse.unsw.edu.au
"""
last = None;
while True:
try:
curr = input("Enter line: ")
except EOFError:
print()
break
if curr == last:
print("Snap!")
break
last = curr
"""
Create a string of size 2^n by concatenation
written by d.brotherston@unsw.edu.au as a COMP(2041|9044) lecture example
"""
import sys
if len(sys.argv) != 2:
print(f"Usage: {sys.argv[0]}: <n>")
exit(1)
n = 0
string = "@"
while n < int(sys.argv[1]):
string *= 2
# or `string += string`
# or `string = string + string`
n += 1
print(f"String of 2^{n} = {len(string)} characters created")
Python implementation of /bin/echo
using indexing & while, not pythonesque
import sys
i = 1
while i < len(sys.argv):
if i > 1:
print(" ", end="")
print(sys.argv[i], end="")
i += 1
print()
Python implementation of /bin/echo
using indexing & range, not pythonesque
import sys
for i in range(1, len(sys.argv)):
if i > 1:
print(' ', end='')
print(sys.argv[i], end='')
print()
Python implementation of /bin/echo
import sys
if sys.argv[1:]:
print(sys.argv[1], end='')
for arg in sys.argv[2:]:
print('', arg, end='')
print()
sum integers supplied as command line arguments no check that arguments are integers
import sys
total = 0
for arg in sys.argv[1:]:
total += int(arg)
print("Sum of the numbers is", total)
sum integers supplied as command line arguments
import sys
total = 0
for arg in sys.argv[1:]:
try:
total += int(arg)
except ValueError:
print(f"error: '{arg}' is not an integer", file=sys.stderr)
sys.exit(1)
print("Sum of the numbers is", total)
Count the number of lines on standard input.
import sys
line_count = 0
for line in sys.stdin:
line_count += 1
print(line_count, "lines")
Count the number of lines on standard input.
import sys
lines = sys.stdin.readlines()
line_count = len(lines)
print(line_count, "lines")
Count the number of lines on standard input.
import sys
lines = list(sys.stdin)
line_count = len(lines)
print(line_count, "lines")
Simple cp implementation for text files using line-based I/O explicit close is used below, a with statement would be better no error handling
import sys
if len(sys.argv) != 3:
print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
sys.exit(1)
infile = open(sys.argv[1], "r", encoding="utf-8")
outfile = open(sys.argv[2], "w", encoding="utf-8")
for line in infile:
print(line, end='', file=outfile)
infile.close()
outfile.close()
Simple cp implementation for text files using line-based I/O and with statement, but no error handling
import sys
if len(sys.argv) != 3:
print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
sys.exit(1)
with open(sys.argv[1]) as infile:
with open(sys.argv[2], "w") as outfile:
for line in infile:
outfile.write(line)
Simple cp implementation for text files using line-based I/O and with statement and error handling
import sys
if len(sys.argv) != 3:
print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
sys.exit(1)
try:
with open(sys.argv[1]) as infile:
with open(sys.argv[2], "w") as outfile:
for line in infile:
outfile.write(line)
except OSError as e:
print(sys.argv[0], "error:", e, file=sys.stderr)
sys.exit(1)
Simple cp implementation for text files using line-based I/O reading all lines into array (not advisable for large files)
import sys
if len(sys.argv) != 3:
print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
sys.exit(1)
try:
with open(sys.argv[1]) as infile:
with open(sys.argv[2], "w") as outfile:
lines = infile.readlines()
outfile.writelines(lines)
except OSError as e:
print(sys.argv[0], "error:", e, file=sys.stderr)
sys.exit(1)
Simple cp implementation using shutil.copyfile
import sys
from shutil import copyfile
if len(sys.argv) != 3:
print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
sys.exit(1)
try:
copyfile(sys.argv[1], sys.argv[2])
except OSError as e:
print(sys.argv[0], "error:", e, file=sys.stderr)
sys.exit(1)
Simple cp implementation by running /bin/cp
import subprocess
import sys
if len(sys.argv) != 3:
print("Usage:", sys.argv[0], "<infile> <outfile>", file=sys.stderr)
sys.exit(1)
p = subprocess.run(['cp', sys.argv[1], sys.argv[2]])
sys.exit(p.returncode)
Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.
For example:
repeat_seconds=300 #check every 5 minutes
if test $# = 3
then
url=$1
regexp=$2
email_address=$3
else
echo "Usage: $0 <url> <regex> <email-address>" 1>&2
exit 1
fi
while true
do
if curl --silent "$url"|grep -E "$regexp" >/dev/null
then
# the 2nd echo is for testing, remove to really send email
echo "Generated by $0" |
echo mail -s "website '$url' now matches regex '$regexp'" "$email_address"
exit 0
fi
sleep $repeat_seconds
done
Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.
implemented using subprocess
import re
import subprocess
import sys
import time
REPEAT_SECONDS = 300  # check every 5 minutes

if len(sys.argv) == 4:
    url = sys.argv[1]
    regexp = sys.argv[2]
    email_address = sys.argv[3]
else:
    print(f"Usage: {sys.argv[0]} <url> <regex> <email-address>", file=sys.stderr)
    sys.exit(1)

while True:
    # fetch the page source with curl, captured as text
    p = subprocess.run(
        ["curl", "--silent", url], text=True, stdout=subprocess.PIPE
    )
    webpage = p.stdout
    if not re.search(regexp, webpage):
        # no match yet - wait and try again
        time.sleep(REPEAT_SECONDS)
        continue

    mail_body = f"Generated by {sys.argv[0]}"
    subject = f"website '{url}' now matches regex '{regexp}'"
    # the echo is for testing, remove to really send email
    # the recipient is passed as the final argument, as in the shell version
    subprocess.run(
        ["echo", "mail", "-s", subject, email_address],
        text=True,
        input=mail_body,
    )
    sys.exit(0)
Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.
implemented using urllib.request
import re
import sys
import subprocess
import time
import urllib.request
REPEAT_SECONDS = 300  # check every 5 minutes

if len(sys.argv) == 4:
    url = sys.argv[1]
    regexp = sys.argv[2]
    email_address = sys.argv[3]
else:
    print(f"Usage: {sys.argv[0]} <url> <regex> <email-address>", file=sys.stderr)
    sys.exit(1)

while True:
    # the with statement closes the connection after every poll
    with urllib.request.urlopen(url) as response:
        webpage = response.read().decode()
    if not re.search(regexp, webpage):
        # no match yet - wait and try again
        time.sleep(REPEAT_SECONDS)
        continue

    mail_body = f"Generated by {sys.argv[0]}"
    subject = f"website '{url}' now matches regex '{regexp}'"
    # the echo is for testing, remove to really send email
    # the recipient is passed as the final argument, as in the shell version
    subprocess.run(
        ["echo", "mail", "-s", subject, email_address],
        text=True,
        input=mail_body,
    )
    sys.exit(0)
fetch and print the text of a web page using HTML parser BeautifulSoup
import re
import sys
import urllib.request
import bs4 as BeautifulSoup
# text nodes whose parent element is one of these are skipped
IGNORE_WEBPAGE_ELEMENTS = set("[document] head meta style script title".split())

for url in sys.argv[1:]:
    # the with statement closes the connection once the page is read
    with urllib.request.urlopen(url) as response:
        webpage = response.read().decode()
    soup = BeautifulSoup.BeautifulSoup(webpage, "lxml")
    for element in soup.findAll(text=True):
        parent = element.parent.name.lower()
        if parent in IGNORE_WEBPAGE_ELEMENTS:
            continue
        text = element.getText()
        # remove empty lines and leading whitespace
        # (applied to the extracted text, not the raw element)
        text = re.sub(r"\n\s+", "\n", text)
        text = text.strip()
        if text:
            print(text)
def a():
x = 1
print('a', x, y, z)
def b():
x = 2
y = 2
a()
print('b', x, y, z)
def c():
x = 3
y = 3
global z
z = 3
b()
print('c', x, y, z)
"""
approximate of implementation of some python functions
written by andrewt@unsw.edu.au for COMP(2041|9044)
"""
def my_enumerate(sequence, start=0):
    """Approximate the builtin enumerate, but return a list.

    Each element of sequence is paired with a counter that begins
    at start and goes up by one per element.
    """
    numbered = []
    position = start
    for item in sequence:
        numbered.append((position, item))
        position += 1
    return numbered
def my_zip2(sequence1, sequence2):
    """Approximate the builtin zip for exactly 2 sequences, but return a list.

    Both arguments must support len() and indexing.
    The result is as long as the shorter of the two sequences.
    Note: the builtin zip accepts any number of sequences.
    """
    shortest = min(len(sequence1), len(sequence2))
    return [(sequence1[i], sequence2[i]) for i in range(shortest)]
def my_map1(function, sequence):
    """Approximate the builtin map for a single sequence, but return a list.

    function is called once on each element, in order.
    Note: the builtin map accepts more than 1 sequence.
    """
    return [function(value) for value in sequence]
def my_filter(function, sequence):
    """Approximate the builtin filter, but return a list.

    Keeps, in order, the elements for which function returns a true value.
    Note: the builtin filter also accepts None in place of a function.
    """
    return [value for value in sequence if function(value)]
if __name__ == "__main__":
print(my_enumerate("abcde"))
print(my_zip2("Hello", "Andrew"))
cubes = my_map1(lambda x: x**3, range(10))
print(cubes)
even = my_filter(lambda x: x % 2 == 0, range(10))
print(even)
"""
calculate Dot Product https://en.wikipedia.org/wiki/Dot_product
of 2 lists - list are assumed to be the same length
written by andrewt@unsw.edu.au for COMP(2041|9044)
"""
import operator
def dot_product0(a, b):
"""return dot product of 2 lists - using for loop + indexing"""
total = 0
for i in range(len(a)):
total += a[i] * b[i]
return total
def dot_product1(a, b):
"""return dot product of 2 lists - using for loop + enumerate"""
total = 0
for i, a_i in enumerate(a):
total += a_i * b[i]
return total
def dot_product2(a, b):
"""return dot product of 2 lists - using for loop + zip"""
total = 0
for x, y in zip(a, b):
total += x * y
return total
def dot_product3(a, b):
"""return dot product of 2 lists - using comprehension (generator expression) + zip"""
return sum(x * y for x, y in zip(a, b))
def multiply(x, y):
"""multiply 2 numbers - operator.mul does this"""
return x * y
def dot_product4(a, b):
"""return dot product of 2 lists - map"""
return sum(map(multiply, a, b))
def dot_product5(a, b):
"""return dot product of 2 lists - map + lambda"""
return sum(map(lambda x, y: x * y, a, b))
def dot_product6(a, b):
"""return dot product of 2 lists - map + operator.mul"""
return sum(map(operator.mul, a, b))
if __name__ == "__main__":
a = range(5, 10)
b = range(11, 16)
print(dot_product0(a, b))
print(dot_product1(a, b))
print(dot_product2(a, b))
print(dot_product3(a, b))
print(dot_product4(a, b))
print(dot_product5(a, b))
print(dot_product6(a, b))
"""
extract odd numbers from a list
written by andrewt@unsw.edu.au for COMP(2041|9044)
"""
def is_odd(number):
    """Return True if the integer number is odd, False otherwise.

    In Python the result of % has the sign of the divisor, so
    number % 2 is 1 for every odd integer, including negative ones.
    """
    return number % 2 == 1
def odd0(numbers):
"""extract odd_numbers from list using for loop"""
odd_numbers = []
for n in numbers:
if is_odd(n):
odd_numbers.append(n)
return odd_numbers
def odd1(numbers):
"""extract odd_numbers from list using list comprehension"""
return [n for n in numbers if is_odd(n)]
def odd2(numbers):
"""extract odd_numbers from list using filter"""
return filter(is_odd, numbers)
def odd3(numbers):
    """extract odd numbers from list using filter + lambda

    Returns a lazy filter iterator, not a list;
    wrap the result in list() to materialise it.
    """
    return filter(lambda n: n % 2 == 1, numbers)
if __name__ == "__main__":
    numbers = range(1, 11)
    print(odd0(numbers))
    print(odd1(numbers))
    # odd2 and odd3 return filter iterators - convert them to lists
    # so the numbers are printed rather than "<filter object at ...>"
    print(list(odd2(numbers)))
    print(list(odd3(numbers)))
"""
sorting a list based on the values in a hash
"""
import random
DAY_LIST = "Sunday Monday Tuesday Wednesday Thursday Friday Saturday".split()
DAY_NUMBER = dict((day, number) for number, day in enumerate(DAY_LIST))
def random_day_of_week():
return random.choice(DAY_LIST)
def sort_days0(day_list):
return sorted(day_list, key=lambda day: DAY_NUMBER[day])
def sort_days1(day_list):
return sorted(day_list, key=DAY_NUMBER.get)
if __name__ == "__main__":
print(DAY_LIST)
print(DAY_NUMBER)
random_days = [random_day_of_week() for _ in range(7)]
print(random_days)
print(sorted(random_days))
print(sort_days0(random_days))
print(sort_days1(random_days))
count people enrolled in courses implemented using regular expressions & dicts
import re
COURSE_CODES_FILE = "course_codes.tsv"
ENROLLMENTS_FILE = "enrollments.txt"

# course_codes.tsv contains tab separated UNSW course codes and names, e.g.
# ACCT1501	Accounting & Financial Mgt 1A
# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# map each course code to its name
course_names = {}
with open(COURSE_CODES_FILE, encoding="utf-8") as f:
    for line in f:
        # first word is the code, the rest (minus trailing white space) the name
        if m := re.match(r"(\S+)\s+(.*\S)", line):
            course_names[m.group(1)] = m.group(2)

# map each course code to the number of enrollment lines seen for it
enrollments_count = {}
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        # remove the line ending first so a final line without a newline
        # is handled the same as every other line
        course_code = re.sub(r"\|.*", "", line.rstrip("\n"))
        if not course_code:
            # blank line, nothing to count
            continue
        enrollments_count[course_code] = enrollments_count.get(course_code, 0) + 1

for (course_code, enrollment) in sorted(enrollments_count.items()):
    # if no name for course_code use ???
    name = course_names.get(course_code, "???")
    print(f"{enrollment:4} {course_code} {name}")
Count people enrolled in courses, implemented using string operations, a dict & a counter
import collections
import re
COURSE_CODES_FILE = "course_codes.tsv"
ENROLLMENTS_FILE = "enrollments.txt"

# course_codes.tsv contains tab separated UNSW course codes and names, e.g.
# ACCT1501	Accounting & Financial Mgt 1A
# enrollments.txt contains synthetic course enrollments
# with fields separated by |
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# map each course code to its name
course_names = {}
with open(COURSE_CODES_FILE, encoding="utf-8") as f:
    for line in f:
        # drop the line ending, otherwise it becomes part of the course name
        # and every line of output is followed by a blank line
        course_code, tab, course_name = line.rstrip("\n").partition("\t")
        if not tab:
            # not a "code<TAB>name" line, nothing to record
            continue
        course_names[course_code] = course_name

# count enrollment lines per course code
enrollments_count = collections.Counter()
with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        # the course code is everything before the first |
        course_code = line.rstrip("\n").partition("|")[0]
        if not course_code:
            # blank line, nothing to count
            continue
        enrollments_count[course_code] += 1

for (course_code, enrollment) in sorted(enrollments_count.items()):
    # if no name for course_code use ???
    name = course_names.get(course_code, "???")
    print(f"{enrollment:4} {course_code} {name}")
count how many people enrolled have each first name implemented using regular expressions, a set & counters
import collections
import re
ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# student numbers seen so far, so a student enrolled in several courses
# is only counted once
already_counted = set()
first_name_count = collections.Counter()

with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        # fields 2 and 3 are the student number and "Family, Given names"
        student_number, full_name = line.split("|")[1:3]
        if student_number not in already_counted:
            already_counted.add(student_number)
            # the first name is the first word after the comma
            m = re.match(r".*,\s+(\S+)", full_name)
            if m:
                first_name_count[m.group(1)] += 1

# put the count first in the tuples so sorting orders on count before name,
# then print first names in decreasing order of popularity
by_popularity = sorted(
    ((total, name) for (name, total) in first_name_count.items()), reverse=True
)
for (count, first_name) in by_popularity:
    print(f"{count:4} {first_name}")
Report cases where there are 5 or more people of the same first name enrolled in a course implemented using a dict of dicts
import re
import sys
REPORT_MORE_THAN_STUDENTS = 5
ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# dict of dicts: course code -> first name -> number of enrollments
course_first_name_count = {}

with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        course_code, _, full_name = line.split("|")[0:3]
        # the first name is the first word after the comma
        m = re.match(r".*,\s+(\S+)", full_name)
        if m is None:
            print("Warning could not parse line", line.strip(), file=sys.stderr)
            continue
        first_name = m.group(1)
        # create the inner dict / the counter entry on first use
        name_counts = course_first_name_count.setdefault(course_code, {})
        name_counts[first_name] = name_counts.get(first_name, 0) + 1

for course in sorted(course_first_name_count):
    for (first_name, count) in course_first_name_count[course].items():
        if count < REPORT_MORE_THAN_STUDENTS:
            continue
        print(course, "has", count, "students named", first_name)
Report cases where there are more than 5 people of the same first name enrolled in a course implemented using a defaultdict & counter from collections and string operations
import collections
import sys
REPORT_MORE_THAN_STUDENTS = 5
ENROLLMENTS_FILE = "enrollments.txt"

# enrollments.txt contains synthetic course enrollments
# with fields separated by | e.g.:
# COMP1911|5218563|Syed, Hugh Ali|3707/1|COMPAS|090.667|22T2|20010419|M

# course code -> Counter mapping first name -> number of enrollments;
# defaultdict creates the Counter the first time a course code is used
course_first_name_count = collections.defaultdict(collections.Counter)

with open(ENROLLMENTS_FILE, encoding="utf-8") as f:
    for line in f:
        course_code, _, full_name = line.split("|")[0:3]
        # full_name is "Family, Given names": the first name is the first
        # word of the part after the comma
        name_parts = full_name.split(",")
        first_name = name_parts[1].strip().split(" ")[0] if len(name_parts) > 1 else ""
        if not first_name:
            # no comma or no given name: warn and skip instead of crashing
            print("Warning could not parse line", line.strip(), file=sys.stderr)
            continue
        course_first_name_count[course_code][first_name] += 1

for course in sorted(course_first_name_count.keys()):
    for (first_name, count) in course_first_name_count[course].items():
        if count > REPORT_MORE_THAN_STUDENTS:
            print(course, "has", count, "students named", first_name)
Print the last number (real or integer) on every line
Note: regexp to match number: -?\d+\.?\d*
Note: use of assignment operator :=
import re, sys
# a number followed only by non-digit characters up to the end of the line,
# i.e. the last number on the line
LAST_NUMBER = re.compile(r'(-?\d+\.?\d*)\D*$')

for line in sys.stdin:
    # := binds the match object and tests it in a single expression
    if found := LAST_NUMBER.search(line):
        print(found.group(1))
Find the positive integers among input text print their sum and mean
Note regexp to split on non-digits
Note check to handle empty string from split
Only positive integers handled
import re, sys
input_as_string = sys.stdin.read()
# splitting on runs of non-digits leaves the digit strings, plus an empty
# string at either end when the input starts or ends with a non-digit
numbers = re.split(r"\D+", input_as_string)
print(numbers)

total = 0
n = 0
for number in numbers:
    # skip the empty strings produced by split
    if number:
        total += int(number)
        n += 1

# test n, not numbers: split never returns an empty list, so testing the
# list would divide by zero when the input contains no integers
if n:
    print(n, "numbers: total", total, "with mean", total / n)
Find the numbers among input text, print their sum and mean
Note the regexp to match a number, -?\d+\.?\d*, matches positive & negative integers & floating-point numbers
import re, sys
input_as_string = sys.stdin.read()
# every integer or floating-point number in the input, as strings
numbers = re.findall(r"-?\d+\.?\d*", input_as_string)
print(numbers)

n = len(numbers)
total = sum(map(float, numbers))
# findall returns an empty list when nothing matches, so n is 0 then
if n > 0:
    print(n, "numbers: total", total, "with mean", total / n)
Change the names of the specified files to lower case. (simple version of the perl utility rename)
Note use of test to check if the new filename is unchanged.
Note the double quotes around $filename so filenames containing spaces are not broken into multiple words
Note the use of mv -- to stop mv interpreting a filename beginning with - as an option
Note files named -n or -e still break the script because echo will treat them as an option.
# at least one filename is required
if test $# = 0
then
    echo "Usage $0: <files>" 1>&2
    exit 1
fi

for filename in "$@"
do
    # lower-case version of the name
    new_filename=$(echo "$filename" | tr '[:upper:]' '[:lower:]')

    # nothing to do if the name is already lower case
    test "$filename" = "$new_filename" &&
        continue

    # use -e rather than -r: a file we cannot read still exists
    # and mv would silently replace it
    if test -e "$new_filename"
    then
        echo "$0: $new_filename exists" 1>&2
    elif test -e "$filename"
    then
        # -- stops mv treating a filename beginning with - as an option
        mv -- "$filename" "$new_filename"
    else
        echo "$0: $filename not found" 1>&2
    fi
done
Change the names of the specified files to lower case. (simple version of the perl utility rename)
import os
import sys
for old_pathname in sys.argv[1:]:
    new_pathname = old_pathname.lower()
    # only rename when lower-casing actually changes the name
    if new_pathname != old_pathname:
        if os.path.exists(new_pathname):
            # never overwrite an existing file
            print(f"{sys.argv[0]}: '{new_pathname}' exists", file=sys.stderr)
        else:
            try:
                os.rename(old_pathname, new_pathname)
            except OSError as e:
                print(f"{sys.argv[0]}: '{new_pathname}' {e}", file=sys.stderr)
Change the names of the specified files by substituting occurrences of regex with replacement (simple version of the perl utility rename)
import os
import re
import sys
if len(sys.argv) < 3:
    print(f"Usage: {sys.argv[0]} <regex> <replacement> [files]", file=sys.stderr)
    sys.exit(1)

# first two arguments are the pattern and its replacement, the rest are files
regex, replacement, *pathnames = sys.argv[1:]

for old_pathname in pathnames:
    # only the first match in each pathname is replaced
    new_pathname = re.sub(regex, replacement, old_pathname, count=1)
    if new_pathname != old_pathname:
        if os.path.exists(new_pathname):
            # never overwrite an existing file
            print(f"{sys.argv[0]}: '{new_pathname}' exists", file=sys.stderr)
        else:
            try:
                os.rename(old_pathname, new_pathname)
            except OSError as e:
                print(f"{sys.argv[0]}: '{new_pathname}' {e}", file=sys.stderr)
Change the names of the specified files by substituting occurrences of regex with replacement (simple version of the perl utility rename)
also demonstrating argument processing and use of eval
beware: eval can allow arbitrary code execution, so it should not be used where security is important
import argparse
import os
import re
import sys
parser = argparse.ArgumentParser()

# required positional arguments, in command-line order
for positional, description in (
    ("regex", "match against filenames"),
    ("replacement", "replaces matches with this"),
):
    parser.add_argument(positional, type=str, help=description)
parser.add_argument("filenames", nargs="*", help="filenames to be changed")

# optional boolean flags, False unless given
for short_flag, long_flag, description in (
    ("-d", "--dryrun", "show changes but don't make them"),
    ("-v", "--verbose", "print more information"),
    (
        "-e",
        "--eval",
        "evaluate replacement as python expression, match available as _",
    ),
):
    parser.add_argument(short_flag, long_flag, action="store_true", help=description)

# optional integer argument which defaults to 1
parser.add_argument(
    "-n",
    "--replace_n_matches",
    type=int,
    default=1,
    help="replace n matches (0 for all matches)",
)

args = parser.parse_args()
def eval_replacement(match):
    """Return the replacement text for one regex match; used as the re.sub callback.

    Without --eval the replacement argument is returned unchanged.
    With --eval the replacement argument is evaluated as a Python expression,
    with the variable _ set to the matching part of the filename, and the
    result is converted to a string.
    """
    if args.eval:
        # _ must be a local of this function: eval() sees the caller's locals
        _ = match.group(0)
        # NOTE: eval runs arbitrary code supplied on the command line
        return str(eval(args.replacement))
    return args.replacement
for old_pathname in args.filenames:
    try:
        new_pathname = re.sub(
            args.regex, eval_replacement, old_pathname, count=args.replace_n_matches
        )
    except Exception as e:
        # re.sub raises re.error for an invalid regex, and with --eval the
        # user-supplied expression can raise anything (NameError, TypeError, ...);
        # report the problem and carry on with the next file
        print(
            f"{sys.argv[0]}: '{old_pathname}': '{args.replacement}' {e}",
            file=sys.stderr,
        )
        continue

    if new_pathname == old_pathname:
        if args.verbose:
            print("no change:", old_pathname)
        continue

    # never overwrite an existing file
    if os.path.exists(new_pathname):
        print(f"{sys.argv[0]}: '{new_pathname}' exists", file=sys.stderr)
        continue

    if args.dryrun:
        print(old_pathname, "would be renamed to", new_pathname)
        continue

    if args.verbose:
        print("renaming", old_pathname, "to", new_pathname)

    try:
        os.rename(old_pathname, new_pathname)
    except OSError as e:
        print(f"{sys.argv[0]}: '{new_pathname}' {e}", file=sys.stderr)
For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
import re, sys, os
for filename in sys.argv[1:]:
    # the changed text is written to a new file which then replaces the original
    tmp_filename = filename + ".new"
    if os.path.exists(tmp_filename):
        print(f"{sys.argv[0]}: {tmp_filename} already exists\n", file=sys.stderr)
        sys.exit(1)

    with open(filename) as f, open(tmp_filename, "w") as g:
        for line in f:
            # park Hermione (and misspellings) as Zaphod so the two swaps
            # do not undo each other
            swapped = re.sub(r"Herm[io]+ne", "Zaphod", line)
            swapped = swapped.replace("Harry", "Hermione").replace("Zaphod", "Harry")
            g.write(swapped)

    os.rename(tmp_filename, filename)
For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
import re, sys, os, shutil, tempfile
for filename in sys.argv[1:]:
    # delete=False keeps the temporary file after it is closed so it can be
    # moved over the original
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp, open(filename) as f:
        for line in f:
            # park Hermione (and misspellings) as Zaphod so the two swaps
            # do not undo each other
            swapped = re.sub(r"Herm[io]+ne", "Zaphod", line)
            swapped = swapped.replace("Harry", "Hermione").replace("Zaphod", "Harry")
            tmp.write(swapped)
    shutil.move(tmp.name, filename)
For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
modified text is stored in a list then file over-written
import re, sys, os
for filename in sys.argv[1:]:
    # read and transform every line before the file is reopened for writing;
    # Hermione (and misspellings) is parked as Zaphod so the two swaps do not
    # undo each other
    with open(filename) as f:
        changed_lines = [
            re.sub(r"Herm[io]+ne", "Zaphod", line)
            .replace("Harry", "Hermione")
            .replace("Zaphod", "Harry")
            for line in f
        ]
    with open(filename, "w") as g:
        g.write("".join(changed_lines))
For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
modified text is stored in a single string then file over-written
import re, sys, os
for filename in sys.argv[1:]:
    # read the whole file into a single string
    with open(filename) as f:
        text = f.read()

    # park Hermione (and misspellings) as Zaphod so the two swaps
    # do not undo each other
    changed_text = re.sub(r"Herm[io]+ne", "Zaphod", text)
    changed_text = changed_text.replace("Harry", "Hermione")
    changed_text = changed_text.replace("Zaphod", "Harry")

    # changed_text is already one string, so it is written as is
    with open(filename, "w") as g:
        g.write(changed_text)
Simple makefile
# Link the game executable from the three object files.
# This is the first rule in the file.
game : main.o graphics.o world.o
	gcc -o game main.o graphics.o world.o
# Each object file depends on its C source and on the headers listed with it,
# and is produced by compiling that source with gcc -c.
main.o : main.c graphics.h world.h
	gcc -c main.c
graphics.o : graphics.c world.h
	gcc -c graphics.c
world.o : world.c world.h
	gcc -c world.c
# Remove the executable and the object files.
clean:
	rm -f game main.o graphics.o world.o
"""
Simple Python implementation of "make".
Parses makefile rules and stores them in a dict
then builds targets with a recursive function.
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example
"""
import argparse
import collections
import os
import re
import sys
import subprocess
def main():
    """Parse the command line, read the makefile and build each requested target."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--makefile", default="Makefile")
    parser.add_argument("-n", "--dryrun", action="store_true")
    parser.add_argument("build_targets", nargs="*")
    args = parser.parse_args()

    rules = parse_makefile(args.makefile)

    # if no target is specified use the first target in the makefile (if any)
    if args.build_targets:
        build_targets = args.build_targets
    else:
        build_targets = list(rules)[:1]

    for target in build_targets:
        build(target, rules, args.dryrun)
def build(target, rules, dryrun=False):
    """Recursively check dependencies and run commands as needed to build target.

    target -- name of the file to bring up to date
    rules  -- dict mapping a target to a (dependencies, build commands) tuple
    dryrun -- if true, print the commands without running them

    The target is rebuilt if it does not exist, or if any dependency is
    missing or newer than it. The program exits with status 1 if the target
    is missing and has no commands, or if a build command fails.
    """
    (dependencies, build_commands) = rules.get(target, ([], []))
    build_needed = not os.path.exists(target)
    for d in dependencies:
        build(d, rules, dryrun)
        # a dependency can still be missing here (dry run, or its commands do
        # not create a file); treat that as out of date rather than letting
        # getmtime raise FileNotFoundError
        build_needed = (
            build_needed
            or not os.path.exists(d)
            or os.path.getmtime(d) > os.path.getmtime(target)
        )
    if not build_needed:
        return
    if not build_commands and not os.path.exists(target):
        print("*** No rule to make target", target)
        sys.exit(1)
    for command in build_commands:
        print(command)
        if dryrun:
            continue
        # commands come from the makefile and are run by the shell, as make does
        if subprocess.run(command, shell=True).returncode != 0:
            # stop at the first failing command instead of carrying on
            print("*** Error building target", target, file=sys.stderr)
            sys.exit(1)
def parse_makefile(makefile_name):
    """Return dict mapping makefile targets to (dependencies, build commands) tuple.

    A rule is a "target : dependencies" line followed by zero or more command
    lines, each starting with a tab. Rules are returned in file order.
    Lines that are neither part of a rule nor its commands are ignored.
    """
    rules = collections.OrderedDict()
    # command list of the rule currently being read, None when outside a rule
    current_commands = None
    with open(makefile_name, encoding="utf-8") as f:
        for line in f:
            if current_commands is not None and line.startswith("\t"):
                current_commands.append(line.strip())
                continue
            # any other line ends the current rule; it is examined here rather
            # than discarded, so a rule directly following a recipe is not lost
            current_commands = None
            if m := re.match(r"^(\S+)\s*:\s*(.*)", line):
                current_commands = []
                rules[m.group(1)] = (m.group(2).split(), current_commands)
    return rules
if __name__ == "__main__":
main()
Simple makefile with variables & a comment
# Compiler and compiler flags used by every compile and link command below.
CC=clang-11
CFLAGS=-O3 -Wall
# Link the game executable from the three object files.
game : main.o graphics.o world.o
	$(CC) $(CFLAGS) -o game main.o graphics.o world.o
# Each object file depends on its C source and on the headers listed with it.
main.o : main.c graphics.h world.h
	$(CC) $(CFLAGS) -c main.c
graphics.o : graphics.c world.h
	$(CC) $(CFLAGS) -c graphics.c
world.o : world.c world.h
	$(CC) $(CFLAGS) -c world.c
# Remove the executable and the object files.
clean:
	rm -f game main.o graphics.o world.o
"""
Simple Python implementation of "make".
Parses makefile rules and stores them in a dict
then builds targets with a recursive function.
This version implements Makefile variables & comments.
written by andrewt@unsw.edu.au as a COMP(2041|9044) lecture example
"""
import argparse
import collections
import os
import re
import sys
import subprocess
def main():
    """Parse the command line, read the makefile and build each requested target."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--makefile", default="Makefile")
    parser.add_argument("-n", "--dryrun", action="store_true")
    parser.add_argument("build_targets", nargs="*")
    args = parser.parse_args()

    rules = parse_makefile(args.makefile)

    # if no target is specified use the first target in the makefile (if any)
    if args.build_targets:
        build_targets = args.build_targets
    else:
        build_targets = list(rules)[:1]

    for target in build_targets:
        build(target, rules, args.dryrun)
def build(target, rules, dryrun=False):
    """Recursively check dependencies and run commands as needed to build target.

    target -- name of the file to bring up to date
    rules  -- dict mapping a target to a (dependencies, build commands) tuple
    dryrun -- if true, print the commands without running them

    The target is rebuilt if it does not exist, or if any dependency is
    missing or newer than it. The program exits with status 1 if the target
    is missing and has no commands, or if a build command fails.
    """
    (dependencies, build_commands) = rules.get(target, ([], []))
    build_needed = not os.path.exists(target)
    for d in dependencies:
        # pass dryrun down, otherwise a dry run really builds the dependencies
        build(d, rules, dryrun)
        # a dependency can still be missing here (dry run, or its commands do
        # not create a file); treat that as out of date rather than letting
        # getmtime raise FileNotFoundError
        build_needed = (
            build_needed
            or not os.path.exists(d)
            or os.path.getmtime(d) > os.path.getmtime(target)
        )
    if not build_needed:
        return
    if not build_commands and not os.path.exists(target):
        print("*** No rule to make target", target)
        sys.exit(1)
    for command in build_commands:
        print(command)
        if dryrun:
            continue
        # commands come from the makefile and are run by the shell, as make does
        if subprocess.run(command, shell=True).returncode != 0:
            # stop at the first failing command instead of carrying on
            print("*** Error building target", target, file=sys.stderr)
            sys.exit(1)
def parse_makefile(makefile_name):
    """Return dict mapping makefile targets to (dependencies, build commands) tuple.

    A rule is a "target : dependencies" line followed by zero or more command
    lines, each starting with a tab. Rules are returned in file order.
    Outside command lines, text from # to the end of the line is removed and
    "name = value" lines define variables. $(name) references are expanded
    in rule lines and command lines using the variables defined so far.
    """
    rules = collections.OrderedDict()
    variables = {}
    # command list of the rule currently being read, None when outside a rule
    current_commands = None
    with open(makefile_name, encoding="utf-8") as f:
        for line in f:
            if current_commands is not None and line.startswith("\t"):
                line = replace_variables(line, variables)
                current_commands.append(line.strip())
                continue
            # any other line ends the current rule; it is examined here rather
            # than discarded, so a rule directly following a recipe is not lost
            current_commands = None

            # remove any comment
            line = re.sub(r"#.*", "", line)

            # check for variable definition
            if m := re.match(r"^\s*(\S+)\s*=\s*(.*)", line):
                variables[m.group(1)] = m.group(2)
                continue

            line = replace_variables(line, variables)
            if m := re.match(r"^(\S+)\s*:\s*(.*)", line):
                current_commands = []
                rules[m.group(1)] = (m.group(2).split(), current_commands)
    return rules
def replace_variables(line, variables):
    """Return line with occurrences of $(variable) replaced by the variable's value.

    variables maps variable names to their values; a name that is not in
    variables is replaced by the empty string.
    """

    def expand(reference):
        # group 1 is the name between the parentheses
        return variables.get(reference.group(1), "")

    return re.sub(r"\$\((.*?)\)", expand, line)
if __name__ == "__main__":
main()
# Image built on Debian bookworm with Apache installed and a small test page.
FROM debian:bookworm
# Install apache2 and curl, run apt-get clean, set Apache's ServerName to
# localhost, create the run, log and lock directories and write
# /var/www/html/hello.html.
# The steps are joined with && in a single RUN instruction, so a failing step
# stops the ones after it.
RUN \
apt-get update &&\
apt-get install -q -y apache2 curl &&\
apt-get -q -y clean &&\
echo ServerName localhost >/etc/apache2/conf-enabled/localhost.conf &&\
mkdir -p /var/run/apache2 /var/log/apache2 /var/lock &&\
echo hello Andrew >/var/www/html/hello.html
# Default command: start Apache, run a login shell, then stop Apache
# once that shell exits successfully.
ENTRYPOINT \
apachectl start &&\
bash -l &&\
apachectl stop
All Links
- All Tutorial Questions
- All Tutorial Sample Answers
-
- All Laboratory Exercises
- All Laboratory Sample Solutions
-
- All Weekly Test Questions
- All Weekly Test Sample Answers
-
- Course Intro
- Filters
- Shell
- Python Intro
- Python More
- Python Functions
- Python Regex
- Python Modules
- Make
- Linux Tools
- Exam