Software Construction


#include <stdio.h>

// print arguments to stdout
int main(int argc, char *argv[]) {
    /* Write every argument after the program name, single-space separated. */
    int i = 1;
    while (i < argc) {
        if (i != 1) {
            putchar(' ');
        }
        fputs(argv[i], stdout);
        i++;
    }

    /* Always finish the line, even when there were no arguments. */
    putchar('\n');

    return 0;
}


import sys


def main():
    """
    Write the command-line arguments to stdout on one line,
    separated by single spaces.
    """
    arguments = sys.argv[1:]
    print(*arguments)


if __name__ == '__main__':
    main()
inspect how shell splits lines into program arguments (argv)
import sys
# show the argument list exactly as Python received it from the shell
print('sys.argv = {}'.format(sys.argv))

A simple shell script demonstrating access to arguments.
# $0 is the name the script was invoked with
echo "My name is $0"
# $$ is the process id of the shell running the script
echo "My process number is $$"
# $# is the number of positional parameters
echo "I have $# arguments"

# "$@" hands every argument to echo as a separate word
echo "My command-line arguments are" "$@"

# parameters beyond $9 need braces, e.g. ${10}
echo "My 5th argument is '$5'"
echo "My 10th argument is '${10}'"

$ ./accessing_args.sh one two "three four" one two three four
# a for loop without "in" iterates over the positional parameters,
# one whole argument per iteration
for argument
do
    echo "$argument"
done

l [file|directories...] - list files


Short shell scripts can be used for convenience.

It is common to put these scripts in a directory such as /home/z1234567/scripts then add this directory to PATH, e.g. in .bash_login
PATH="${PATH}:/home/z1234567/scripts"

Note: "$@" expands to the arguments to the script, but preserves whitespace in arguments.
# forward every argument, unchanged, to ls
ls -l -a -s "$@"


Count the number of times each different word occurs in the files given as arguments, or stdin if no arguments, e.g. word_frequency.sh dracula.txt
# Print each distinct word with its number of occurrences, most frequent first.
cat "$@" |                     # tr doesn't take filenames as arguments
tr '[:upper:]' '[:lower:]' |   # map uppercase to lower case
tr ' ' '\n' |                  # convert to one word per line
tr -cd "a-z'\n" |              # remove all characters except a-z, ' and newline
grep -E -v '^$' |              # remove empty lines
sort |                         # place words in alphabetical order
uniq -c |                      # count how many times each word occurs
sort -rn                       # order in reverse frequency of occurrence

# notes:
# - tr -cd must keep the newline, otherwise all the words are joined into one line
# - first 2 tr commands could be combined
# - sed 's/ /\n/g' could be used instead of tr ' ' '\n'
# - sed "s/[^a-z']//g" could be used instead of tr -cd "a-z'\n"
simple emulation of /usr/bin/seq for a COMP(2041|9044) example

Print the integers 1..n with no argument checking
# upper bound comes straight from the first argument (deliberately unchecked)
last=$1

# count upwards from 1, printing each value until it passes the bound
current=1
while test "$current" -le "$last"; do
    echo "$current"
    current=$((current + 1))
done
simple emulation of /usr/bin/seq for a COMP(2041|9044) example

Print the integers 1..n or n..m
# choose the range from the number of arguments supplied
case $# in
1)
    first=1
    last=$1
    ;;
2)
    first=$1
    last=$2
    ;;
*)
    echo "Usage: $0 <last> or  $0 <first> <last>" 1>&2
    exit 1
    ;;
esac

# print first, first+1, ... up to and including last
current=$first
while test "$current" -le "$last"; do
    echo "$current"
    current=$((current + 1))
done
simple emulation of /usr/bin/seq for a COMP(2041|9044) example
Print the integers 1..n or m..n
# Print the integers 1..n (one argument) or m..n (two arguments).
if [ $# -eq 1 ]
then
    first=1
    last=$1
elif [ $# -eq 2 ]    # two arguments give an explicit first and last
then
    first=$1
    last=$2
else
    echo "Usage: $0 <last> or  $0 <first> <last>" 1>&2
    exit 1
fi

number=$first
# operands are quoted so an empty argument reaches [ as a single (empty) word
while [ "$number" -le "$last" ]
do
    echo "$number"
    number=$((number + 1))
done

Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.

For example:
# seconds to wait between downloads (5 minutes)
repeat_seconds=300

# exactly three arguments are required: url, regex and email address
if test $# != 3
then
    echo "Usage: $0 <url> <regex> <email-address>" 1>&2
    exit 1
fi

url=$1
regexp=$2
email_address=$3

# keep downloading the page until its source matches the regex
until curl --silent "$url" | grep -E "$regexp" >/dev/null
do
    sleep $repeat_seconds
done

# the 2nd echo is for testing, remove to really send email
echo "Generated by $0" |
echo mail -s "website '$url' now matches regex '$regexp'" "$email_address"
exit 0


Change the names of the specified files to lower case. (simple version of the perl utility rename)

Note use of test to check if the new filename is unchanged.

Note the double quotes around $filename so filenames containing spaces are not broken into multiple words

Note the use of mv -- to stop mv interpreting a filename beginning with - as an option

Note files named -n or -e still break the script because echo will treat them as an option.
# at least one filename is required
if test $# = 0
then
    echo "Usage $0: <files>" 1>&2
    exit 1
fi

for filename in "$@"
do
    # lower-case version of the name
    new_filename=$(
        echo "$filename"|
        tr '[:upper:]' '[:lower:]'
        )

    # nothing to do when the name is already lower case
    test "$filename" = "$new_filename" &&
        continue

    # -e (exists) rather than -r (readable): an existing but unreadable
    # target must not be overwritten by mv either
    if test -e "$new_filename"
    then
        echo "$0: $new_filename exists" 1>&2
    elif test -e "$filename"
    then
        # -- stops mv treating a filename starting with - as an option
        mv -- "$filename" "$new_filename"
    else
        echo "$0: $filename not found" 1>&2
    fi

done
demonstrate simple use of read
echo -n "Do you like learning Shell? "
read answer

# reduce the reply to its first character, converted to lower case
first_letter=$(echo "$answer" | cut -c1 | tr A-Z a-z)

# pick the response that goes with that letter
case "$first_letter" in
y) response=":)" ;;
n) response=":(" ;;
*) response="??" ;;
esac

echo "$response"
over-simple /bin/cat emulation using read
setting the special variable IFS to the empty string stops trailing white space being stripped
# Copy each file named on the command line to stdout, line by line.
for file in "$@"
do
    # IFS= keeps leading and trailing white space, -r keeps backslashes literal.
    # The || test lets a final line with no terminating newline through as well.
    while IFS= read -r line || [ -n "$line" ]
    do
        # printf prints lines such as -n verbatim, which echo would take as an option
        printf '%s\n' "$line"
    done <"$file"   # quoted so filenames containing white space still work
done
demonstrate use of a case statement
echo -n "Do you like learning Shell? "
read answer

# start with the fallback and replace it when the reply begins with y/Y or n/N
response="??"
case "$answer" in
[Yy]*) response=":)" ;;
[Nn]*) response=":(" ;;
esac

echo "$response"
create 1001 C files, compile and run them
this program creates 1000 files file0.c .. file999.c; file$i.c contains function f$i which returns $i, for example file42.c contains function f42 which returns 42. main.c is created with code to call all 1000 functions and print the sum of their return values
first add the initial lines to main.c note the use of quotes on eof to disable variable interpolation in the here document
# start main.c; the quoted delimiter keeps the C text literal
cat >main.c <<'eof'
#include <stdio.h>

int main(void) {
    int v = 0 ;
eof

i=0
while [ "$i" -lt 1000 ]
do
    # declare f$i in main.c and add its return value to v
    printf '    int f%d(void);\n    v += f%d();\n' "$i" "$i" >>main.c

    # file$i.c holds the definition of f$i, which returns $i
    printf 'int f%d(void) {\n    return %d;\n}\n' "$i" "$i" >"file$i.c"

    i=$((i + 1))
done

# finish main.c: print the total and return
cat >>main.c <<'eof'
    printf("%d\n", v);
    return 0;
}
eof

# compile (timed) and run the 1001 C files
time clang main.c file*.c
./a.out
demonstrate simple use of a shell function
# Print a one-line introduction.
#   $1 - a person's name
#   $2 - that person's favourite Unix command
favourite_command() {
    # local keeps name and command from overwriting the caller's variables
    local name command
    name=$1
    command=$2
    echo "My name is $name, my favourite Unix command is $command."
}

favourite_command Andrew "uniq"
favourite_command Dylan "jq"
favourite_command Grace "sed"

print prime numbers < 1000
note use of local Shell builtin to scope a variable without the local declaration the variable i in the function would be global and would break the bottom while loop
local is not (yet) POSIX but is widely supported
# is_prime N - succeed (status 0) if the integer N is prime, fail (status 1) otherwise.
is_prime() {
    local n i
    n=$1

    # 0, 1 and negative numbers are not prime
    test "$n" -lt 2 &&
        return 1

    # trial division by every i in 2..n-1
    i=2
    while test "$i" -lt "$n"
    do
        test $((n % i)) -eq 0 &&
            return 1
        i=$((i + 1))
    done
    return 0
}

# test every integer from 0 to 999 and print those is_prime accepts
i=0
while [ "$i" -lt 1000 ]
do
    if is_prime "$i"
    then
        echo "$i"
    fi
    i=$((i + 1))
done

print prime numbers < 1000

Rewritten to use bash arithmetic extension (())
This makes the program more readable but less portable.
# is_prime N - succeed (status 0) if the integer N is prime, fail (status 1) otherwise.
is_prime() {
    local n i
    n=$1

    # 0, 1 and negative numbers are not prime
    if ((n < 2))
    then
        return 1
    fi

    # trial division by every i in 2..n-1
    i=2
    while ((i < n))
    do
        if ((n % i == 0))
        then
            return 1
        fi
        i=$((i + 1))
    done
    return 0
}

# test every integer from 0 to 999 and print those is_prime accepts
for ((i = 0; i < 1000; i++))
do
    if is_prime "$i"
    then
        echo "$i"
    fi
done



Run as plagiarism_detection.simple_diff.sh <files>
Report if any of the files are copies of each other

Note use of diff -iBw so changes in white-space, blank lines or case are ignored
# Compare every file with each file listed before it on the command line.
for file1 in "$@"
do
    for file2 in "$@"
    do
        # stop on reaching file1 itself, which avoids comparing pairs twice
        if [ "$file1" = "$file2" ]
        then
            break
        fi

        # diff succeeds when it finds no differences:
        # -i ignores case, -B blank lines, -w all white space
        if diff -i -B -w "$file1" "$file2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done


Improved version of plagiarism_detection.simple_diff.sh

The substitution s/\/\/.*// removes // style C comments.
This means changes in comments won't affect comparisons.

Note use of temporary files is insecure - an attacker can anticipate the filename
# scratch files for the comment-stripped copies; $$ is this shell's process id
TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$

for file1 in "$@"
do
    for file2 in "$@"
    do
        # stop on reaching file1 itself, which avoids comparing pairs twice
        if [ "$file1" = "$file2" ]
        then
            break
        fi

        # strip // comments from both files before comparing them
        sed 's/\/\/.*//' "$file1" >"$TMP_FILE1"
        sed 's/\/\/.*//' "$file2" >"$TMP_FILE2"

        if diff -iw "$TMP_FILE1" "$TMP_FILE2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done

rm -f "$TMP_FILE1" "$TMP_FILE2"


Improved version of plagiarism_detection.comments.sh
change all C strings to the letter 's' and change all identifiers to the letter 'v'.
Hence changes in strings & identifiers will be ignored.
mktemp provides a suitable temporary filename, robustly & securely
# create two temporary files; give up if either cannot be created
TMP_FILE1=$(mktemp) || exit 1
TMP_FILE2=$(mktemp) || {
    rm -f -- "$TMP_FILE1"
    exit 1
}

# trap allows us to remove temporary files however the script exits
trap 'rm -f -- "$TMP_FILE1" "$TMP_FILE2"' EXIT

# s/"[^"]*"/s/g changes strings to the letter 's'
# It won't match a few C strings which is OK for our purposes

# s/[a-zA-Z_][a-zA-Z0-9_]*/v/g changes variable names to 'v'
# It will also change function names, keywords etc. which is OK for our purposes.

# transform FILE - write FILE to stdout with // comments removed,
# strings replaced by 's' and identifiers replaced by 'v'
# (the 's' left by a string is itself rewritten to 'v' by the last substitution)
transform() {
    sed '
        s/\/\/.*//
        s/"[^"]*"/s/g
        s/[a-zA-Z_][a-zA-Z0-9_]*/v/g
        ' "$1"
}

# Compare the transformed text of every file with each file listed before it.
for file1 in "$@"
do
    for file2 in "$@"
    do
        # stop on reaching file1 itself, which avoids comparing pairs twice
        if [ "$file1" = "$file2" ]
        then
            break
        fi

        transform "$file1" >"$TMP_FILE1"
        transform "$file2" >"$TMP_FILE2"

        # diff succeeds when the transformed files do not differ
        if diff -i -B -w "$TMP_FILE1" "$TMP_FILE2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done


Improved version of plagiarism_detection.identifiers.sh
# create two temporary files; give up if either cannot be created
TMP_FILE1=$(mktemp) || exit 1
TMP_FILE2=$(mktemp) || {
    rm -f -- "$TMP_FILE1"
    exit 1
}
# remove the temporary files however the script exits
trap 'rm -f -- "$TMP_FILE1" "$TMP_FILE2"' EXIT

# Note the use of sort so line reordering won't prevent detection of plagiarism.

# transform FILE - write FILE to stdout with // comments removed, strings and
# identifiers both reduced to 'v' (a string first becomes 's', which the
# identifier substitution then rewrites), and the resulting lines sorted
transform() {
    sed '
        s/\/\/.*//
        s/"[^"]*"/s/g
        s/[a-zA-Z_][a-zA-Z0-9_]*/v/g
        ' "$1" |
    sort
}

# Compare the transformed text of every file with each file listed before it.
for file1 in "$@"
do
    for file2 in "$@"
    do
        # stop on reaching file1 itself, which avoids comparing pairs twice
        if [ "$file1" = "$file2" ]
        then
            break
        fi

        transform "$file1" >"$TMP_FILE1"
        transform "$file2" >"$TMP_FILE2"

        # diff succeeds when the transformed files do not differ
        if diff -i -B -w "$TMP_FILE1" "$TMP_FILE2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done
rm -f "$TMP_FILE1" "$TMP_FILE2"


Improved version of plagiarism_detection.reordering.sh

Note use sha256sum to calculate a Cryptographic hash of the modified file
https://en.wikipedia.org/wiki/SHA-2 and use of sort && uniq to find files with the same hash
This allows execution time linear in the number of files
We could use a faster less secure hashing function instead of sha2
# sha2hash FILE - print the sha256sum output for FILE after removing // comments,
# reducing strings and identifiers to 'v' (a string first becomes 's', which the
# identifier substitution then rewrites) and sorting the lines
sha2hash() {
    sed '
        s/\/\/.*//
        s/"[^"]*"/s/g
        s/[a-zA-Z_][a-zA-Z0-9_]*/v/g
        ' "$1" |
    sort |
    sha256sum
}

# Print one "hash filename" line per file, then keep only the groups of
# lines whose leading 32 characters (part of the hash) are repeated.
for file in "$@"
do
    # "$file" is quoted so a pathname containing white space reaches sha2hash intact
    echo "$(sha2hash "$file") $file"
done |
sort |
uniq -w32 -d --all-repeated=separate



securely & robustly create a new temporary directory
# stop at once if the directory cannot be created, so nothing below
# runs in (or removes from) the wrong place
temporary_directory=$(mktemp -d) || exit 1

# ensure temporary directory + all its contents removed on exit
# INT/TERM are turned into a normal exit so the EXIT trap runs for them too
trap 'exit 1' INT TERM
trap 'rm -rf -- "$temporary_directory"; exit' EXIT

# change working directory to the new temporary directory
cd "$temporary_directory" || exit 1

# we are now in an empty directory
# and can create any number of files & directories
# which all will be removed by the trap above

# e.g. create one thousand empty files
seq 1 1000 | xargs touch

# print current directory and list files
pwd
ls -l
print positive integers for one second real time
my_process_id=$$

# background job: wait a second, then send this process
# the default signal from kill
{ sleep 1; kill "$my_process_id"; } &

# count upwards from 0 until the signal arrives
i=0
while :
do
    echo "$i"
    i=$((i + 1))
done
count slowly and laugh at interrupts (ctrl-C)
catch signal SIGINT and print message
# on SIGINT run the echo instead of the default action
trap 'echo ha ha' INT

# print 0, 1, 2, ... with a one-second sleep after each number
n=0
while :
do
    echo "$n"
    sleep 1
    n=$((n + 1))
done
print positive integers for one second real time

catch signal SIGTERM, print message and exit
# on SIGTERM report the loop count and exit successfully
trap 'echo loop executed $n times in 1 second; exit 0' TERM

# background job: wait a second, then send this process
# the default signal from kill
my_process_id=$$
{ sleep 1; kill "$my_process_id"; } &

# increment n as fast as possible until the signal arrives
n=0
while :
do
    n=$((n + 1))
done

compile the files of a multi-file C program in parallel use create_1001_file_C_program.sh to create suitable test data

On a CPU with n cores this can be (nearly) n times faster

If there are large number of C files we may exhaust memory or operating system resources

# start one background compile per file named on the command line
for source_file
do
    clang -c "$source_file" &
done

# block until every background compile has finished,
# then compile the .o files into a single binary
wait
clang -o binary -- *.o

compile the files of a multi-file C program in parallel use create_1001_file_C_program.sh to create suitable test data
on Linux getconf will tell us how many cores the machine has otherwise assume 8
# use the value reported by getconf, or 8 when getconf fails
if ! max_processes=$(getconf _NPROCESSORS_ONLN 2>/dev/null)
then
    max_processes=8
fi

# NOTE: this breaks if a filename contains whitespace or quotes

# xargs runs one clang per filename, up to $max_processes at once
echo "$@" |
xargs --max-procs="$max_processes" --max-args=1 clang -c

clang -o binary -- *.o

compile the files of a multi-file C program in parallel use create_1001_file_C_program.sh to create suitable test data
find's -print0 option terminates pathnames with a '\0' xargs's --null option expects '\0' terminated input as '\0' can not appear in file names this can handle any filename
on Linux getconf will tell us how many cores the machine has; if getconf fails, assume 8
# use the value reported by getconf, or 8 when getconf fails
if ! max_processes=$(getconf _NPROCESSORS_ONLN 2>/dev/null)
then
    max_processes=8
fi

# find writes '\0'-terminated pathnames, which xargs --null reads back;
# xargs runs one clang per pathname, up to $max_processes at once
find "$@" -print0 |
xargs --max-procs="$max_processes" --max-args=1 --null clang -c

clang -o binary -- *.o

compile the files of a multi-file C program in parallel use create_1001_file_C_program.sh to create suitable test data
# parallel runs "clang -c" once per argument listed after :::,
# putting that argument in place of {}
parallel clang -c {} ::: "$@"

# compile the resulting .o files into a single binary
clang -o binary -- *.o