Software Construction

            echo.c
        
#include <stdio.h>

// Write the command-line arguments to stdout on one line, separated by
// single spaces and followed by a newline (a minimal version of echo).
// Always returns 0.
int main(int argc, char *argv[]) {
    for (int arg = 1; arg < argc; arg++) {
        // every argument except the first is preceded by a space
        printf("%s%s", arg > 1 ? " " : "", argv[arg]);
    }
    putchar('\n');
    return 0;
}

        
            echo.py
        
import sys

def main():
    """
    Write the command-line arguments to stdout on a single line,
    separated by spaces.
    """
    arguments = sys.argv[1:]
    print(*arguments)


if __name__ == '__main__':
    main()

        
            print_argv.py
        
inspect how shell splits lines into program arguments (argv)

import sys
# show the argument list exactly as Python received it
print('sys.argv =', sys.argv)

        
            args.sh
        
A simple shell script demonstrating access to arguments.

# $0 is the name the script was run as, $$ is the shell's process id
echo "My name is $0"
echo "My process number is $$"

# $# is the number of command-line arguments
echo "I have $# arguments"

# "$@" expands to every argument, each as a separate word
echo My command-line arguments are "$@"

# positional parameters above 9 need braces: ${10}, not $10
echo "My 5th argument is '$5'"
echo "My 10th argument is '${10}'"

        
            accessing_args.sh
        
$ ./accessing_args.sh one two "three  four"
one
two
three  four

# print each command-line argument on its own line;
# "$@" keeps arguments containing white space as single words
for argument in "$@"; do
  echo "$argument"
done

l [file|directories...] - list files

Short shell scripts can be used for convenience.

It is common to put these scripts in a directory such as /home/z1234567/scripts and then add this directory to PATH, e.g. in .bash_login
PATH=$PATH:/home/z1234567/scripts

Note: "$@" expands to the arguments to the script, but preserves whitespace in arguments.

# long listing (-l) including hidden entries (-a) with the allocated
# size of each entry (-s); all arguments are handed to ls unchanged
ls -l -a -s "$@"

        
            word_frequency.sh
        
Count the number of times each different word occurs
in the files given as arguments, or stdin if no arguments,
e.g. word_frequency.sh dracula.txt

# print every distinct word of the input preceded by the number of
# times it occurs, most frequent word first
cat "$@" |                   # tr doesn't take filenames as arguments
tr 'A-Z' 'a-z' |             # map uppercase to lower case, better - tr '[:upper:]' '[:lower:]'
tr ' ' '\n' |                # convert to one word per line
tr -cd "a-z'\n" |            # remove all characters except a-z, ' and newline
grep -E -v '^$' |            # remove empty lines
sort |                       # place words in alphabetical order
uniq -c |                    # count how many times each word occurs
sort -rn                     # order in reverse frequency of occurrence

# notes:
# - tr -cd must keep the newline, otherwise all the words would be
#   joined back together into a single line
# - first 2 tr commands could be combined
# - sed 's/ /\n/g' could be used instead of tr ' ' '\n'
# - sed "s/[^a-z']//g" could be used instead of tr -cd "a-z'\n"

        
            seq.v0.sh
        
simple emulation of /usr/bin/seq for a COMP(2041|9044) example

Print the integers 1..n with no argument checking

# $1 is the last integer to print; it is deliberately not validated
last=$1

# count upwards from 1 until the counter exceeds $last
number=1
while [ "$number" -le "$last" ]
do
    echo "$number"
    number=$((number + 1))
done

        
            seq.v1.sh
        

        
simple emulation of /usr/bin/seq for a COMP(2041|9044) example



Print the integers 1..n or m..n



    
# one argument: print 1..last, two arguments: print first..last
if test $# = 1
then
    first=1
    last=$1
elif test $# = 2
then
    first=$1
    last=$2
else
    echo "Usage: $0 <last> or  $0 <first> <last>" 1>&2
    exit 1
fi

# $number starts as a user-supplied argument, so it is quoted to stop
# the shell word-splitting or glob-expanding it (e.g. an argument of *)
number=$first
while test "$number" -le "$last"
do
    echo "$number"
    number=$((number + 1))
done

        
            seq.v2.sh
        

        
simple emulation of /usr/bin/seq for a COMP(2041|9044) example

Print the integers 1..n or m..n

    
# one argument: print 1..last, two arguments: print first..last
if [ $# = 1 ]
then
    first=1
    last=$1
elif [ $# = 2 ]
then
    first=$1
    last=$2
else
    echo "Usage: $0 <last> or  $0 <first> <last>" 1>&2
    exit 1
fi

# the operands are quoted so user-supplied values are never
# word-split or glob-expanded inside [ ]
number=$first
while [ "$number" -le "$last" ]
do
    echo "$number"
    number=$((number + 1))
done

        
            watch_website.sh
        

        

Repeatedly download a specified web page
until a specified regexp matches its source
then notify the specified email address.


For example:



    
# seconds to wait between downloads (5 minutes)
repeat_seconds=300

# exactly three arguments are required: url, regex and e-mail address
if test $# != 3
then
    echo "Usage: $0 <url> <regex> <email-address>" 1>&2
    exit 1
fi

url=$1
regexp=$2
email_address=$3

# download the page again and again until its source matches the regex
until curl --silent "$url" | grep -E "$regexp" >/dev/null
do
    sleep "$repeat_seconds"
done

# the 2nd echo is for testing, remove to really send email
echo "Generated by $0" |
echo mail -s "website '$url' now matches regex '$regexp'" "$email_address"
exit 0

        
            tolower.sh
        
Change the names of the specified files to lower case.
(simple version of the perl utility rename)

Note use of test to check if the new filename is unchanged.

Note the double quotes around $filename so filenames
containing spaces are not broken into multiple words

Note the use of mv -- to stop mv interpreting a
filename beginning with - as an option

Note files named -n or -e still break the script
because echo will treat them as an option,

# at least one filename is required
if test $# = 0
then
    echo "Usage $0: <files>" 1>&2
    exit 1
fi

for filename in "$@"
do
    # the same name with every upper case letter converted to lower case
    new_filename=$(
        echo "$filename"|
        tr '[:upper:]' '[:lower:]'
        )

    # nothing to do when the name is already lower case
    test "$filename" = "$new_filename" &&
        continue

    # -e rather than -r: an existing file must never be overwritten,
    # even one that the current user cannot read
    if test -e "$new_filename"
    then
        echo "$0: $new_filename exists" 1>&2
    elif test -e "$filename"
    then
        mv -- "$filename" "$new_filename"
    else
        echo "$0: $filename not found" 1>&2
    fi

done

        
            read_response_if.sh
        

        
demonstrate simple use of read



    
# prompt without a trailing newline so the reply is typed on the same line
echo -n "Do you like learning Shell? "
read answer

# keep only the first character of the reply, converted to lower case
answer=$(echo "$answer" | cut -c1 | tr A-Z a-z)

# anything other than y or n gets the puzzled response
response="??"
if test "$answer" = "y"
then
    response=":)"
elif test "$answer" = "n"
then
    response=":("
fi

echo "$response"

        
            read_cat.sh
        
over-simple /bin/cat emulation using read

setting the special variable IFS to the empty string
stops trailing white space being stripped

# copy each file named on the command line to stdout, one line at a time
for file in "$@"
do
    # IFS= keeps leading/trailing white space, -r keeps backslashes;
    # the extra test still prints a final line that has no newline
    while IFS= read -r line || test -n "$line"
    do
        # printf, unlike echo, never treats a line such as -n as an option
        printf '%s\n' "$line"
    done <"$file"   # quoted so filenames containing white space work
done

        
            read_response_case.sh
        
demonstrate use of a case statement

# prompt without a trailing newline so the reply is typed on the same line
echo -n "Do you like learning Shell? "
read answer

# the patterns look only at the first character of the reply
case "$answer" in
    [Yy]*) response=":)" ;;
    [Nn]*) response=":(" ;;
    *)     response="??" ;;
esac

echo "$response"

        
            create_1001_file_C_program.sh
        

        
create 1001 C files, compile and run them

this program creates 1000 files file0.c .. file999.c
file file$i.c contains function f$i which returns $i
for example file42.c contains function f42 which returns 42
main.c is created with code to call all 1000 functions
and print the sum of their return values

first add the initial lines to main.c
note the use of quotes on eof to disable variable interpolation
in the here document



    
# start main.c; the quoted delimiter stops the shell expanding
# anything inside the here document
cat >main.c <<'eof'
#include <stdio.h>

int main(void) {
    int v = 0 ;
eof

i=0
while [ "$i" -lt 1000 ]
do
    # file$i.c defines function f$i, which returns $i
    # (unquoted delimiter: $i is expanded inside the here document)
    cat >"file$i.c" <<eof
int f$i(void) {
    return $i;
}
eof

    # main.c declares f$i and adds its return value to v
    cat >>main.c <<eof
    int f$i(void);
    v += f$i();
eof

    i=$((i + 1))
done

# finish main.c: print the sum and return
cat >>main.c <<'eof'
    printf("%d\n", v);
    return 0;
}
eof

# compile and run the 1001 C files
time clang main.c file*.c
./a.out

        
            favourite_command.sh
        
demonstrate simple use of a shell function

# favourite_command NAME COMMAND
# print one sentence stating NAME's favourite Unix command
# (name and command are assigned as ordinary global variables)
favourite_command() {
    name="$1" command="$2"
    echo "My name is ${name}, my favourite Unix command is ${command}."
}

favourite_command "Andrew" uniq
favourite_command "Dylan" jq
favourite_command "Grace" sed

        
            local.sh
        

        


print prime numbers < 1000


note use of local Shell builtin to scope a variable
without the local declaration
the variable i in the function would be global
and would break the bottom while loop


local is not (yet) POSIX but is widely supported



    
# is_prime N
# succeed (return 0) if the integer N is prime, fail (return 1) otherwise
is_prime() {
    local n i
    n=$1

    # integers below 2 (including 0 and 1) are not prime
    test "$n" -lt 2 &&
        return 1

    # trial division by every integer from 2 to n - 1
    i=2
    while test "$i" -lt "$n"
    do
        test $((n % i)) -eq 0 &&
            return 1
        i=$((i + 1))
    done
    return 0
}

# print every integer below 1000 that is_prime accepts, one per line
i=0
while [ "$i" -lt 1000 ]
do
    if is_prime "$i"
    then
        echo "$i"
    fi
    i=$((i + 1))
done

        
            bash_arithmetic.sh
        

        


print prime numbers < 1000



Rewritten to use bash arithmetic extension (())

This makes the program more readable but less portable.



    
# is_prime N
# succeed (return 0) if the integer N is prime, fail (return 1) otherwise
is_prime() {
    local n i
    n=$1

    # integers below 2 (including 0 and 1) are not prime
    if ((n < 2))
    then
        return 1
    fi

    # trial division by every integer from 2 to n - 1
    i=2
    while ((i < n))
    do
        if ((n % i == 0))
        then
            return 1
        fi
        i=$((i + 1))
    done
    return 0
}

# print every integer below 1000 that is_prime accepts, one per line
for ((i = 0; i < 1000; i++))
do
    if is_prime "$i"
    then
        echo "$i"
    fi
done

        
            plagiarism_detection.simple_diff.sh
        
Run as plagiarism_detection.simple_diff.sh <files>

Report if any of the files are copies of each other

Note use of diff -iBw so changes in white-space, blank lines or case are ignored

# compare every file with each file that precedes it on the command line
for file1 in "$@"
do
    for file2 in "$@"
    do
        # stop at file1 itself so each pair is compared only once
        if [ "$file1" = "$file2" ]
        then
            break
        fi

        # diff succeeds when it reports no differences
        if diff -i -B -w "$file1" "$file2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi

    done
done

        
            plagiarism_detection.comments.sh
        
Improved version of plagiarism_detection.simple_diff.sh

The substitution s/\/\/.*// removes // style C comments.

This means changes in comments won't affect comparisons.

Note use of temporary files is insecure - an attacker can anticipate the filename

# scratch files for the comment-free copies; $$ is this shell's process id
TMP_FILE1="/tmp/plagiarism_tmp1$$"
TMP_FILE2="/tmp/plagiarism_tmp2$$"

# compare every file with each file that precedes it on the command line
for file1 in "$@"
do
    for file2 in "$@"
    do
        # stop at file1 itself so each pair is compared only once
        if [ "$file1" = "$file2" ]
        then
            break
        fi

        # delete // comments from both files before comparing them
        sed 's/\/\/.*//' "$file1" >"$TMP_FILE1"
        sed 's/\/\/.*//' "$file2" >"$TMP_FILE2"

        if diff -i -w "$TMP_FILE1" "$TMP_FILE2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi

    done
done

rm -f "$TMP_FILE1" "$TMP_FILE2"

        
            plagiarism_detection.identifiers.sh
        

        



Improved version of plagiarism_detection.comments.sh


change all C strings to the letter 's'
and change all identifiers to the letter 'v'.

Hence changes in strings & identifiers will be ignored.


mktemp provides a suitable temporary filename, robustly & securely

    
# create two private scratch files; give up if either cannot be created
TMP_FILE1=$(mktemp) || exit 1
TMP_FILE2=$(mktemp) || {
    rm -f -- "$TMP_FILE1"
    exit 1
}

# trap allows us to remove temporary files if program interrupted
# (single quotes: the variables are expanded when the trap runs,
# double quotes inside: names containing white space stay intact)
trap 'rm -f -- "$TMP_FILE1" "$TMP_FILE2"' EXIT

# s/\/\/.*// removes // style C comments

# s/"[^"]*"/s/g changes strings to the letter 's'
# It won't match a few C strings which is OK for our purposes

# s/[a-zA-Z_][a-zA-Z0-9_]*/v/g changes variable names to 'v'
# It will also change function names, keywords etc. which is OK for our purposes.
# (the 's' substituted for each string is itself an identifier,
# so it too ends up as 'v')

# transform FILE
# write FILE to stdout with comments removed and with strings
# and identifiers replaced by fixed letters
transform() {
    sed '
        s/\/\/.*//
        s/"[^"]*"/s/g
        s/[a-zA-Z_][a-zA-Z0-9_]*/v/g
        ' "$1"
}

# compare every file with each file that precedes it on the command line
for file1 in "$@"
do
    for file2 in "$@"
    do
        test "$file1" = "$file2" &&
            break # avoid comparing pairs of assignments twice

        # the temporary file names are quoted so they still work
        # if mktemp put them in a directory containing white space
        transform "$file1" >"$TMP_FILE1"
        transform "$file2" >"$TMP_FILE2"

        if diff -iBw "$TMP_FILE1" "$TMP_FILE2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done

        
            plagiarism_detection.reordering.sh
        

        



Improved version of plagiarism_detection.identifiers.sh



    
# create two private scratch files; give up if either cannot be created
TMP_FILE1=$(mktemp) || exit 1
TMP_FILE2=$(mktemp) || {
    rm -f -- "$TMP_FILE1"
    exit 1
}

# remove the scratch files whenever the shell exits
# (single quotes: the variables are expanded when the trap runs,
# double quotes inside: names containing white space stay intact)
trap 'rm -f -- "$TMP_FILE1" "$TMP_FILE2"' EXIT

# Note the use of sort so line reordering won't prevent detection of plagiarism.

# transform FILE
# write FILE to stdout with // comments removed, every string and
# identifier replaced by a fixed letter, and the lines sorted
transform() {
    sed '
        s/\/\/.*//
        s/"[^"]*"/s/g
        s/[a-zA-Z_][a-zA-Z0-9_]*/v/g
        ' "$1"|
    sort
}

# compare every file with each file that precedes it on the command line
for file1 in "$@"
do
    for file2 in "$@"
    do
        test "$file1" = "$file2" &&
            break # avoid comparing pairs of assignments twice

        # the temporary file names are quoted so they still work
        # if mktemp put them in a directory containing white space
        transform "$file1" >"$TMP_FILE1"
        transform "$file2" >"$TMP_FILE2"

        if diff -iBw "$TMP_FILE1" "$TMP_FILE2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done
rm -f -- "$TMP_FILE1" "$TMP_FILE2"

        
            plagiarism_detection.hash.sh
        
Improved version of plagiarism_detection.reordering.sh

Note use of sha256sum to calculate a cryptographic hash of the modified file
https://en.wikipedia.org/wiki/SHA-2
and use of sort & uniq to find files with the same hash

This allows execution time linear in the number of files

We could use a faster less secure hashing function instead of sha2

# sha2hash FILE
# print the sha256sum of FILE after removing // comments, replacing
# every string and identifier by a fixed letter and sorting the lines
sha2hash() {
    sed '
        s/\/\/.*//
        s/"[^"]*"/s/g
        s/[a-zA-Z_][a-zA-Z0-9_]*/v/g
        ' "$1"|
    sort|
    sha256sum
}

# print one line per file starting with the hash of its transformed
# contents, sort so equal hashes are adjacent, then print each group
# of lines sharing a hash; a SHA-256 hash is 64 hex digits, so uniq
# compares the first 64 characters of each line
for file in "$@"
do
    # "$file" is quoted so filenames containing white space work
    echo "$(sha2hash "$file") $file"
done|
sort|
uniq -w64 -d --all-repeated=separate

            create_temporary_directory.sh
        
securely & robustly create a new temporary directory

# stop immediately if the directory cannot be created
temporary_directory=$(mktemp -d) || exit 1

# ensure temporary directory + all its contents removed on exit
trap 'exit 1' INT TERM
trap 'rm -rf -- "$temporary_directory"; exit' EXIT

# change working directory to the new temporary directory
cd "$temporary_directory" || exit 1

# we are now in an empty directory
# and create any number of files & directories
# which all will be removed by the trap above

# e.g. create one thousand empty files
seq 1 1000|xargs touch

# print current directory and list files
pwd
ls -l

        
            async.v0.sh
        
print positive integers for one second real time

# remember this shell's process id for the sub-shell below
my_process_id=$$

# background sub-shell: wait one second, then send this process
# the signal kill sends by default
(
    sleep 1
    kill "$my_process_id"
) &

# print 0, 1, 2, ... until the process is killed
i=0
while :
do
    echo "$i"
    i=$((i + 1))
done

        
            laugh.sh
        
count slowly and laugh at interrupts (ctrl-C)

catch signal SIGINT and  print message

# on SIGINT (ctrl-C) print a message instead of terminating
trap 'echo ha ha' INT

# print 0, 1, 2, ... pausing for a second after each number
n=0
while :
do
    echo "$n"
    sleep 1
    n=$((n + 1))
done

        
            async.v1.sh
        
print positive integers for one second real time

catch signal SIGTERM, print message and exit

# on SIGTERM report how many iterations were completed, then exit with status 0
trap 'echo loop executed $n times in 1 second; exit 0' TERM

# background sub-shell: wait one second, then send this process
# the signal kill sends by default (SIGTERM, caught by the trap above)
my_process_id=$$
(
    sleep 1
    kill "$my_process_id"
) &

# count as fast as possible until the signal arrives
n=0
while :
do
    n=$((n + 1))
done

        
            parallel_compile.v0.sh
        
compile the files of a multi-file C program in parallel
use create_1001_file_C_program.sh to create suitable test data

On a CPU with n cores this can be (nearly) n times faster

If there are large number of C files we
may exhaust memory or operating system resources

# start one background compilation per source file
for source_file in "$@"
do
    clang -c "$source_file" &
done

# wait until every background compilation has finished,
# then link all the .o files into a single binary
wait
clang -o binary -- *.o

        
            parallel_compile.v1.sh
        
compile the files of a multi-file C program in parallel
use create_1001_file_C_program.sh to create suitable test data

on Linux getconf will tell us how many cores the machine has
otherwise assume 8

# use the processor count printed by getconf, or 8 if getconf fails
if ! max_processes=$(getconf _NPROCESSORS_ONLN 2>/dev/null)
then
    max_processes=8
fi

# NOTE: this breaks if a filename contains whitespace or quotes

# xargs runs up to $max_processes copies of clang -c at once,
# giving each a single filename
echo "$@" |
    xargs --max-procs="$max_processes" --max-args=1 clang -c

# link all the .o files into a single binary
clang -o binary -- *.o

        
            parallel_compile.v2.sh
        
compile the files of a multi-file C program in parallel
use create_1001_file_C_program.sh to create suitable test data

find's -print0 option terminates pathnames with a '\0'
xargs's --null option expects '\0' terminated input
as '\0' can not appear in file names this can handle any filename

on Linux getconf will tell us how many cores the machine has
if getconf fails assume 8

# use the processor count printed by getconf, or 8 if getconf fails
if ! max_processes=$(getconf _NPROCESSORS_ONLN 2>/dev/null)
then
    max_processes=8
fi

# pathnames travel from find to xargs separated by '\0' characters;
# xargs runs up to $max_processes copies of clang -c at once,
# giving each a single pathname
find "$@" -print0 |
    xargs --null --max-procs="$max_processes" --max-args=1 clang -c

# link all the .o files into a single binary
clang -o binary -- *.o

        
            parallel_compile.v3.sh
        
compile the files of a multi-file C program in parallel
use create_1001_file_C_program.sh to create suitable test data

# parallel runs clang -c once per argument, with {} replaced by that
# argument (NOTE(review): the ::: syntax looks like GNU parallel - confirm)
parallel clang -c {} ::: "$@"

# link all the .o files into a single binary
clang -o binary -- *.o