COMP(2041|9044) Software Construction 19T2

COMP(2041|9044) Course Resources

Filters

External resources

regex101: online regex tester

Simple /bin/cat emulation.
    #include <stdio.h>
#include <stdlib.h>

// write bytes of stream to stdout
void process_stream(FILE *in) {
    while (1) {
        int ch = fgetc(in);
        if (ch == EOF)
             break;
        if (fputc(ch, stdout) == EOF) {
            fprintf(stderr, "cat:");
            perror("");
            exit(1);
        }
    }
}

// process files given as arguments
// if no arguments process stdin
int main(int argc, char *argv[]) {
    if (argc == 1)
        process_stream(stdin);
    else
        for (int i = 1; i < argc; i++) {
            FILE *in = fopen(argv[i], "r");
            if (in == NULL) {
                fprintf(stderr, "%s: %s: ", argv[0], argv[i]);
                perror("");
                return 1;
            }
            process_stream(in);
            fclose(in);
        }
    return 0;
}

Download cat.c


Simple /usr/bin/wc emulation.
    #include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

// count lines, words, chars in stream
void process_stream(FILE *in) {
    int n_lines = 0, n_words = 0, n_chars = 0;
    int in_word = 0, c;
    while ((c = fgetc(in)) != EOF) {
        n_chars++;
        if (c == '\n')
            n_lines++;
        if (isspace(c))
            in_word = 0;
        else if (!in_word) {
            in_word = 1;
            n_words++;
        }
    }
    printf("%6d %6d %6d", n_lines, n_words, n_chars);
}

// process files given as arguments
// if no arguments process stdin
int main(int argc, char *argv[]) {
    if (argc == 1) {
        process_stream(stdin);
        printf("\n");
    } else
        for (int i = 1; i < argc; i++) {
            FILE *in = fopen(argv[i], "r");
            if (in == NULL) {
                fprintf(stderr, "%s: %s: ", argv[0], argv[i]);
                perror("");
                return 1;
            }
            process_stream(in);
            printf(" %s\n", argv[i]);
            fclose(in);
        }
    return 0;
}

Download wc.c


Over-simple /usr/bin/grep emulation.
    #include <stdio.h>
#include <stdlib.h>
#include <string.h>

// print lines containing the specified substring
// breaks on long lines, does not implement regexes or other grep features
void process_stream(FILE *stream, char *stream_name, char *substring) {
    char line[65536];
    int line_number = 1;
    while (fgets(line, sizeof line, stream) != NULL) {
        if (strstr(line, substring) != NULL)
            printf("%s:%d:%s", stream_name, line_number, line);
        line_number = line_number + 1;
    }
}

// process files given as arguments
// if no arguments process stdin
int main(int argc, char *argv[]) {
    if (argc == 2)
        process_stream(stdin, "<stdin>", argv[1]);
    else
        for (int i = 2; i < argc; i++) {
            FILE *in = fopen(argv[i], "r");
            if (in == NULL) {
                fprintf(stderr, "%s: %s: ", argv[0], argv[i]);
                perror("");
                return 1;
            }
            process_stream(in, argv[i], argv[1]);
            fclose(in);
        }
    return 0;
}

Download grep.c


Over-simple /usr/bin/uniq emulation.
    #include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LINE 65536

// copy stream to stdout, omitting repeated adjacent lines
void process_stream(FILE *stream) {
    char line[MAX_LINE];
    char lastLine[MAX_LINE];
    int line_number = 0;

    while (fgets(line, MAX_LINE, stream) != NULL) {
        if (line_number == 0 || strcmp(line, lastLine) != 0) {
            fputs(line, stdout);
            strncpy(lastLine, line, MAX_LINE);
        }
        line_number++;
    }
}

// process files given as arguments
// if no arguments process stdin
int main(int argc, char *argv[]) {
    if (argc == 1)
        process_stream(stdin);
    else
        for (int i = 1; i < argc; i++) {
            FILE *in = fopen(argv[i], "r");
            if (in == NULL) {
                fprintf(stderr, "%s: %s: ", argv[0], argv[i]);
                perror("");
                return 1;
            }
            process_stream(in);
            fclose(in);
        }
    return 0;
}

Download uniq.c

This file contains examples of the use of the most common Unix filter programs (egrep, wc, head, etc.). It also contains solutions to the exercises discussed in lectures.
  1. Consider a file course_codes containing UNSW course codes and names.
    ls -l course_codes
    wc course_codes
    head course_codes
    It looks like the code is separated from the title by a number of spaces. We can check this via cat -A:
    head -5 course_codes | cat -A
    This shows us that our initial guess was wrong, and there's actually a tab character between the course code and title (shown as ^I by cat -A). Also, the location of the end-of-line marker ($) indicates that there are no trailing spaces or tabs.
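
    For a quick illustration with a made-up line (not the real file):
    printf 'COMP2041\tSoftware Construction\n' | cat -A
    This prints COMP2041^ISoftware Construction$, with the tab shown as ^I and the end of line marked by $.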

    If we need to know what COMP courses there are:

    egrep -c COMP course_codes
    egrep COMP course_codes
    Either of the two commands below tells us which courses have "comp" in their name or code (in upper or lower case).
    tr A-Z a-z <course_codes | egrep comp
    egrep -i comp course_codes
    The second one looks better because the data itself isn't transformed, only the internal comparisons.

    If we want to know how many courses have "computing" or "computer" in their title, we have to use egrep, which recognises the alternative operator "|", and wc to count the number of matches. There are a couple of ways to construct the regexp:

    egrep -i 'computer|computing' course_codes | wc
    egrep -i 'comput(er|ing)' course_codes | wc
    If you don't like the irrelevant word and character counts, use wc -l.

    Most of these 80 matches were CSE offerings, whose course codes begin with COMP, SENG or BINF. Which of the matches were courses offered by other schools?

    Think about it for a moment.... There's no "but not" regexp operator, so instead we construct a composite filter with an extra step to deal with eliminating the CSE courses:

    egrep -i 'computer|computing' course_codes | egrep -v '^(COMP|SENG|BINF)'
    The last ones are from the Computer Science school at ADFA.
  2. Consider a file called enrollments which contains data about student enrollment in courses. There is one line for each student enrolled in a course:
    ls -l enrollments
    wc enrollments
    head enrollments
    The following commands count how many students are enrolled in COMP2041 or COMP9041. The course IDs differ only in one character, so a character class is used instead of alternation.

    The first version below is often preferred because initially you may want to know "how many xxx"; then, having found that out, the next question might be "well, give me a sample of 10 or so of them". Then it's a simple matter of replacing wc by head.

    egrep '^COMP[29]041' enrollments | wc -l
    egrep -c '^COMP[29]041' enrollments
    The last field in the enrollment file records the student's gender. This command counts the number of female students enrolled in the courses.
    egrep '^COMP[29]041' enrollments | egrep '\|F$' | wc -l
    Not a very good gender balance, is it?

    By the way, the two egreps could have been combined into one. How?
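
    One possible combination (a sketch of an answer, since it's left as an exercise) anchors both the course code and the gender field in a single pattern:
    egrep '^COMP[29]041.*\|F$' enrollments | wc -l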

    This command will give a sorted list of course codes:

    cut -d'|' -f1 enrollments | sort | uniq
    The student records system known to users as myUNSW is built on top of a large US product known as PeopleSoft (the company was taken over by Oracle in 2004). On a scale of 1 to 10 the quality of the design of this product is about 3. One of its many flaws is its insistence that everybody must have two names, a "Last Name" and a "First Name", neither of which can be empty. To signify that a person has only a single name (common in Sri Lanka, for example), the system stores a dot character in the "First Name" field. The enrollments file shows the data as stored in the system, with a comma and space separating the component names. It has some single-named people (note that the names themselves have been disguised):
    egrep ', \.' enrollments
    What would have happened if we forgot the backslash?
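
    To see the difference (the exact counts depend on the data):
    egrep ', \.' enrollments | wc -l    # comma, space, then a literal dot: just the single-named students
    egrep ', .' enrollments | wc -l     # comma, space, then any character at all: matches virtually every record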

    If we wanted to know how many different students there were of this type rather than all enrollments, just cut out the second field (student ID) and use uniq. It's not necessary to sort the data in this case only because the data is clustered, that is, all equal values are adjacent although they're not necessarily sorted.

    egrep ', \.' enrollments | cut -d'|' -f2 | uniq | wc
  3. Now let us turn our attention from students and courses to programs. The enrollments file, as well as linking a student to the courses they're taking, also links them to the program (degree) that they are currently enrolled in. Consider that we want to find out the program codes of the students taking COMP2041. The following pipeline will do this:
    egrep 'COMP[29]041' enrollments | cut -d'|' -f4 | cut -d/ -f1  |sort | uniq
    If we want to know how many students come from each program, ordered from most common program to least common program, try this:
    egrep COMP[29]041 enrollments | cut -d'|' -f4 | cut -d/ -f1 | sort | uniq -c | sort -nr
    Note that a tab is usually inserted between the count and the data, but not all implementations of the uniq command ensure this.
  4. Consider a file called program_codes that contains the code and name of each program offered at UNSW (excluding research programs):

    wc program_codes
    head program_codes
    We can use this file to give more details of the programs that COMP2041 students are taking, for users who don't want to deal with just the program codes.
    egrep COMP[29]041 enrollments | cut -d'|' -f4 | cut -d/ -f1 |sort | uniq | join - program_codes
    We can combine the enrollment counts (for both courses) with the program titles to produce a self-descriptive tally. It's even better if it's in decreasing order of popularity, so after joining the tallies with the program titles, re-sort the composite data:
    egrep 'COMP[29]041' enrollments | cut -d'|' -f4 | cut -d/ -f1 |sort | uniq -c | join -1 2 -a 1 - program_codes  | sort -k2rn
    Note the curious extra space before the title of programs 8682 and 8684. It took me a while to work it out, can you? (Hint: how are the programs shown in the enrollment file?) Suggest an appropriate change to the pipeline.
  5. Lecture exercises on wc:
    1. how many different programs does UNSW offer?
      wc -l program_codes
    2. how many times was WebCMS accessed?
      wc -l access_log
    3. how many students are studying in CSE?
      wc -l enrollments

      The above solutions assume that we're talking about total enrollments. If the question actually meant how many distinct individuals are studying courses offered by CSE, then we'd answer it as:

      cut -d'|' -f2 enrollments | sort | uniq | wc -l
    4. how many words are there in the book?
      wc -w book
    5. how many lines are there in the story?
      wc -l story
Shell

External resources

Shell commands for power users.

A simple shell script demonstrating access to arguments.
    echo My name is $0
echo My process number is $$
echo I have $# arguments
echo My arguments separately are $*
echo My arguments together are "$@"
echo My 5th argument is "'$5'"

Download args.sh

l [file|directories...] - list files
Short Shell scripts can be used for convenience.
Note: "$@" like $* expands to the arguments to the script, but preserves the integrity of each argument if it contains spaces.
    ls -las "$@"

Download l
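
A small demonstration of that difference (a sketch; set -- fakes the script's arguments, one of which contains a space):
    set -- "two words" three
    for a in "$@"; do echo "[$a]"; done   # prints [two words] then [three]: argument boundaries preserved
    for a in $*;   do echo "[$a]"; done   # prints [two] [words] [three]: the first argument is split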


Count the number of times each different word occurs in the files given as arguments, e.g. word_frequency.sh dracula.txt
    sed 's/ /\n/g' "$@"|      # convert to one word per line
tr A-Z a-z|               # map uppercase to lower case
sed "s/[^a-z']//g"|       # remove all characters except a-z and '
egrep -v '^$'|            # remove empty lines
sort|                     # place words in alphabetical order
uniq -c|                  # use uniq to count how many times each word occurs
sort -n                   # order words by frequency of occurrence

Download word_frequency.sh


Print the integers 1..n if 1 argument given.
Print the integers n..m if 2 arguments given.
    if test $# = 1
then
    start=1
    finish=$1
elif test $# = 2
then
    start=$1
    finish=$2
else
    echo "Usage: $0 <start> <finish>" 1>&2
    exit 1
fi

for argument in "$@"
do
    # clumsy way to check if argument is a valid integer
    if echo "$argument"|egrep -v '^-?[0-9]+$' >/dev/null
    then
        echo "$0: argument '$argument' is not an integer" 1>&2
        exit 1
    fi
done

number=$start
while test $number -le $finish
do
    echo $number
    number=`expr $number + 1`    # or number=$(($number + 1))
done

Download iota.v1.sh


Print the integers 1..n if 1 argument given.
Print the integers n..m if 2 arguments given.

Using bash arithmetic, which is more readable but less portable
    if (($# == 1))
then
    start=1
    finish=$1
elif (($# == 2))
then
    start=$1
    finish=$2
else
    echo "Usage: $0 <start> <finish>" 1>&2
    exit 1
fi

for argument in "$@"
do
    # This use of a regex is a bash extension missing from many shells
    # It should be avoided if portability is a concern
    if ! [[ "$argument" =~ ^-?[0-9]+$ ]]
    then
        echo "$0: argument '$argument' is not an integer" 1>&2
        exit 1
    fi
done

number=$start
while ((number <= finish))
do
    echo $number
    number=$((number + 1))
done

Download iota.v2.sh


Change the names of the specified files to lower case.
Note the use of test to check if the new filename differs from the old.
The perl utility rename provides a more general alternative.

Note without the double quotes below filenames containing spaces would be handled incorrectly.

Note also the use of -- to avoid mv interpreting a filename beginning with - as an option

However, files named -n or -e will break the script because echo will treat them as options.
    if test $# = 0
then
    echo "Usage $0: <files>" 1>&2
    exit 1
fi

for filename in "$@"
do
    new_filename=`echo "$filename" | tr A-Z a-z`
    test "$filename" = "$new_filename" && continue
    if test -r "$new_filename"
    then
        echo "$0: $new_filename exists" 1>&2
    elif test -e "$filename"
    then
        mv -- "$filename" "$new_filename"
    else
        echo "$0: $filename not found" 1>&2
    fi
done

Download tolower.sh
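
One possible workaround for the echo problem noted above (a sketch, not part of the script as downloaded) is to build the new name with printf, which does not treat -n or -e as options:
    new_filename=`printf '%s\n' "$filename" | tr A-Z a-z`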


Repeatedly download a specified web page until a specified regexp matches its source then notify the specified email address.

For example:
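    A hypothetical invocation (the URL, regexp and email address below are made up):
    watch_website.sh 'http://www.example.com/results.html' 'COMP2041 marks' z1234567@example.com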
    repeat_seconds=300  #check every 5 minutes

if test $# = 3
then
    url=$1
    regexp=$2
    email_address=$3
else
    echo "Usage: $0 <url> <regex>" 1>&2
    exit 1
fi

while true
do
    if wget -O- -q "$url"|egrep "$regexp" >/dev/null
    then
        echo "Generated by $0" | mail -s "$url now matches $regexp" $email_address
        exit 0
    fi
    sleep $repeat_seconds
done

Download watch_website.sh

Create 1001 C files, then compile and run them.
file$i.c contains a definition of a function f$i which returns $i; for example, file42.c will contain a function f42 that returns 42.
main.c contains code to call all 1000 functions and print the sum of their return values.

Add the initial lines to main.c. Note the use of quotes on eof to disable variable interpolation in the here document.
    cat >main.c <<'eof'
#include <stdio.h>

int main(void) {
    int v = 0 ;
eof

i=0
while test $i -lt 1000
do
    # add lines to main.c to declare and call the function f$i

    cat >>main.c <<eof
    int f$i(void);
    v += f$i();
eof

    # create file$i.c containing function f$i

    cat >file$i.c <<eof
int f$i(void) {
    return $i;
}
eof

    i=$((i + 1))
done

cat >>main.c <<'eof'
    printf("%d\n", v);
    return 0;
}
eof

# compile and run the 1001 C files

time clang main.c file*.c
./a.out

Download create_1001_file_C_program.sh



Run as plagiarism_detection.simple_diff.sh <files>

Report if any of the files are copies of each other

The use of diff -iw means changes in white-space or case won't affect comparisons
    for file1 in "$@"
do
    for file2 in "$@"
    do
        test "$file1" = "$file2" && break
        if diff -i -w "$file1" "$file2" >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done

Download plagiarism_detection.simple_diff.sh




Improved version of plagiarism_detection.simple_diff.sh

The substitution s/\/\/.*// removes // style C comments.
This means changes in comments won't affect comparisons.

Note use of temporary files
    TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$


for file1 in "$@"
do
    for file2 in "$@"
    do
        if test "$file1" = "$file2"
        then
            break # avoid comparing pairs of assignments twice
        fi
        sed 's/\/\/.*//' "$file1" >$TMP_FILE1
        sed 's/\/\/.*//' "$file2" >$TMP_FILE2
        if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done
rm -f $TMP_FILE1 $TMP_FILE2

Download plagiarism_detection.comments.sh




Improved version of plagiarism_detection.comments.sh

This version converts C strings to the letter 's' and it converts identifiers to the letter 'v'.
Hence changes in strings & identifiers won't prevent detection of plagiarism.

The substitution s/"["]*"/s/g changes strings to the letter 's'
This pattern won't match a few C strings which is fine for our purposes

The s/[a-zA-Z_][a-zA-Z0-9_]*/v/g changes all variable names to 'v' which means changes to variable names won't affect comparison.
Note this also may change function names, keywords etc.
This is fine for our purposes.

    TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$
substitutions='s/\/\/.*//;s/"[^"]*"/s/g;s/[a-zA-Z_][a-zA-Z0-9_]*/v/g'

for file1 in "$@"
do
    for file2 in "$@"
    do
        test "$file1" = "$file2" && break # don't compare pairs of assignments twice
        sed "$substitutions" "$file1" >$TMP_FILE1
        sed "$substitutions" "$file2" >$TMP_FILE2
        if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done
rm -f $TMP_FILE1 $TMP_FILE2

Download plagiarism_detection.identifiers.sh




Improved version of plagiarism_detection.identifiers.sh

Note the use of sort so line reordering won't prevent detection of plagiarism.
    TMP_FILE1=/tmp/plagiarism_tmp1$$
TMP_FILE2=/tmp/plagiarism_tmp2$$
substitutions='s/\/\/.*//;s/"[^"]*"/s/g;s/[a-zA-Z_][a-zA-Z0-9_]*/v/g'

for file1 in "$@"
do
    for file2 in "$@"
    do
        test "$file1" = "$file2" && break # don't compare pairs of assignments twice
        sed "$substitutions" "$file1"|sort >$TMP_FILE1
        sed "$substitutions" "$file2"|sort >$TMP_FILE2
        if diff -i -w $TMP_FILE1 $TMP_FILE2 >/dev/null
        then
            echo "$file1 is a copy of $file2"
        fi
    done
done
rm -f $TMP_FILE1 $TMP_FILE2

Download plagiarism_detection.reordering.sh




Improved version of plagiarism_detection.reordering.sh

Note the use of md5sum to calculate a cryptographic hash of the modified file (http://en.wikipedia.org/wiki/MD5), and then sort and uniq to find files with the same hash.

This allows execution time linear in the number of files
    substitutions='s/\/\/.*//;s/"[^"]*"/s/g;s/[a-zA-Z_][a-zA-Z0-9_]*/v/g'

for file in "$@"
do
    echo `sed "$substitutions" "$file"|sort|md5sum` $file
done|
sort|
uniq -w32 -d --all-repeated=separate|
cut -c36-

Download plagiarism_detection.md5_hash.sh
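
Before the final cut, each line consists of the 32-character MD5 hash, a space, a dash, a space, and then the filename, which is why uniq compares only the first 32 characters (-w32) and cut -c36- keeps just the filenames. A made-up illustration of two colluding submissions:
    0cc175b9c0f1b6a831c399e269772661 - ass1_alice.c
    0cc175b9c0f1b6a831c399e269772661 - ass1_bob.c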


Print all occurrences of executable programs with the specified names in $PATH.
Note use of tr to produce a space-separated list of directories suitable for a for loop.
Breaks if directories contain spaces (fixing this left as an exercise).
    if test $# = 0
then
    echo "Usage $0: <program>" 1>&2
    exit 1
fi

for program in "$@"
do
    program_found=''
    for directory in `echo "$PATH" | tr ':' ' '`
    do
        f="$directory/$program"
        if test -x "$f"
        then
            ls -ld "$f"
            program_found=1
        fi
    done
    if test -z "$program_found"
    then
        echo "$program not found"
    fi
done

Download where.v0.sh


Print all occurrences of executable programs with the specified names in $PATH.
Note use of tr to produce a list of directories one per line suitable for a while loop.
Won't work if directories contain spaces (fixing this left as an exercise)
    if test $# = 0
then
    echo "Usage $0: <program>" 1>&2
    exit 1
fi

for program in "$@"
do
    echo "$PATH"|
    tr ':' '\n'|
    while read directory
    do
        f="$directory/$program"
        if test -x "$f"
        then
            ls -ld "$f"
        fi
    done|
    egrep '.' || echo "$program not found"
done

Download where.v1.sh


Print all occurrences of executable programs with the specified names in $PATH.
Note use of tr to produce a list of directories one per line suitable for a while loop.
Won't work if directories contain new-lines (fixing this left as an exercise)
    if test $# = 0
then
    echo "Usage $0: <program>" 1>&2
    exit 1
fi
for program in "$@"
do
    program_found=''
    n_colons=`echo "$PATH" | tr -d -c : | wc -c`
    n_path_components=`expr $n_colons + 1`
    index=1
    while test $index -le $n_path_components
    do
        directory=`echo "$PATH"|cut -d: -f$index`
        f="$directory/$program"
        if test -x "$f"
        then
            ls -ld "$f"
            program_found=1
        fi
        index=`expr $index + 1`
    done
    test -n "$program_found" || echo "$program not found"
done

Download where.v2.sh

Perl Intro

External resources

perl.org documentation, FAQs & tutorials
a quick reference
course lecture notes
CSE CPAN mirror
compute Pythagoras' Theorem
    print "Enter x: ";
$x = <STDIN>;
chomp $x;
print "Enter y: ";
$y = <STDIN>;
chomp $y;
$pythagoras = sqrt $x * $x + $y * $y;
print "The square root of $x squared + $y squared is $pythagoras\n";

Download pythagoras.pl


Read numbers until end of input (or a non-number) is reached then print the sum of the numbers
    $sum = 0;
while ($line = <STDIN>) {
    $line =~ s/^\s*//; # remove leading white space
    $line =~ s/\s*$//; # remove trailing white space
    # Test if string looks like an integer or real (scientific notation not handled!)
    if ($line !~ /^\d[.\d]*$/) {
        last;
    }
    $sum += $line;
}
print "Sum of the numbers is $sum\n";

Download sum_stdin.pl


Simple example reading a line of input and examining characters
    printf "Enter some input: ";
$line = <STDIN>;

if (!defined $line) {
    die "$0: could not read any characters\n";
}

chomp $line;
$n_chars = length $line;
print "That line contained $n_chars characters\n";

if ($n_chars > 0) {
    $first_char = substr($line, 0, 1);
    $last_char = substr($line, $n_chars - 1, 1);
    print "The first character was '$first_char'\n";
    print "The last character was '$last_char'\n";
}

Download line_chars.pl



Reads lines of input until end-of-input
Print snap! if two consecutive lines are identical
    print "Enter line: ";
$last_line = <STDIN>;
print "Enter line: ";
while ($line = <STDIN>) {
    if ($line eq $last_line) {
        print "Snap!\n";
    }
    $last_line = $line;
    print "Enter line: ";
}

Download snap_consecutive.pl

create a string of size 2^n by concatenation
    die "Usage: $0 <n>\n" if @ARGV != 1;
$n = 0;
$string = '@';
while ($n  < $ARGV[0]) {
    $string = "$string$string";
    $n++;
}
printf "String of 2^%d = %d characters created\n", $n, length $string;

Download exponential_concatenation.pl

Perl Arrays

Perl implementation of /bin/echo. Note this version always writes a trailing space.
    foreach $arg (@ARGV) {
    print $arg, " ";
}
print "\n";

Download echo.0.pl


Perl implementation of /bin/echo
    print "@ARGV\n";

Download echo.1.pl


Perl implementation of /bin/echo
    print join(" ", @ARGV), "\n";

Download echo.2.pl

Sum integers supplied as command line arguments. There is no check that the arguments are numeric.
    $sum = 0;
foreach $arg (@ARGV) {
    $sum += $arg;
}
print "Sum of the numbers is $sum\n";

Download sum_arguments.pl


    while (1) {
    print "Enter array index: ";
    $n = <STDIN>;
    if (!$n) {
        last;
    }
    chomp $n;
    $a[$n] = 42;
    print "Array element $n now contains $a[$n]\n";
    printf "Array size is now %d\n", $#a+1;
}

Download array_growth_demo.pl


Count the number of lines on standard input.
    $line_count = 0;
while (1) {
    $line = <STDIN>;
    last if !$line;
    $line_count++;
}
print "$line_count lines\n";

Download line_count.0.pl


Count the number of lines on standard input - slightly more concise
    $line_count = 0;
while (<STDIN>) {
    $line_count++;
}
print "$line_count lines\n";

Download line_count.1.pl


Count the number of lines on standard input - using backwards while to be really concise
    $line_count = 0;
$line_count++ while <STDIN>;
print "$line_count lines\n";

Download line_count.2.pl


Count the number of lines on standard input. Read the input into an array and use the array size.
    @lines = <STDIN>;
print $#lines+1, " lines\n";

Download line_count.3.pl


Count the number of lines on standard input.
Assignment to () forces a list context and hence reading all lines of input.
The special variable $. contains the current line number
    () = <STDIN>;
print "$. lines\n";

Download line_count.4.pl


Print lines read from stdin in reverse order.
Using a C-style loop and array indexing
    while ($line = <STDIN>) {
    $line[$line_number++] = $line;
}


for ($line_number = $#line; $line_number >= 0 ; $line_number--) {
    print $line[$line_number];
}

Download reverse_lines.0.pl


Print lines read from stdin in reverse order.
Using <> in a list context
    @line = <STDIN>;
for ($line_number = $#line; $line_number >= 0 ; $line_number--) {
    print $line[$line_number];
}

Download reverse_lines.1.pl


Print lines read from stdin in reverse order.
Using <> in a list context & reverse
    @lines = <STDIN>;
print reverse @lines;


Download reverse_lines.2.pl


Print lines read from stdin in reverse order.
Using <> in a list context & reverse
    print reverse <STDIN>;


Download reverse_lines.3.pl


Print lines read from stdin in reverse order.
Using push & pop
    while ($line = <STDIN>) {
    push @lines, $line;
}
while (@lines) {
    my $line = pop @lines;
    print $line;
}

Download reverse_lines.4.pl


Print lines read from stdin in reverse order.
More succinctly with pop
    @lines = <STDIN>;
while (@lines) {
    print pop @lines;
}

Download reverse_lines.5.pl


Print lines read from stdin in reverse order.
Using unshift
    while ($line = <STDIN>) {
    unshift @lines, $line;
}
print @lines;

Download reverse_lines.6.pl


Simple cp implementation using line by line I/O
    die "Usage: $0 <infile> <outfile>\n" if @ARGV != 2;

$infile = shift @ARGV;
$outfile = shift @ARGV;

open my $in, '<', $infile or die "Cannot open $infile: $!";
open my $out, '>', $outfile or die "Cannot open $outfile: $!";

while ($line = <$in>) {
    print $out $line;
}

close $in;
close $out;
exit 0;

Download cp.0.pl


Simple cp implementation using line by line I/O relying on the default variable $_
    die "Usage: $0 <infile> <outfile>\n" if @ARGV != 2;

$infile = shift @ARGV;
$outfile = shift @ARGV;

open my $in, '<', $infile or die "Cannot open $infile: $!";
open my $out, '>', $outfile or die "Cannot open $outfile: $!";

# loop could also be written in one line:
# print {$out} $_ while <$in>;

while (<$in>) {
    print $out $_;
}

close $in;
close $out;
exit 0;

Download cp.1.pl


Simple cp implementation reading the entire file into an array. Note that <> returns a list of lines in a list context (in a scalar context it returns a single line).
    die "Usage: $0 <infile> <outfile>\n" if @ARGV != 2;

$infile = shift @ARGV;
$outfile = shift @ARGV;

open my $in, '<', $infile or die "Cannot open $infile: $!";
@lines = <$in>;
close $in;

open my $out, '>', $outfile or die "Cannot open $outfile: $!";
print $out @lines;
close $out;

exit 0;

Download cp.2.pl


Simple cp implementation via system!
Will break if filenames contain single quotes
    die "Usage: $0 <infile> <outfile>\n" if @ARGV != 2;

$infile = shift @ARGV;
$outfile = shift @ARGV;

exit system "/bin/cp '$infile' '$outfile'";

Download cp.3.pl


Simple cp implementation slurping the entire file into a scalar. $/ contains Perl's input record separator; if it is undefined we can slurp an entire file into a scalar variable with a single read.
    die "Usage: cp <infile> <outfile>\n" if @ARGV != 2;
$infile = shift @ARGV;
$outfile = shift @ARGV;

undef $/;
open my $in, '<', $infile or die "Cannot open $infile: $!";
$contents = <$in>;
close $in;

open my $out, '>', $outfile or die "Cannot open $outfile: $!";
print $out $contents;
close $out;

exit 0;

Download cp.4.pl



Reads lines of input until end-of-input
Print snap! if a line has been seen previously
    while (1) {
    print "Enter line: ";
    $line = <STDIN>;
    if (!defined $line) {
        last;
    }
    if ($seen{$line}) {
        print "Snap!\n";
    }
    $seen{$line}++;
}

Download snap_memory.0.pl


More concise version of snap_memory.0.pl
    while (1) {
    print "Enter line: ";
    $line = <STDIN>;
    last if !defined $line;
    print "Snap!\n" if $seen{$line};
    $seen{$line} = 1;
}

Download snap_memory.1.pl


Run as ./expel_student.pl mark_deductions.txt. Find the student with the largest total mark deduction and expel them.
    while ($line = <>) {
    chomp $line;
    $line =~ s/^"//;
    $line =~ s/"$//;
    my ($name,$offence,$date,$penalty);
    ($name,$offence,$date,$penalty) = split /"\s*,\s*"/, $line;
    $penalty =~ s/[^0-9]//g;
    $deduction{$name} += $penalty;
}

$worst = 0;
foreach $student (keys %deduction) {
    $penalty = $deduction{$student};
    if ($penalty > $worst) {
        $worst_student = $student;
        $worst = $penalty;
    }
}
print "Expel $worst_student who had $worst marks deducted\n";

Download expel_student.pl


Print the nth word on every line of the input files (or stdin). Output is piped through fmt to make reading easy.
    die "Usage: $0 <n> <files>\n" if !@ARGV;
$nth_word = shift @ARGV;
open my $f, '|-', "fmt -w 40" or die "Can not run fmt: $!\n";
while ($line = <>) {
    chomp $line;
    @words = split(/ /, $line);
    print $f "$words[$nth_word]\n" if $words[$nth_word];
}
close $f;

Download nth_word.pl




Perl provides only 1-dimensional arrays, but array elements can contain references to other arrays.
    foreach $i (0..3) {
   foreach $j (0..3) {
        $a[$i][$j] = $i * $j;
    }
}

# We can index @a as if it is a 2d-array
# The following loop prints
#  0  0  0  0
#  0  1  2  3
#  0  2  4  6
#  0  3  6  9

foreach $i (0..3) {
    foreach $j (0..3) {
        printf "%2d ", $a[$i][$j];
    }
    print "\n";
}

# @a contains references to 4 arrays
# the following loop will print something like
# ARRAY(0x55ab77d5e120)
# ARRAY(0x55ab77d5e2a0)
# ARRAY(0x55ab77d687c8)
# ARRAY(0x55ab77d68858)

foreach $i (0..3) {
    print "$a[$i]\n";
}

# We can access the whole array referenced by $a[2] as @{$a[2]}
# the following statement prints
# 0 2 4 6

print "@{$a[2]}\n";

Download 2d_array.pl


    @a = ();

# assign reference to array to $a[42]
$a[42] = [1,2,3];

print "$a[42]\n";      # print ARRAY(0x5576c45e8160)
print "@{$a[42]}\n";   # prints 1 2 3

push @{$a[42]}, (4,5,6);
push @{$a[42]}, (7,8,9);

print "$a[42]\n";      # print ARRAY(0x5576c45e8160)
print "@{$a[42]}\n";   # prints 1 2 3 4 5 6 7 8 9/tmp/a.pl

Download using_2d_array.pl

Perl Regex

External resources

regex summary
count how many people enrolled in each course
    open my $f, '<', "course_codes" or die "$0: can not open course_codes: $!";
while ($line = <$f>) {
    chomp $line;
    $line =~ /([^ ]+) (.+)/ or die "$0: bad line format '$line'";
    $course_names{$1} = $2;
}
close $f;

while ($course = <>) {
    chomp $course;
    $course =~ s/\|.*//;
    $count{$course}++;
}

foreach $course (sort keys %count) {
    print "$course_names{$course} has $count{$course} students enrolled\n";
}

Download count_enrollments.pl

Run as count_first_names.pl enrollments. Count how many enrolled people have each first name.
    while ($line = <>) {
    @fields = split /\|/, $line;
    $student_number = $fields[1];
    next if $already_counted{$student_number};
    $already_counted{$student_number} = 1;
    $full_name = $fields[2];
    $full_name =~ /.*,\s+(\S+)/ or next;
    $first_name = $1;
    $fn{$first_name}++;
}

foreach $first_name (sort keys %fn) {
    printf "There are %2d people with the first name $first_name\n", $fn{$first_name};
}



Download count_first_names.pl

run as duplicate_first_names.pl enrollments
Report cases where there are multiple people with the same first name enrolled in a course.
    while ($line = <>) {
    @fields = split /\|/, $line;
    $course = $fields[0];
    $full_name = $fields[2];
    $full_name =~ /.*,\s+(\S+)/ or next;
    $first_name = $1;
    $cfn{$course}{$first_name}++;
}

foreach $course (sort keys %cfn) {
    foreach $first_name (sort keys %{$cfn{$course}}) {
        next if $cfn{$course}{$first_name} < 2;
        printf "In $course there are %d people with the first name $first_name\n", $cfn{$course}{$first_name};
    }
}



Download duplicate_first_names.pl


For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
Modified text is stored in a new file which is then renamed to replace the old file
    foreach $filename (@ARGV) {
    $tmp_filename = "$filename.new";
    die "$0: $tmp_filename already exists" if -e "$tmp_filename";
    open my $f, '<', $filename or die "$0: Can not open $filename: $!";
    open my $g, '>', $tmp_filename or die "$0: Can not open $tmp_filename : $!";
    while ($line = <$f>) {
        $line =~ s/Herm[io]+ne/Zaphod/g;
        $line =~ s/Harry/Hermione/g;
        $line =~ s/Zaphod/Harry/g;
        print $g $line;
    }
    close $f;
    close $g;
    rename "$tmp_filename", $filename or die "$0: Can not rename file";
}

Download gender_reversal.0.pl


For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
Modified text is stored in an array then the file is over-written
    foreach $filename (@ARGV) {
    open my $f, '<', $filename or die "$0: Can not open $filename: $!";
    $line_count = 0;
    while ($line = <$f>) {
        $line =~ s/Herm[io]+ne/Zaphod/g;
        $line =~ s/Harry/Hermione/g;
        $line =~ s/Zaphod/Harry/g;
        $new_lines[$line_count++] = $line;
    }
    close $f;
    open my $g, '>', ">$filename" or die "$0: Can not open $filename : $!";
    print $g @new_lines;
    close $g;
}

Download gender_reversal.1.pl


For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
Modified text is stored in an array then the file is over-written
    foreach $filename (@ARGV) {
    open my $f, '<', $filename or die "$0: Can not open $filename: $!";
    @lines = <$f>;
    close $f;

    # note loop variable $line is aliased to array elements
    # changes to it change the corresponding array element
    foreach $line (@lines) {
        $line =~ s/Herm[io]+ne/Zaphod/g;
        $line =~ s/Harry/Hermione/g;
        $line =~ s/Zaphod/Harry/g;
    }

    open my $g, '>', ">$filename" or die "$0: Can not open $filename : $!";
    print $g @lines;
    close $g;
}

Download gender_reversal.2.pl


For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text. The text is read into a string, the string is changed, then the file is over-written.

See http://www.perlmonks.org/?node_id=1952 for alternative way to read a file into a string
    foreach $filename (@ARGV) {
    open my $f, '<', $filename or die "$0: Can not open $filename: $!";
    while ($line = <$f>) {
        $novel .= $line;
    }
    close $f;

    $novel =~ s/Herm[io]+ne/Zaphod/g;
    $novel =~ s/Harry/Hermione/g;
    $novel =~ s/Zaphod/Harry/g;

    open my $g, '>', ">$filename" or die "$0: Can not open $filename : $!";
    print $g $novel;
    close $g;
}

Download gender_reversal.3.pl


For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
The unix filter-like behaviour of <> is used to read files
Perl's -i option is used to replace file with output from script
    while ($line = <>) {
    $line =~ s/Herm[io]+ne/Zaphod/g;
    $line =~ s/Harry/Hermione/g;
    $line =~ s/Zaphod/Harry/g;
    print $line;
}

Download gender_reversal.4.pl


For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
The unix filter-like behaviour of <> is used to read files
Perl's -i option is used to replace file with output from the script.
Perl's default variable $_ is used
    while (<>) {
    s/Herm[io]+ne/Zaphod/g;
    s/Harry/Hermione/g;
    s/Zaphod/Harry/g;
    print;
}

Download gender_reversal.5.pl


For each file given as argument replace occurrences of Hermione allowing for some misspellings with Harry and vice-versa.
Relies on Zaphod not occurring in the text.
Perl's -p option is used to produce unix filter-like behaviour.
Perl's -i option is used to replace file with output from the script.
    s/Herm[io]+ne/Zaphod/g;
s/Harry/Hermione/g;
s/Zaphod/Harry/g;

Download gender_reversal.6.pl
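
A sketch of how this last version might be invoked (the filenames are hypothetical): -p supplies the read/print loop and -i rewrites each input file in place.
    perl -p -i gender_reversal.6.pl chapter1.txt chapter2.txt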


Fetch a web page, removing HTML tags and character entities (e.g. &amp;).
Lines between script or style tags are skipped.
Non-blank lines are printed

There are better ways to fetch web pages (e.g. HTTP::Request::Common)
The regex code below doesn't handle a number of cases. It is often better to use a library to properly parse HTML before processing it.
But beware illegal HTML is common & often causes problems for parsers.
    foreach $url (@ARGV) {
    open my $f, '-|', "wget -q -O- '$url'" or die;
    while ($line = <$f>) {
        if ($line =~ /^\s*<(script|style)/i) {
            while ($line = <$f>) {
                last if $line =~ /^\s*<\/(script|style)/i;
            }
        } else {
            $line =~ s/&\w+;/ /g;
            $line =~ s/<[^>]*>//g;
            print $line if $line =~ /\S/;
        }
    }
    close $f;
}

Download wget.0.pl


Fetch a web page, removing HTML tags and character entities.
The contents of script or style tags are removed.
Non-blank lines are printed

The regex code below doesn't handle a number of cases. It is often better to use a library to properly parse HTML before processing it.
But beware illegal HTML is common & often causes problems for parsers.
Note the use of the s modifier to allow . to match a newline.


    use LWP::Simple;
foreach $url (@ARGV) {
    $html = get $url;
    $html =~ s/<script.*?<\/script>//isg;  # remove script tags including contents
    $html =~ s/<style.*?<\/style>//isg;    # remove style tags including contents
    $html =~ s/<.*?>//isg; # remove tags
    $html =~ s/\n\s*\n/\n/ig;  # blank lines
    print $html;
}

Download wget.1.pl



Find the positive integers among the input text, then print their sum and mean.

Note regexp to split on non-digits
Note check to handle empty string from split
    @input_text_array = <>;
$input_text_array = join "", @input_text_array;

@numbers = split(/\D+/, $input_text_array);
print join(",", @numbers), "\n";

foreach $number (@numbers) {
    if ($number ne '') {
        $total += $number;
        $n++;
    }
}

if (@numbers) {
    printf "$n numbers: total $total mean %s\n", $total/$n;
}

Download find_numbers.0.pl



Find the integers (positive and negative) among the input text, then print their sum and mean.

Note regexp to match number: -?\d+
Harder to use split here (unlike just positive integers)
    @input_text_array = <>;
$input_text_array = join "", @input_text_array;

@numbers = $input_text_array =~ /-?\d+/g;

foreach $number (@numbers) {
    $total += $number;
}

if (@numbers) {
    $n = @numbers;
    printf "$n numbers: total $total mean %s\n", $total/$n;
}

Download find_numbers.1.pl


Print the last number (real or integer) on every line if there is one.
Note regexp to match number: -?\d+(\.\d+)?
    while ($line = <>) {
    if ($line =~ /(-?\d+(\.\d+)?)\D*$/) {
        print "$1\n";
    }
}

Download print_last_number.pl


Run as course_first_names.pl enrollments. Report cases where there are multiple people with the same first name enrolled in a course.
    while ($line = <>) {
    @fields = split /\|/, $line;
    $course = $fields[0];
    $full_name = $fields[2];
    $full_name =~ /.*,\s+(\S+)/ or next;
    $first_name = $1;
    $cfn{$course}{$first_name}++;
}

foreach $course (sort keys %cfn) {
    foreach $first_name (sort keys %{$cfn{$course}}) {
        next if $cfn{$course}{$first_name} < 2;
        printf "In $course there are %d people with the first name $first_name\n", $cfn{$course}{$first_name};
    }
}



Download course_first_names.pl

For each course specified as an argument, print a summary of the other courses taken by students in that course.
    $enrollment_file = shift @ARGV or die;
$debug = 0;

open my $c, '<', "course_codes" or die "$0: can not open course_codes: $!";
while (<$c>) {
    ($code, $name) = /\s*(\S+)\s+(.*)/ or die "$0: invalid course codes line: $_";
    $course_name{$code} = $name;
    print STDERR "code='$code' -> name='$name'\n" if $debug;
}
close $c;

open my $f, "<$enrollment_file" or die "$0: can not open $enrollment_file: $!";;
while (<$f>) {
    ($course,$upi,$name) = split /\s*\|\s*/;
    push @{$course{$upi}}, $course;
    $name =~ s/(.*), (.*)/$2 $1/;
    $name =~ s/ .* / /;
    $name{$upi} = $name;
}
close $f;

foreach $course (@ARGV) {
    %n_taking = ();
    $n_students = 0;
    foreach $upi (keys %course) {
        @courses = @{$course{$upi}};
        next if !grep(/$course/, @courses);
        foreach $c (@courses) {
            $n_taking{$c}++;
        }
        $n_students++;
    }
    foreach $c (sort {$n_taking{$a} <=> $n_taking{$b}} keys %n_taking) {
        printf "%5.1f%% of %s students take %s %s\n",
            100*$n_taking{$c}/$n_students, $course, $c, $course_name{$c};
    }
}

Download course_statistics.pl

Perl Functions


This shows a bug due to a missing my declaration

In this case the use of $i in is_prime without a my declaration changes $i outside the function and breaks the while loop calling the function.
    sub is_prime {
    my ($n) = @_;
    $i = 2;
    while ($i < $n) {
        return 0 if $n % $i == 0;
        $i++;
    }
    return 1;
}

$i = 0;
while ($i < 1000) {
    print "$i\n" if is_prime($i);
    $i++;
}

Download my_declaration_bug.pl

3 different ways to sum a list - illustrating various aspects of Perl
simple for loop
    sub sum_list0 {
    my (@list) = @_;
    my $total = 0;
    foreach $element (@list) {
       $total += $element;
    }
    return $total;
}

# recursive
sub sum_list1 {
    my (@list) = @_;
    return 0 if !@list;
    return $list[0] + sum_list1(@list[1..$#list]);
}

# join+eval - interesting but not recommended
sub sum_list2 {
    my (@list) = @_;
    return eval(join("+", @list))
}

print sum_list0(1..10), " ", sum_list1(1..10), " ", sum_list2(1..10),  "\n";

Download sum_list.pl


Simple example illustrating the use of a sort comparison function. Note the use of <=>.
    sub random_date {
    return sprintf "%02d/%02d/%04d", 1 + rand 28, 1 + rand 12, 2000+rand 20
}

sub compare_date {
    my ($day1,$month1,$year1) = split /\D+/, $a;
    my ($day2,$month2,$year2) = split /\D+/, $b;
    return $year1 <=> $year2 || $month1 <=> $month2 || $day1 <=> $day2;
}

push @random_dates, random_date() foreach 1..5;
print "random_dates=@random_dates\n";
@sorted_dates = sort compare_date @random_dates;
print "sorted dates=@sorted_dates\n";

Download sort_dates.pl



Simple example of sorting a list based on the values in a hash.
This is a very common pattern in Perl.
    %days = (Sunday => 0, Monday => 1, Tuesday => 2, Wednesday => 3,
         Thursday => 4, Friday => 5, Saturday => 6);

sub random_day {
    my @days = keys %days;
    return $days[rand @days];
}

sub compare_day {
    return $days{$a} <=> $days{$b};
}

push @random_days, random_day() foreach 1..5;
print "random days = @random_days\n";
@sorted_days = sort compare_day @random_days;
print "sorted days = @sorted_days\n";

Download sort_days.pl



Simple example of sorting a list based on the values in a hash.
This is a very common pattern in Perl. This modified version illustrates Perl's quote-word operator and a hash slice.

Perl's quote-word operator (qw) is a convenient way to create a list of words.
    @days = qw/Sunday Monday Tuesday Wednesday Thursday Friday Saturday/;

# Perl allows you to assign to multiple values in a hash simultaneously
@days{@days} = (0..6);

sub random_day {
    my @days = keys %days;
    return $days[rand @days];
}

sub compare_day {
    return $days{$a} <=> $days{$b};
}

push @random_days, random_day() foreach 1..5;
print "random days = @random_days\n";
@sorted_days = sort compare_day @random_days;
print "sorted days = @sorted_days\n";

Download sort_days.1.pl


implementations of Perl's split & join
    sub my_join {
    my ($separator, @list) = @_;

    return "" if !@list;

    my $string = shift @list;
    foreach $thing (@list) {
        $string .= $separator . $thing;
    }

    return $string;
}

sub my_split1 {
    my ($regexp, $string) = @_;
    my @list = ();

    while ($string =~ /(.*)$regexp(.*)/) {
        unshift @list, $2;
        $string = $1;
    }
    unshift @list, $string if $string ne "";

    return @list;
}

sub my_split2 {
    my ($regexp, $string) = @_;

    my @list = ();
    while ($string =~ s/(.*?)$regexp//) {
        push @list, $1;
    }
    push @list, $string if $string ne "";

    return @list;
}

$s = my_join("+", 1..5);

# prints 1+2+3+4+5 = 15
print "$s = ", eval $s, "\n";

# prints 2 4 8 16
@a = my_split1(",", "2,4,8,16");
print "@a\n";

# prints 2 4 8 16
@a = my_split2(",", "2,4,8,16");
print "@a\n";

Download split_join.pl

implementations of Perl's push
    sub my_push1 {
    my ($array_ref,@elements) = @_;

    @$array_ref = (@$array_ref, @elements);

    return $#$array_ref + 1;
}


# same but with prototype
sub my_push2(\@@) {
    my ($array_ref,@elements) = @_;

    @$array_ref = (@$array_ref, @elements);

    return $#$array_ref + 1;
}

sub mypush2 {
    my ($array_ref,@elements) = @_;
    if (@elements) {
        @$array_ref = (@$array_ref, @elements);
    } else {
        @$array_ref = (@$array_ref, $_);
    }
}

@a = (1..5);

# note explicitly passing an array reference \@a
my_push1 \@a, 10..15;

# note the prototype causes a reference to the array to be passed
my_push2 @a, 20..25;

# prints 1 2 3 4 5 10 11 12 13 14 15 20 21 22 23 24 25
print "@a\n";

Download push.pl


8 different ways to print the odd numbers in a list - illustrating various aspects of Perl
simple for loop
    sub print_odd0 {
    my (@list) = @_;

    foreach $element (@list) {
        print "$element\n" if $element % 2;
    }
}

# simple for loop using index
sub print_odd1 {
    my (@list) = @_;

    foreach $i (0..$#list) {
        print "$list[$i]\n" if $list[$i] % 2;
    }
}

# set $_ in turn to each item in list
# evaluate supplied expression
# print item if the expression evaluates to true

sub print_list0 {
    my ($select_expression, @list) = @_;
    foreach $_ (@list) {
        print "$_\n" if &$select_expression;
    }
}

# more concise version of print_list0
sub print_list1 {
   &{$_[0]} && print "$_\n" foreach @_[1..$#_];
}


# set $_ in turn to each item in list
# evaluate supplied expression
# return a list of items for which the expression evaluated to true
sub my_grep0 {
    my $select_expression = $_[0];
    my @matching_elements;
    foreach $_ (@_[1..$#_]) {
        push @matching_elements, $_ if &$select_expression;
    }
    return @matching_elements;
}


# more concise version of my_grep0
sub my_grep1 {
    my $select_expression = shift;
    my @matching_elements;
    &$select_expression && push @matching_elements, $_ foreach @_;
    return @matching_elements;
}

# calling helper function which returns
# list items selected by an expression
sub print_odd4 {
    my @odd = my_grep0 sub {$_ % 2}, @_;
    foreach $x (@odd) {
        print "$x\n";
    }
}


@numbers = (1..10);

# all 8 statements print the numbers 1,3,5,7,9 one per line

print_odd0(@numbers);

print_odd1(@numbers);

print_list0(sub {$_ % 2}, @numbers);

print_list1(sub {$_ % 2}, @numbers);

print_odd4(@numbers);

my_grep1 sub {$_ % 2 && print "$_\n"}, @numbers;

# using built-in grep and combining print
grep {$_ % 2 && print "$_\n"} @numbers;

# using built-in grep and join
print join("\n", grep {$_ % 2} @numbers), "\n";

Download print_odd.pl

Rename the specified files using the specified Perl code.
For each file the Perl code is executed with $_ set to the filename, and the file is renamed to the value of $_ after the execution. /usr/bin/rename provides this functionality.
    die "Usage: $0 <perl> [files]\n" if !@ARGV;
$perl_code = shift @ARGV;
foreach $filename (@ARGV) {

    $_ = $filename;
    eval $perl_code;
    die "$0: $?" if $?; # eval leaves any error message in $?
    $new_filename = $_;

    next if $filename eq $new_filename;

    die "$0: $new_filename exists already\n" if -e $new_filename;

    rename $filename, $new_filename or
        die "$0: rename '$filename' -> '$new_filename' failed: $!\n";
}

Download rename.pl
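
A hypothetical invocation, lower-casing the extension of some image files:
    perl rename.pl 's/\.JPG$/.jpg/' holiday1.JPG holiday2.JPG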

print a HTML times table

Note html_times_table has 6 parameters; calls to the function are hard to read and it's easy to introduce errors.
    sub html_times_table {
    my ($min_x, $max_x, $min_y, $max_y, $bgcolor, $border) = @_;

    my $html = "<table border=$border bgcolor=$bgcolor>\n";

    foreach $y ($min_y..$max_y) {
        $html .= "<tr>";
        foreach $x ($min_x..$max_x) {
            $html .= sprintf "<td align=right>%s</td>", $x * $y;
        }
        $html .=  "</tr>\n";
    }

    $html .=  "</table>\n";

    return $html;
}

# what do each of these parameters do?
print html_times_table(1, 12, 1, 12, "pink", 1);

Download html_times_table0.pl

print a HTML times table

Note use of a hash to pass named parameters
    sub html_times_table {
    my %parameters = @_;

    my $html = "<table border=$parameters{border} bgcolor=$parameters{bgcolor}>\n";

    foreach $y ($parameters{min_y}..$parameters{max_y}) {
        $html .= "<tr>";
        foreach $x ($parameters{min_x}..$parameters{max_x}) {
            $html .= sprintf "<td align=right>%s</td>", $x * $y;
        }
        $html .=  "</tr>\n";
    }

    $html .=  "</table>\n";

    return $html;
}

# easy to understand what each parameter does
print html_times_table(bgcolor=>'pink', min_y=>1, max_y=>12, border=>1, min_x=>1, max_x=>12);

Download html_times_table1.pl

print a HTML times table

Note use of a hash to pass named parameters combined with a hash to provide default values for parameters
    sub html_times_table {
    my %arguments = @_;

    my %defaults = (min_x=>1, max_x=>10, min_y=>1, max_y=>10, bgcolor=>'white', border=>0);

    my %parameters = (%defaults,%arguments);

    my $html = "<table border=$parameters{border} bgcolor=$parameters{bgcolor}>\n";

    foreach $y ($parameters{min_y}..$parameters{max_y}) {
        $html .= "<tr>";
        foreach $x ($parameters{min_x}..$parameters{max_x}) {
            $html .= sprintf "<td align=right>%s</td>", $x * $y;
        }
        $html .=  "</tr>\n";
    }

    $html .=  "</table>\n";

    return $html;
}

# even more readable because we don't have to supply default values for parameters

print html_times_table(max_y=>12, max_x=>12, bgcolor=>'pink');

Download html_times_table2.pl


    @list = randomize_list(1..20);
print "@list\n";
@sorted_list0 = sort {$a <=> $b} @list;
print "@sorted_list0\n";
@sorted_list1 = quicksort0(@list);
print "@sorted_list1\n";
@sorted_list2 = quicksort1(sub {$a <=> $b}, @list);
print "@sorted_list2\n";

sub quicksort0 {
    return @_ if @_ < 2;
    my ($pivot,@numbers) = @_;
    my @less = grep {$_ < $pivot} @numbers;
    my @more = grep {$_ >= $pivot} @numbers;
    my @sorted_less = quicksort0(@less);
    my @sorted_more = quicksort0(@more);
    return (@sorted_less, $pivot, @sorted_more);
}


sub quicksort1 {
    my ($compare) = shift @_;
    return @_ if @_ < 2;
    my ($pivot, @input) = @_;
    my (@less, @more);
    partition1($compare, $pivot, \@input, \@less, \@more);
    my @sorted_less = quicksort1($compare, @less);
    my @sorted_more = quicksort1($compare, @more);
    my @r = (@sorted_less, $pivot, @sorted_more);
    return (@sorted_less, $pivot, @sorted_more);
}

sub partition1 {
    my ($compare, $pivot, $input, $smaller, $larger) = @_;
    foreach $x (@$input) {
        our $a = $x;
        our $b = $pivot;
        if (&$compare  < 0) {
            push @$smaller, $x;
        } else {
            push @$larger, $x;
        }
    }
}

sub randomize_list {
    my @newlist;
    while (@_) {
        my $random_index = rand @_;
        my $r = splice @_,  $random_index, 1;
        push @newlist, $r;
    }
    return @newlist;
}

Download quicksort0.pl


The same quicksort implementations declared with prototypes, so the comparison can be passed as a bare block like Perl's builtin sort
    sub quicksort0(@);
sub quicksort1(&@);
sub partition1(&$\@\@\@);
sub randomize_list(@);

@list = randomize_list 1..20;
print "@list\n";
@sorted_list0 = sort {$a <=> $b} @list;
print "@sorted_list0\n";
@sorted_list1 = quicksort0 @list;
print "@sorted_list1\n";
@sorted_list2 = quicksort1 {$a <=> $b} @list;
print "@sorted_list2\n";

sub quicksort0(@) {
    return @_ if @_ < 2;
    my ($pivot,@numbers) = @_;
    my @less = grep {$_ < $pivot} @numbers;
    my @more = grep {$_ >= $pivot} @numbers;
    my @sorted_less = quicksort0 @less;
    my @sorted_more = quicksort0 @more;
    return (@sorted_less, $pivot, @sorted_more);
}


sub quicksort1(&@) {
    my ($compare) = shift @_;
    return @_ if @_ < 2;
    my ($pivot, @input) = @_;
    my (@less, @more);
    partition1 \&$compare, $pivot, @input, @less, @more;
    my @sorted_less = quicksort1 \&$compare, @less;
    my @sorted_more = quicksort1 \&$compare, @more;
    return (@sorted_less, $pivot, @sorted_more);
}

sub partition1(&$\@\@\@) {
    my ($compare, $pivot, $input, $smaller, $larger) = @_;
    foreach $x (@$input) {
        our $a = $x;
        our $b = $pivot;
        if (&$compare  < 0) {
            push @$smaller, $x;
        } else {
            push @$larger, $x;
        }
    }
}

sub randomize_list(@) {
    my @newlist;
    while (@_) {
        my $random_index = rand @_;
        my $r = splice @_,  $random_index, 1;
        push @newlist, $r;
    }
    return @newlist;
}

Download quicksort1.pl
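
A hedged note on the prototypes above (not part of the original files): the & prototype lets the caller supply a bare block as the first argument, and \@ makes Perl pass a reference to a named array automatically. A minimal sketch of the \@ behaviour:

    sub first_element(\@) {
        # because of the \@ prototype, Perl passes \@values, not its elements
        my ($array_ref) = @_;
        return $$array_ref[0];
    }

    my @values = (3, 1, 4);
    print first_element(@values), "\n";    # prints 3 - no explicit backslash needed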

Perl Modules
    package Example_Module;
# written by andrewt@cse.unsw.edu.au for COMP2041 
# 
# Definition of a simple Perl module.
#
# List::Util provides the functions below and more

use base 'Exporter';
our @EXPORT = qw/sum min max minstr maxstr/;
use List::Util qw/reduce/;


sub sum {
	return reduce {$a + $b} @_;
}

sub min {
	return reduce {$a < $b ? $a : $b} @_;
}

sub max {
	return reduce {$a > $b ? $a : $b} @_;
}

sub minstr {
	return reduce {$a lt $b ? $a : $b} @_;
}

sub maxstr {
	return reduce {$a gt $b ? $a : $b} @_;
}

# necessary - a module must end with a true value
1;

Download Example_Module.pm



Use of a simple Perl module.

The directory containing Example_Module.pm should be listed in the environment variable PERL5LIB.
PERL5LIB is a colon-separated list of directories to search, similar to PATH.

    use Example_Module qw/max/;

# As max is specified in our import list it can be used without the module name
print max(42,3,5), "\n";

# We don't import min explicitly so it needs the module name
print Example_Module::min(42,3,5), "\n";

Download module_example.pl
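
If setting PERL5LIB is inconvenient, a script can also add a directory to Perl's module search path (@INC) itself with the core lib pragma. A minimal sketch, assuming a hypothetical directory /home/me/perl-modules containing Example_Module.pm:

    # add a directory to @INC at compile time (hypothetical path)
    use lib '/home/me/perl-modules';

    use Example_Module qw/max/;
    print max(42, 3, 5), "\n";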

Webserver

simple Perl TCP/IP server
access by nc -d localhost 4242 or via timeclient.pl

    use IO::Socket::INET;

$server = IO::Socket::INET->new(LocalPort => 4242, Listen=> 16) or die;

while ($c = $server->accept()) {
    printf STDERR "[Connection from %s]\n", $c->peerhost;
    print $c scalar localtime, "\n";
    close $c;
}

Download timeserver.pl


simple Perl TCP/IP client
    use IO::Socket::INET;

$server_host =  $ARGV[0] || 'localhost';
$server_port = 4242;

$c = IO::Socket::INET->new(PeerAddr => $server_host, PeerPort => $server_port) or die;
$time = <$c>;
close $c;

print "Time is $time";

Download timeclient.pl


fetch files via HTTP from the web server at the specified URL; see HTTP::Request::Common for a more general solution
    use IO::Socket::INET;
use IO::Socket::SSL;

foreach $url (@ARGV) {

    # parse URL
    my ($protocol, $host, $port, $path) = $url =~ /(https?):\/\/([^\/:]+)(?::(\d+))?(.*)/ or die;

    # connect to web server
    if ($protocol eq "http") {
        $c = IO::Socket::INET->new(PeerAddr => $host, PeerPort  => $port || 80) or die;
    } else {
        $c = IO::Socket::SSL->new(PeerAddr => $host, PeerPort  => $port || 443) or die;
    }

    $path = "/" if $path eq "";

    # send request for web page to server
    print $c "GET $path HTTP/1.0\r\n\r\n";

    # read & print what the server returns
    my @webpage = <$c>;
    close $c;
    print "GET $url =>\n", @webpage, "\n";
}

Download webget.pl
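
For illustration only (the URL below is made up), the URL-parsing regular expression in webget.pl splits a URL into its four parts like this:

    my $url = "http://www.example.com:8080/docs/index.html";
    my ($protocol, $host, $port, $path) =
        $url =~ /(https?):\/\/([^\/:]+)(?::(\d+))?(.*)/;
    # $protocol eq "http", $host eq "www.example.com",
    # $port == 8080, $path eq "/docs/index.html"
    # for "https://www.example.com/" the port capture is undef, so 443 is used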



Simple Perl web server which returns 404 to any request
access by curl -I http://localhost:2041/ or via webget.pl http://localhost:2041/ or via any web browser
    use IO::Socket::INET;

$server = IO::Socket::INET->new(LocalPort => 2041, ReuseAddr => 1, Listen => 16) or die;

print "Access this server at http://localhost:2041/\n\n";

while ($c = $server->accept()) {
    printf "HTTP request from %s\n\n", $c->peerhost;

    # read and print request header
    while ($header_line = <$c>) {
        print "$header_line";
        last if $header_line !~ /\S/;
    }

    # send our sad reply
    print $c "HTTP/1.0 404 This webserver always returns a 404 status code\n\n";

    close $c;
}

Download webserver-404.pl



Simple Perl web server which returns the same response to any request
access by curl -I http://localhost:2041/ or via webget.pl http://localhost:2041/ or via any web browser
    use IO::Socket::INET;

$server = IO::Socket::INET->new(LocalPort => 2041, ReuseAddr => 1, Listen => 16) or die;

print "Access this web server at http://localhost:2041/\n\n";

$content = "Everything is OK - you will pass COMP(2041|9044).\n";

while ($c = $server->accept()) {
    printf "HTTP request from %s\n\n", $c->peerhost;

    # read and print request header
    while ($header_line = <$c>) {
        print "$header_line";
        last if $header_line !~ /\S/;
    }

    # print header
    print $c "HTTP/1.0 200 OK\n";
    print $c "Content-Type: text/plain\n";
    printf $c "Content-Length: %d\n", length($content);
    print $c "\n";

    # print body
    print $c $content;

    close $c;
}

Download webserver-200.pl



Simple Perl **INSECURE** web server
    use IO::Socket::INET;

# access by curl -I http://localhost:2041/
# or via webget.pl  http://localhost:2041/
# or via any web browser

# note very insecure
# vulnerable to directory traversal (..), e.g:
# curl --path-as-is http://localhost:2041/../../etc/passwd
# and shell injection
#  curl --path-as-is http://localhost:2041/\;date\|

$server = IO::Socket::INET->new(LocalPort => 2041, ReuseAddr => 1, Listen => 16) or die;

print "Access this server at http://localhost:2041/\n\n";

while ($c = $server->accept()) {
    my $request = <$c>;
    print "Connection from ", $c->peerhost, ": $request";

    my ($path) = $request =~ /^GET (.+) HTTP\/1.[01]\s*$/;

    my $filename =  "/var/www/$path";

    print "Sending back $filename\n";

    if (open my $f, $filename) {
        $content = join "", <$f>;
        $return_code = 200;
        close $f;
    } else {
        $content = "Please come again\n";
        $return_code = 404;
    }

    print $c "HTTP/1.0 $return_code OK\n";
    print $c "Content-Type: text/html\n";
    printf $c "Content-Length: %d\n", length($content);
    print $c "\n";

    print $c $content;

    close $c;
}

Download webserver-simple-insecure.pl



Simple Perl web server
access by curl -I http://localhost:2041/ or via webget.pl http://localhost:2041/ or via any web browser
    use IO::Socket::INET;

$server = IO::Socket::INET->new(LocalPort => 2041, ReuseAddr => 1, Listen => 16) or die;

print "Access this server at http://localhost:2041/\n\n";

while ($c = $server->accept()) {
    my $request = <$c>;
    print "Connection from ", $c->peerhost, ": $request";

    # default return values
    my $status_line = "400 BAD REQUEST";
    my $content = "";

    my ($path) = $request =~ /^GET (.+) HTTP\/1.[01]\s*$/;

    if ($path) {
        # remove .. from PATH
        # better would be to generate real path of file and check in /var/www

        $path =~ s?(^|/)\.\.(/|$)?/?g;

        my $filename =  "/var/www/$path";

        # send back contents of index.html if path is directory and it exists
        $filename .= "/index.html" if -e "$filename/index.html";

        if (open my $f, '<', $filename) {
            print "Sending back $filename\n";
            $content = join "", <$f>;
            $status_line = "200 OK";
            close $f;
        } else {
            $status_line = "404 FILE NOT FOUND";
        }
    }

    print $c "HTTP/1.0 $status_line\n";
    print $c "Content-Type: text/html\n";
    printf $c "Content-Length: %d\n", length($content);
    print $c "\n";

    print $c $content;

    close $c;
}

Download webserver-simple.pl
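
The comment in webserver-simple.pl suggests that a better defence than stripping .. would be to resolve the real path of the requested file and check it lies under /var/www. A minimal sketch of that idea, using the core Cwd module (not the course's implementation; note abs_path returns undef for paths that do not exist):

    use Cwd 'abs_path';

    # return the resolved filename if it is inside the document root,
    # otherwise return undef
    sub safe_filename {
        my ($path) = @_;
        my $document_root = abs_path("/var/www") or return;
        my $real = abs_path("/var/www/$path") or return;
        return if $real ne $document_root &&
                  substr($real, 0, length("$document_root/")) ne "$document_root/";
        return $real;
    }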



Simple Perl web server which uses /etc/mime.types to determine the Content-Type of responses
access by curl -I http://localhost:2041/ or via webget.pl http://localhost:2041/ or via any web browser
    use IO::Socket::INET;

$server = IO::Socket::INET->new(LocalPort => 2041, ReuseAddr => 1, Listen => 16) or die;

open my $mt, '<', "/etc/mime.types" or die "Can not open /etc/mime.types: $!\n";
while ($line = <$mt>) {

    $line =~ s/#.*//; # remove comments

    my ($mime_type, @extensions) = split /\s+/, $line;
    foreach $extension (@extensions) {
        $mime_type{$extension} = $mime_type;
    }
}
close $mt;

print "Access this server at http://localhost:2041/\n\n";

while ($c = $server->accept()) {
    my $request = <$c>;
    print "Connection from ", $c->peerhost, ": $request";

    # default return values
    my $status_line = "400 BAD REQUEST";
    my $content_type = "text/plain";
    my $content = "";

    my ($path) = $request =~ /^GET (.+) HTTP\/1.[01]\s*$/;

    if ($path) {

        # remove .. from PATH
        # better would be to generate real path of file and check in /var/www
        $path =~ s?(^|/)\.\.(/|$)?/?g;

        my $filename =  "/var/www/$path";

        # send back contents of index.html if path is directory and it exists
        $filename .= "/index.html" if -e "$filename/index.html";

        # get the extension e.g. jpg for a file
        my ($extension) = $filename =~ /\.(\w+)$/;

        if (open my $f, '<', $filename) {
            print "Sending back $filename\n";

            $content = join "", <$f>;
            close $f;

            $content_type = $mime_type{$extension} if $extension && $mime_type{$extension};

            $status_line = "200 OK";
        } else {
            $status_line = "404 FILE NOT FOUND";
        }
    }

    print $c "HTTP/1.0 $status_line\n";
    print $c "Content-Type: $content_type\n";
    printf $c "Content-Length: %d\n", length($content);
    print $c "\n";

    print $c $content;

    close $c;
}

Download webserver-mime-types.pl
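
For illustration, a hedged check of the %mime_type table built by the parsing loop above (run after that loop; the exact contents of /etc/mime.types vary between systems, so the values in the comments are typical, not guaranteed):

    foreach my $extension (qw/html jpg png/) {
        printf "%-4s => %s\n", $extension, $mime_type{$extension} || 'unknown';
    }
    # typically prints something like:
    # html => text/html
    # jpg  => image/jpeg
    # png  => image/png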


simple demo of persistent storage in Perl
    use Storable;

$cache_file = "./.cache";

# load hash from $cache_file if it exists

if (-r $cache_file) {
    %h = %{retrieve($cache_file)};
}

$h{'COUNT'}++;

print "This script has now been run $h{COUNT} times\n";

# store updated hash in $cache_file
store \%h, $cache_file;

Download persistent.pl

fetch files via HTTP from the web server at the specified URL, with a simple cookie implementation (no expiry); see HTTP::Request::Common for a more general solution
    use Storable;

$cookies_db = "./.cookies";
%cookies = %{retrieve($cookies_db)} if -r $cookies_db;

use IO::Socket;
use IO::Socket::SSL;

foreach $url (@ARGV) {

    # parse URL
    my ($protocol, $host, $port, $path) = $url =~ /(https?):\/\/([^\/:]+)(?::(\d+))?(.*)/ or die;

    # connect to web server
    if ($protocol eq "http") {
        $c = IO::Socket::INET->new(PeerAddr => $host, PeerPort  => $port || 80) or die;
    } else {
        $c = IO::Socket::SSL->new(PeerAddr => $host, PeerPort  => $port || 443) or die;
    }

    $path = "/" if $path eq "";

    # send request for web page to server
    print $c "GET $path HTTP/1.0\r\n\r\n";

    # add any cookie value for domain to the header of request
    foreach $domain (keys %cookies) {
        next if $host !~ /$domain$/;
        foreach $cookie_path (keys %{$cookies{$domain}}) {
            next if $path !~ /^$cookie_path/;
            foreach $name (keys %{$cookies{$domain}{$cookie_path}}) {
                print $c "Cookie: $name=$cookies{$domain}{$cookie_path}{$name}\n";
                print STDERR "Sent cookie $name=$cookies{$domain}{$cookie_path}{$name}\n";
            }
        }
    }
    print $c "\n";

    # read response header
    while (<$c>) {
        last if /^\s*$/;
        next if !/^Set-Cookie:/i;

        # store any cookies we receive back
        my ($name,$value, %v) = /([^=;\s]+)=([^=;\s]+)/g;
        my $domain = $v{'domain'} || $host;
        my $path = $v{'path'} || $path;
        $cookies{$domain}{$path}{$name} = $value;
        print STDERR "Received cookie $domain $path $name=$value\n";
    }

    my @webpage = <$c>;
    print STDOUT @webpage;
}

store(\%cookies, $cookies_db);

Download webget-cookies.pl



Simple insecure Perl web server which runs CGI scripts
access by curl -I http://localhost:2041/ or via webget.pl http://localhost:2041/ or via any web browser
    use IO::Socket::INET;

$server = IO::Socket::INET->new(LocalPort => 2041, ReuseAddr => 1, Listen => 16) or die;

open my $mt, '<', "/etc/mime.types" or die "Can not open /etc/mime.types: $!\n";
while ($line = <$mt>) {

    $line =~ s/#.*//; # remove comments

    my ($mime_type, @extensions) = split /\s+/, $line;
    foreach $extension (@extensions) {
        $mime_type{$extension} = $mime_type;
    }
}
close $mt;

print "Access this server at http://localhost:2041/\n\n";

while ($c = $server->accept()) {
    my $request = <$c>;
    print "Connection from ", $c->peerhost, ": $request";

    # default return values
    my $status_line = "400 BAD REQUEST";
    my $content_type = "text/plain";
    my $content = "";

    my ($path, $query) = $request =~ /^GET (.+?)(?:\?(.*))? HTTP\/1.[01]\s*$/;


    if ($path) {

        # remove .. from PATH
        $path =~ s?(^|/)\.\.(/|$)?/?g;

        my $filename =  "./$path";

        # send back contents of index.html if path is directory and it exists
        $filename .= "/index.html" if -e "$filename/index.html";

        # get the extension e.g. jpg for a file
        my ($extension) = $filename =~ /\.(\w+)$/;

        if ($extension and $extension eq 'cgi' and -x $filename) {
            # run CGI script
            print "Executing $filename\n";
            $ENV{REQUEST_PATH} = $path;
            $ENV{REQUEST_QUERY} = $query;

            # back quotes run a shell so beware shell-metacharacters
            # major vulnerability
            my $response =  `$filename`;

            # return header & body as response
            if ($response =~ /\n/) {
                print $c "HTTP/1.0 $status_line\n";
                print $c $response;
                close $c;
                next;
            }
        } elsif (open my $f, '<', $filename) {
            print "Sending back $filename\n";

            $content = join "", <$f>;
            close $f;

            $content_type = $mime_type{$extension} if $extension && $mime_type{$extension};

            $status_line = "200 OK";
        } else {
            $status_line = "404 FILE NOT FOUND";
        }
    }

    print $c "HTTP/1.0 $status_line\n";
    print $c "Content-Type: $content_type\n";
    printf $c "Content-Length: %d\n", length($content);
    print $c "\n";

    print $c $content;

    close $c;
}

Download webserver-cgi.pl
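
The comment in webserver-cgi.pl notes that back quotes run a shell, so metacharacters in the request path are a major vulnerability. One common way to avoid the shell entirely is the "safe pipe open" idiom sketched below (an alternative, not the course's implementation):

    # fork a child whose stdout is connected to $cgi_output in the parent
    my $pid = open my $cgi_output, "-|";
    die "cannot fork: $!\n" if !defined $pid;
    if ($pid == 0) {
        # child: run the CGI program directly; the block form of exec
        # never involves a shell, so shell metacharacters are harmless
        exec {$filename} $filename;
        exit 1;
    }
    my $response = join "", <$cgi_output>;
    close $cgi_output;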


A trivial CGI shell script which echoes the request path and query string passed in by webserver-cgi.pl
    echo Content-Type: text/html
echo
echo Path was $REQUEST_PATH
echo Query was $REQUEST_QUERY

Download simple.cgi


simple interactive CGI
    if test -z "$REQUEST_QUERY"
then
    number=0
else
    number=$REQUEST_QUERY
fi

cat <<eof
Content-Type: text/html

<html>
<head></head>
<body>
$number
<p>
<a href="$REQUEST_PATH?$(expr $number + 1)">Up</a>
<p>
<a href="$REQUEST_PATH?$(expr $number - 1)">Down</a>
</body>
</html>
eof

Download interactive.cgi


fetch files via HTTP from the web server at the specified URL: a very slow client which will hang (DOS) a single-threaded web server
see HTTP::Request::Common for a more general solution
    use IO::Socket::INET;
use IO::Socket::SSL;

foreach $url (@ARGV) {

    # parse URL
    my ($protocol, $host, $port, $path) = $url =~ /(https?):\/\/([^\/:]+)(?::(\d+))?(.*)/ or die;

    # connect to web server
    if ($protocol eq "http") {
        $c = IO::Socket::INET->new(PeerAddr => $host, PeerPort  => $port || 80) or die;
    } else {
        $c = IO::Socket::SSL->new(PeerAddr => $host, PeerPort  => $port || 443) or die;
    }

    # open connection to webserver then sleep for an hour
    sleep 3600;

    $path = "/" if $path eq "";

    # send request for web page to server
    print $c "GET $path HTTP/1.0\r\n\r\n";

    # read & print what the server returns
    my @webpage = <$c>;
    close $c;
    print "GET $url =>\n", @webpage, "\n";
}

Download webget-slow.pl



Simple parallel Perl web server
access by curl -I http://localhost:2041/ or via webget.pl http://localhost:2041/ or via any web browser
    use IO::Socket::INET;

$server = IO::Socket::INET->new(LocalPort => 2041, ReuseAddr => 1, Listen => 16) or die;

print "Access this server at http://localhost:2041/\n\n";

while ($c = $server->accept()) {

    if (fork() != 0) {
        # parent process goes to waiting for next request
        close $c;
        next;
    }
    # child processes request

    my $request = <$c>;
    print "Connection from ", $c->peerhost, ": $request";

    # default return values
    my $status_line = "400 BAD REQUEST";
    my $content = "";

    my ($path) = $request =~ /^GET (.+) HTTP\/1.[01]\s*$/;

    if ($path) {
        # remove .. from PATH
        # better would be to generate real path of file and check in /var/www

        $path =~ s?(^|/)\.\.(/|$)?/?g;

        my $filename =  "/var/www/$path";

        # send back contents of index.html if path is directory and it exists
        $filename .= "/index.html" if -e "$filename/index.html";

        if (open my $f, '<', $filename) {
            print "Sending back $filename\n";
            $content = join "", <$f>;
            $status_line = "200 OK";
            close $f;
        } else {
            $status_line = "404 FILE NOT FOUND";
        }
    }

    print $c "HTTP/1.0 $status_line\n";
    print $c "Content-Type: text/html\n";
    printf $c "Content-Length: %d\n", length($content);
    print $c "\n";

    print $c $content;

    close $c;

    # child must terminate here otherwise it would compete with parent for requests
    exit 0;
}

Download webserver-parallel.pl
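
One detail the parallel server above glosses over: each forked child leaves a zombie process when it exits, because the parent never calls wait. A minimal sketch of the usual fix on Unix-like systems (an addition, not part of the original example) is to set this before the accept loop:

    # tell the system we will never wait for children: finished children
    # are reaped automatically instead of lingering as zombies
    $SIG{CHLD} = 'IGNORE';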

Make

Simple makefile
    game : main.o graphics.o world.o 
	gcc -o game main.o graphics.o world.o

main.o : main.c graphics.h world.h
	gcc -c main.c

graphics.o : graphics.c world.h 
	gcc -c graphics.c

world.o : world.c world.h 
	gcc -c world.c

clean:
	rm -f game main.o graphics.o world.o

Download Makefile.simple



Simple Perl implementation of "make".

It parses makefile rules and stores them in 2 hashes.

Building is done with a recursive function.
    $makefile_name = "Makefile";
if (@ARGV >= 2 && $ARGV[0] eq "-f") {
    shift @ARGV;
    $makefile_name = shift @ARGV;
}

parse_makefile($makefile_name);
push @ARGV, $first_target if !@ARGV;
build($_) foreach @ARGV;
exit 0;

sub parse_makefile {
    my ($file) = @_;
    open MAKEFILE, $file or die "Can not open $file: $!";
    while (<MAKEFILE>) {
        my ($target, $dependencies) = /(\S+)\s*:\s*(.*)/ or next;
        $first_target ||= $target;
        $dependencies{$target} = $dependencies;
        while (<MAKEFILE>) {
            last if !/^\t/;
            $build_command{$target} .= $_;
        }
    }
}

sub build {
    my ($target) = @_;
    my $build_command = $build_command{$target};
    die "*** No rule to make target $target\n" if !$build_command && !-e $target;
    return if !$build_command;
    my $target_build_needed = ! -e $target;
    foreach $dependency (split /\s+/, $dependencies{$target}) {
        build($dependency);
        $target_build_needed ||= -M  $target > -M $dependency;
    }
    return if !$target_build_needed;
    print $build_command;
    system $build_command;
}

Download make0.pl
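
For illustration only (this is not output of make0.pl): after parse_makefile reads Makefile.simple above, the two hashes hold roughly the following, and build() walks %dependencies recursively before running the corresponding %build_command entry:

    %dependencies = (
        'game'       => 'main.o graphics.o world.o',
        'main.o'     => 'main.c graphics.h world.h',
        'graphics.o' => 'graphics.c world.h',
        'world.o'    => 'world.c world.h',
        'clean'      => '',
    );

    %build_command = (
        'game'       => "\tgcc -o game main.o graphics.o world.o\n",
        'main.o'     => "\tgcc -c main.c\n",
        'graphics.o' => "\tgcc -c graphics.c\n",
        'world.o'    => "\tgcc -c world.c\n",
        'clean'      => "\trm -f game main.o graphics.o world.o\n",
    );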


Simple makefile with variables & a comment
    CC=gcc-4.9
CFLAGS=-O3 -Wall

game : main.o graphics.o world.o 
	$(CC) $(CFLAGS) -o game main.o graphics.o world.o

main.o : main.c graphics.h world.h
	$(CC) $(CFLAGS) -c main.c

graphics.o : graphics.c world.h 
	$(CC) $(CFLAGS) -c graphics.c

world.o : world.c world.h 
	$(CC) $(CFLAGS) -c world.c

clean:
	rm -f game main.o graphics.o world.o

Download Makefile.variables



Add a few lines of code to make0.pl and we can handle variables and comments.

A good example of how easy some tasks are in Perl.
    $makefile_name = "Makefile";
if (@ARGV >= 2 && $ARGV[0] eq "-f") {
    shift @ARGV;
    $makefile_name = shift @ARGV;
}

parse_makefile($makefile_name);
push @ARGV, $first_target if !@ARGV;
build($_) foreach @ARGV;
exit 0;

sub parse_makefile {
    my ($file) = @_;
    open MAKEFILE, $file or die "Can not open $file: $!\n";
    while (<MAKEFILE>) {
        s/#.*//;
        s/\$\((\w+)\)/$variable{$1}||''/eg;
        if (/^\s*(\w+)\s*=\s*(.*)$/) {
            $variable{$1} = $2;
            next;
        }
        my ($target, $dependencies) = /(\S+)\s*:\s*(.*)/ or next;
        $first_target ||= $target;
        $dependencies{$target} = $dependencies;
        while (<MAKEFILE>) {
            s/#.*//;
            s/\$\((\w+)\)/$variable{$1}||''/eg;
            last if !/^\t/;
            $build_command{$target} .= $_;
        }
    }
}

sub build {
    my ($target) = @_;
    my $build_command = $build_command{$target};
    die "*** No rule to make target $target\n" if !$build_command && !-e $target;
    return if !$build_command;
    my $target_build_needed = ! -e $target;
    foreach $dependency (split /\s+/, $dependencies{$target}) {
        build($dependency);
        $target_build_needed ||= -M  $target > -M $dependency;
    }
    return if !$target_build_needed;
    print $build_command;
    system $build_command;
}

Download make1.pl


Simple makefile with builtin variables
    game : main.o graphics.o world.o 
	$(CC) $(CFLAGS) -o $@ main.o graphics.o world.o

main.o : main.c graphics.h world.h
	$(CC) $(CFLAGS) -c $<

graphics.o : graphics.c world.h 
	$(CC) $(CFLAGS) -c $*.c

world.o : world.c world.h 
	$(CC) $(CFLAGS) -c $< -o $@

clean:
	rm -f game main.o graphics.o world.o

Download Makefile.builtin_variables


Simple makefile with builtin variables relying on implicit rules
    game : main.o graphics.o world.o 
	$(CC) $(CFLAGS) -o $@ $^

main.o : main.c graphics.h world.h

graphics.o : graphics.c world.h 

world.o : world.c world.h 

clean:
	rm -f game main.o graphics.o world.o

Download Makefile.implicit



Add a few lines of code to make1.pl and we can handle some builtin variables and an implicit rule.

Another good example of how easy some tasks are in Perl.
    $makefile_name = "Makefile";
if (@ARGV >= 2 && $ARGV[0] eq "-f") {
    shift @ARGV;
    $makefile_name = shift @ARGV;
}
%variable = (CC => 'cc', CFLAGS => '');
parse_makefile($makefile_name);
push @ARGV, $first_target if !@ARGV;
build($_) foreach @ARGV;
exit 0;

sub parse_makefile {
    my ($file) = @_;
    open MAKEFILE, $file or die "Can not open $file: $!";
    while (<MAKEFILE>) {
        s/#.*//;
        s/\$\((\w+)\)/$variable{$1}||''/eg;
        if (/^\s*(\w+)\s*=\s*(.*)$/) {
            $variable{$1} = $2;
            next;
        }
        my ($target, $dependencies) = /(\S+)\s*:\s*(.*)/ or next;
        $first_target = $target if !defined $first_target;
        $dependencies{$target} = $dependencies;
        while (<MAKEFILE>) {
            s/#.*//;
            s/\$\((\w+)\)/$variable{$1}||''/eg;
            last if !/^\t/;
            $build_command{$target} .= $_;
        }
    }
}

sub build {
    my ($target) = @_;
    my $build_command = $build_command{$target};
    if (!$build_command && $target =~ /(.*)\.o/) {
        $build_command = "$variable{CC} $variable{CFLAGS} -c \$< -o \$@\n";
    }
    die "*** No rule to make target $target\n" if !$build_command && !-e $target;
    return if !$build_command;
    my $target_build_needed = ! -e $target;
    foreach $dependency (split /\s+/, $dependencies{$target}) {
        build($dependency);
        $target_build_needed ||= -M  $target > -M $dependency;
    }
    return if !$target_build_needed;
    my %builtin_variables;
    $builtin_variables{'@'} = $target;
    ($builtin_variables{'*'} = $target) =~ s/\.[^\.]*$//;
    $builtin_variables{'^'} = $dependencies{$target};
    ($builtin_variables{'<'} = $dependencies{$target}) =~  s/\s.*//;
    $build_command =~ s/\$([@*^<])/$builtin_variables{$1}||''/eg;
    print $build_command;
    system $build_command;
}

Download make2.pl

Performance
C program for experimenting with memory/cache performance: it writes to a large 2-d array in either i-j (row-major) or j-i (column-major) order depending on its first argument
    #include <stdio.h>
#include <stdlib.h>
#include <assert.h>

void test0(int x, int y, int a[x][y]) {
    fprintf(stderr, "writing to array i-j order\n");
    for (int i = 0; i < x; i++)
        for (int j = 0; j < y; j++)
            a[i][j] = i+j;
}

void test1(int x, int y, int a[x][y]) {
    fprintf(stderr, "writing to array j-i order\n");
    for (int j = 0; j < y; j++)
        for (int i = 0; i < x; i++)
            a[i][j] = i+j;
}


int main(int argc, char*argv[]) {
    int x = atoi(argv[2]);
    int y = atoi(argv[3]);
    fprintf(stderr, "allocating a %dx%d array = %lld bytes\n", x, y, ((long long)x)*y*sizeof (int));
    void *m = malloc(x*y*sizeof (int));
    assert(m);
    if (atoi(argv[1])) {
        test1(x, y, m);
    } else {
        test0(x, y, m);
    }
    return 0;
}

Download cachegrind_example.c


    while ($line = <>) {
    $line =~ tr/A-Z/a-z/;
    foreach $word ($line =~ /[a-z]+/g) {
        $count{$word}++;
    }
}

@words = keys %count;

@sorted_words = sort {$count{$a} <=> $count{$b}} @words;

foreach $word (@sorted_words) {
    printf "%8d %s\n", $count{$word}, $word;
}

Download word_frequency.pl

    import fileinput, re, collections

count = collections.defaultdict(int)
for line in fileinput.input():
    for word in re.findall(r'\w+', line.lower()):
        count[word] += 1

words = count.keys()

sorted_words = sorted(words,  key=lambda w: count[w])

for word in sorted_words:
    print("%8d %s" % (count[word], word))

Download word_frequency.py

    tr -c a-zA-Z ' '|
tr ' ' '\n'|
tr A-Z a-z|
egrep -v '^$'|
sort|
uniq -c


Download word_frequency.sh


 Written in C for "speed" but slow on large inputs:

% gcc -O3 -o word_frequency0 word_frequency0.c
% time word_frequency0 <WarAndPeace.txt >/dev/null
real    0m52.726s
user    0m52.643s
sys     0m0.020s

Profiling with gprof reveals the get function is the problem:

% gcc -p -g word_frequency0.c -o word_frequency0_profile
% head -10000 WarAndPeace.txt | word_frequency0_profile >/dev/null
% gprof word_frequency0_profile
  %   cumulative   self              self     total
 time   seconds   seconds    calls  ms/call  ms/call  name
 88.90      0.79     0.79    88335     0.01     0.01  get
  7.88      0.86     0.07     7531     0.01     0.01  put
  2.25      0.88     0.02    80805     0.00     0.00  get_word
  1.13      0.89     0.01        1    10.02   823.90  read_words
  0.00      0.89     0.00        2     0.00     0.00  size
  0.00      0.89     0.00        1     0.00     0.00  create_map
  0.00      0.89     0.00        1     0.00     0.00  keys
  0.00      0.89     0.00        1     0.00     0.00  sort_words
 ....

    #include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

/*
 * returns the next word from the stream
 * a word is a non-zero length sequence of
 * alphabetic characters
 *
 * NULL is returned if there are no more words to be read
 */
char *
get_word(FILE *stream) {
    int i, c;
    char *w;
    static char *buffer = NULL;
    static int buffer_length = 0;

    if (buffer == NULL) {
        buffer_length = 32;
        buffer = malloc(buffer_length*sizeof (char));
        if (buffer == NULL) {
            fprintf(stderr, "out of memory\n");
            exit(1);
        }
    }

    i = 0;
    while ((c = fgetc(stream)) != EOF) {
        if (!isalpha(c) && i == 0)
            continue;
        if (!isalpha(c))
            break;
        if (i >= buffer_length - 1) {   /* leave room for the terminating '\0' */
            buffer_length += 16;
            buffer = realloc(buffer, buffer_length*sizeof (char));
            if (buffer == NULL) {
                fprintf(stderr, "out of memory\n");
                exit(1);
            }
        }
        buffer[i++] = c;
    }

    if (i == 0)
        return NULL;

    buffer[i] = '\0';

    w = malloc(strlen(buffer) + 1);
    if (w == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(1);
    }
    strcpy(w, buffer);
    return w;
}

typedef struct map  map;

struct map {
       int              size;
       struct map_node  *list;
};

struct map_node {
       char             *key;
       void             *value;
       struct map_node  *next;
};


map *
create_map() {
    struct map *m;
    if ((m = malloc(sizeof *m)) == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    m->size = 0;
    m->list = NULL;
    return m;
}

void *get(map *m, char *key) {
    struct map_node *v;
    for (v = m->list; v != NULL; v = v->next) {
        if (strcmp(key, v->key) == 0) {
            return v->value;
        }
    }
    return NULL;
}

void
put(map *m, char *key, void *value) {
    struct map_node *v;
    for (v = m->list; v != NULL; v = v->next) {
        if (strcmp(key, v->key) == 0) {
            v->value = value;
            return;
        }
    }

    if ((v = malloc(sizeof *v)) == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    v->key = key;
    v->value = value;
    v->next = m->list;
    m->list = v;
    m->size++;
}

int
size(map *m) {
    return m->size;
}

char **keys(map *m) {
    struct map_node *v;
    int  i, n_keys = size(m);
    char **key_array;

    if ((key_array = malloc(n_keys*sizeof (char **))) == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    for (v = m->list, i=0; v != NULL; v = v->next,i++)
        key_array[i] = v->key;
    return key_array;
}

static void
free_map_nodes(struct map_node  *list) {
    if (list == NULL)
        return;
    free_map_nodes(list->next);
    free(list);
}

void free_map(map *m) {
    free_map_nodes(m->list);
    free(m);
}
/*
 * One word_count struct is malloc'ed for each
 * distinct word read
 */
struct word_count {
    int count;
};

/*
 * read the words from a stream
 * associate a word_count struct with
 * each new word
 *
 * increment the count field each time the
 * word is seen
 */
map *
read_words(FILE *stream) {
    char *word;
    struct word_count *w;
    map *m;

    m = create_map();
    while (1) {
        word = get_word(stdin);
        if (word == NULL)
            return m;
        w = get(m, word);
        if (w != NULL) {
            w->count++;
            free(word);
            continue;
        }
        if ((w = malloc(sizeof *w)) == NULL) {
            fprintf(stderr, "Out of memory\n");
            exit(1);
        }
        w->count = 1;
        put(m, word, w);
    }
}

void
sort_words(char **sequence, int length) {
    int i, j;
    char *pivotValue;
    char *temp;

    if (length <= 1)
        return;

    /* start from left and right ends */

    i = 0;
    j = length - 1;

    /* use middle value as pivot */

    pivotValue = sequence[length/2];
    while (i < j) {

        /* Find two out-of-place elements */

        while (strcmp(sequence[i], pivotValue) < 0)
            i++;
        while (strcmp(sequence[j], pivotValue) > 0)
           j--;
        /* and swap them over */

        if (i <= j) {
            temp = sequence[i];
            sequence[i] = sequence[j];
            sequence[j] = temp;
            i++;
            j--;
        }
    }
    sort_words(sequence, j + 1);
    sort_words(sequence+i, length - i);
}
int
main(int argc, char *argv[]) {
    int i, n_unique_words;
    char **key_array;
    map *m;

    m = read_words(stdin);

    key_array = (char **)keys(m);

    n_unique_words = size(m);

    sort_words(key_array, n_unique_words);

    for (i = 0; i < n_unique_words; i++) {
        struct word_count *w;
        w = (struct word_count *)get(m, key_array[i]);
        printf("%5d %s\n", w->count, key_array[i]);
    }

    return 0;
}

Download word_frequency0.c


word_frequency0.c with linked list replaced by binary tree - much faster:

% gcc -O3 word_frequency1.c -o word_frequency1
% time word_frequency1 <WarAndPeace.txt >/dev/null
real    0m0.277s
user    0m0.268s
sys     0m0.008s

    #include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

/*
 * returns the next word from the stream
 * a word is a non-zero length sequence of
 * alphabetic characters
 *
 * NULL is returned if there are no more words to be read
 */
char *
get_word(FILE *stream) {
    int i, c;
    char *w;
    static char *buffer = NULL;
    static int buffer_length = 0;

    if (buffer == NULL) {
        buffer_length = 32;
        buffer = malloc(buffer_length*sizeof (char));
        if (buffer == NULL) {
            fprintf(stderr, "out of memory\n");
            exit(1);
        }
    }

    i = 0;
    while ((c = fgetc(stream)) != EOF) {
        if (!isalpha(c) && i == 0)
            continue;
        if (!isalpha(c))
            break;
        if (i >= buffer_length - 1) {   /* leave room for the terminating '\0' */
            buffer_length += 16;
            buffer = realloc(buffer, buffer_length*sizeof (char));
            if (buffer == NULL) {
                fprintf(stderr, "out of memory\n");
                exit(1);
            }
        }
        buffer[i++] = c;
    }

    if (i == 0)
        return NULL;

    buffer[i] = '\0';

    w = malloc(strlen(buffer) + 1);
    if (w == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(1);
    }
    strcpy(w, buffer);
    return w;
}

typedef struct map  map;

struct map {
       int              size;
       struct map_tnode *tree;
};

struct map_tnode {
       char             *key;
       void             *value;
       struct map_tnode *smaller;
       struct map_tnode *larger;
};

map *
create_map() {
    struct map *m;
    if ((m = malloc(sizeof *m)) == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    m->size = 0;
    m->tree = NULL;
    return m;
}

/*
 * Return the value associated with key in map m.
 */
static void *
get_tree(struct map_tnode *t, char *key) {
    int compare;
    if (t == NULL)
        return NULL;
    compare = strcmp(key, t->key);
    if (compare == 0)
        return t->value;
    else if (compare < 0)
        return get_tree(t->smaller, key);
    else
        return get_tree(t->larger, key);
}

void *get(map *m, char *key) {
    return  get_tree(m->tree, key);
}

/*
 * Associate value with key in map m,
 * creating a new tree node if key is not already present.
 */
struct map_tnode *
put_tree(struct map_tnode *t, char *key, void *value, map *m) {
    int compare;
    if (t == NULL) {
        if ((t = malloc(sizeof *t)) == NULL) {
            fprintf(stderr, "Out of memory\n");
            exit(1);
        }
        t->key = key;
        t->value = value;
        t->smaller = NULL;
        t->larger = NULL;
        m->size++;
        return t;
    }

    compare = strcmp(key, t->key);
    if (compare == 0) {
        t->value = value;
    } else if (compare < 0)
        t->smaller = put_tree(t->smaller, key, value, m);
    else
        t->larger = put_tree(t->larger, key, value, m);
    return t;
}

void
put(map *m, char *key, void *value) {
    m->tree = put_tree(m->tree, key, value, m);
}

int
size(map *m) {
    return m->size;
}

static int
tree_to_array(struct map_tnode *t, char **key_array, int index) {
    if (t == NULL)
        return index;
    index = tree_to_array(t->smaller, key_array, index);
    key_array[index] = t->key;
    return tree_to_array(t->larger, key_array, index + 1);
}

char **keys(map *m) {
    char **key_array;

    if ((key_array = malloc(size(m)*sizeof (char **))) == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    tree_to_array(m->tree, key_array, 0);
    return key_array;
}

static void
free_tnodes(struct map_tnode  *t) {
    if (t == NULL)
        return;
    free_tnodes(t->smaller);
    free_tnodes(t->larger);
    free(t);
}

void free_map(map *m) {
    free_tnodes(m->tree);
    free(m);
}

/*
 * One word_count struct is malloc'ed for each
 * distinct word read
 */
struct word_count {
    int count;
};

/*
 * read the words from a stream
 * associate a word_count struct with
 * each new word
 *
 * increment the count field each time the
 * word is seen
 */
map *
read_words(FILE *stream) {
    char *word;
    struct word_count *w;
    map *m;

    m = create_map();
    while (1) {
        word = get_word(stdin);
        if (word == NULL)
            return m;
        w = get(m, word);
        if (w != NULL) {
            w->count++;
            free(word);
            continue;
        }
        if ((w = malloc(sizeof *w)) == NULL) {
            fprintf(stderr, "Out of memory\n");
            exit(1);
        }
        w->count = 1;
        put(m, word, w);
    }
}

void
sort_words(char **sequence, int length) {
    int i, j;
    char *pivotValue;
    char *temp;

    if (length <= 1)
        return;

    /* start from left and right ends */

    i = 0;
    j = length - 1;

    /* use middle value as pivot */

    pivotValue = sequence[length/2];
    while (i < j) {

        /* Find two out-of-place elements */

        while (strcmp(sequence[i], pivotValue) < 0)
            i++;
        while (strcmp(sequence[j], pivotValue) > 0)
           j--;
        /* and swap them over */

        if (i <= j) {
            temp = sequence[i];
            sequence[i] = sequence[j];
            sequence[j] = temp;
            i++;
            j--;
        }
    }
    sort_words(sequence, j + 1);
    sort_words(sequence+i, length - i);
}
int
main(int argc, char *argv[]) {
    int i, n_unique_words;
    char **key_array;
    map *m;

    m = read_words(stdin);

    key_array = (char **)keys(m);

    n_unique_words = size(m);

    sort_words(key_array, n_unique_words);

    for (i = 0; i < n_unique_words; i++) {
        struct word_count *w;
        w = (struct word_count *)get(m, key_array[i]);
        printf("%5d %s\n", w->count, key_array[i]);
    }

    return 0;
}

Download word_frequency1.c


copy input to output using read/write system calls for each byte - very inefficient and Unix/Linux specific
    #include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

void
copy_file_to_file(int in_fd, int out_fd) {
    while (1) {
        char c[1];
        int bytes_read = read(in_fd, c, 1);
        if (bytes_read < 0) {
                perror("cp: ");
                exit(1);
        }
        if (bytes_read == 0)
            return;
        int bytes_written = write(out_fd, c, bytes_read);
        if (bytes_written <= 0) {
            perror("cp: ");
            exit(1);
        }
    }
}

int
main(int argc, char *argv[]) {
     if (argc != 3) {
        fprintf(stderr, "cp <src-file> <destination-file>\n");
        return 1;
    }

    int in_fd = open(argv[1], O_RDONLY);
    if (in_fd < 0) {
        fprintf(stderr, "cp: %s: ", argv[1]);
        perror("");
        return 1;
    }

    int out_fd = open(argv[2], O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU);
    if (out_fd <= 0) {
        fprintf(stderr, "cp: %s: ", argv[2]);
        perror("");
        return 1;
    }
    copy_file_to_file(in_fd, out_fd);
    return 0;
}

Download cp0.c


copy input to output using read/write system calls for every 8192 bytes - efficient but Unix/Linux specific
    #include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

void
copy_file_to_file(int in_fd, int out_fd) {
    while (1) {
        char c[8192];
        int bytes_read = read(in_fd, c, sizeof c);
        if (bytes_read < 0) {
            perror("cp: ");
            exit(1);
        }
        if (bytes_read <= 0)
            return;
        int bytes_written = write(out_fd, c, bytes_read);
        if (bytes_written <= 0) {
            perror("cp: ");
            exit(1);
        }
    }
}

int
main(int argc, char *argv[]) {
     if (argc != 3) {
        fprintf(stderr, "cp <src-file> <destination-file>\n");
        return 1;
    }

    int in_fd = open(argv[1], O_RDONLY);
    if (in_fd < 0) {
        fprintf(stderr, "cp: %s: ", argv[1]);
        perror("");
        return 1;
    }

    int out_fd = open(argv[2], O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU);
    if (out_fd <= 0) {
        fprintf(stderr, "cp: %s: ", argv[2]);
        perror("");
        return 1;
    }
    copy_file_to_file(in_fd, out_fd);
    return 0;
}

Download cp1.c


copy input to output using stdio functions; stdio buffers reads & writes for you - efficient and portable
    #include <stdio.h>
#include <stdlib.h>

void
copy_file_to_file(FILE *in, FILE *out) {
    while (1) {
        int ch = fgetc(in);
        if (ch == EOF)
             break;
        if (fputc(ch, out) == EOF) {
            fprintf(stderr, "cp:");
            perror("");
            exit(1);
        }
    }
}

int
main(int argc, char *argv[]) {
    FILE *in, *out;

    if (argc != 3) {
        fprintf(stderr, "cp <src-file> <destination-file>\n");
        return 1;
    }

    in = fopen(argv[1], "r");
    if (in == NULL) {
        fprintf(stderr, "cp: %s: ", argv[1]);
        perror("");
        return 1;
    }

    out = fopen(argv[2], "w");
    if (out == NULL) {
        fprintf(stderr, "cp: %s: ", argv[2]);
        perror("");
        return 1;
    }
    copy_file_to_file(in, out);
    return 0;
}

Download cp2.c


    #include <stdio.h>
#include <stdlib.h>

// copy input to output using stdio functions
// stdio buffers reads & writes for you - efficient and portable

void
copy_file_to_file(FILE *in, FILE *out) {
    char input[4096];

    while (1) {
        if(fgets(input, sizeof input, in) == NULL) {
            break;
        }
        if (fprintf(out, "%s", input) < 0) {
            fprintf(stderr, "cp:");
            perror("");
            exit(1);
        }
    }
}

int
main(int argc, char *argv[]) {
    FILE *in, *out;

    if (argc != 3) {
        fprintf(stderr, "cp <src-file> <destination-file>\n");
        return 1;
    }

    in = fopen(argv[1], "r");
    if (in == NULL) {
        fprintf(stderr, "cp: %s: ", argv[1]);
        perror("");
        return 1;
    }

    out = fopen(argv[2], "w");
    if (out == NULL) {
        fprintf(stderr, "cp: %s: ", argv[2]);
        perror("");
        return 1;
    }
    copy_file_to_file(in, out);
    return 0;
}

Download cp3.c


Simple cp implementation reading the entire file into an array of lines
    die "Usage: cp <infile> <outfile>\n" if @ARGV != 2;
$infile = shift @ARGV;
$outfile = shift @ARGV;
open IN, '<', $infile or die "Cannot open $infile: $!\n";
open OUT, '>', $outfile or die "Cannot open $outfile: $!\n";
print OUT <IN>;

Download cp4.pl


Simple cp implementation slurping the entire file as a single string by undefining the input record separator ($/)
    die "Usage: cp <infile> <outfile>\n" if @ARGV != 2;
$infile = shift @ARGV;
$outfile = shift @ARGV;
open IN, '<', $infile or die "Cannot open $infile: $!\n";
open OUT, '>', $outfile or die "Cannot open $outfile: $!\n";
undef $/;
print OUT <IN>;

Download cp5.pl
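
As an aside (a sketch mirroring cp5.pl, not one of the course files): undefining $/ globally affects every subsequent read, so a common idiom is to restrict the change with local:

    open IN, '<', $infile or die "Cannot open $infile: $!\n";
    my $data = do { local $/; <IN> };    # slurp the whole file; $/ is restored afterwards
    print OUT $data;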

    #include <stdio.h>
#include <stdlib.h>

int fib(int n) {
    if (n < 3) return 1;
    return fib(n-1) + fib(n-2);
}

int main(int argc, char *argv[]) {
    for (int i = 1; i < argc; i++) {
        int n = atoi(argv[i]);
        printf("fib(%d) = %d\n", n, fib(n));
    }
    return 0;
}

Download fib0.c

    sub fib {
    my ($n) = @_;
    return 1 if $n < 3;
    return fib($n-1) + fib($n-2);
}
printf "fib(%d) = %d\n", $_, fib($_) foreach @ARGV;

Download fib0.pl

    sub fib {
    my ($n) = @_;
    return 1 if $n < 3;
    return $fib{$n} || ($fib{$n} = fib($n-1) + fib($n-2));
}
printf "fib(%d) = %d\n", $_, fib($_) foreach @ARGV;

Download fib1.pl

    use Memoize;
sub fib($);
memoize('fib');
printf "fib(%d) = %d\n", $_, fib($_) foreach @ARGV;
sub fib($) {
    my ($n) = @_;
    return 1 if $n < 3;
    return fib($n-1) + fib($n-2);
}

Download fib2.pl

Exam