#!/bin/dash

# Count the number of time each different word occurs
# in the files given as arguments, or stdin if no arguments,
# e.g. word_frequency.sh dracula.txt
# written by andrewt@unsw.edu.au as a COMP(2041|9044) example

cat "$@" |                   # tr doesn't take filenames as arguments
tr 'A-Z' 'a-z'|              # map uppercase to lower case, better - tr '[:upper:]' '[:lower:]'
tr ' ' '\n' |                # convert to one word per line
tr -cd "a-z'" |              # remove all characters except a-z and '
grep -E -v '^$' |            # remove empty lines
sort |                       # place words in alphabetical order
uniq -c |                    # count how many times each word occurs
sort -rn                     # order in reverse frequency of occurrence

# notes:
# - first 2 tr commands could be combined
# - sed 's/ /\n/g' could be used instead of tr ' ' '\n'
# - sed "s/[^a-z']//g" could be used instead of tr -cd "a-z'"