#!/bin/dash

# written by andrewt@unsw.edu.au for COMP(2041|9044)

# Improved version of plagiarism_detection.comments.sh
#
# Usage: pass the C source files to compare as arguments.
# For every pair of files whose transformed contents are identical
# (ignoring case, blank lines and whitespace) a line of the form
#     "<file1> is a copy of <file2>"
# is printed to stdout.
#
# Before comparison each file has its // comments removed,
# all C strings changed to the letter 's'
# and all identifiers changed to the letter 'v'.
# Hence changes in comments, strings & identifiers will be ignored.

# mktemp provides suitable temporary filenames, robustly & securely
TMP_FILE1=$(mktemp) || exit 1
TMP_FILE2=$(mktemp) || {
    rm -f "$TMP_FILE1"
    exit 1
}

# trap allows us to remove temporary files when the program exits
trap 'rm -f "$TMP_FILE1" "$TMP_FILE2"' EXIT
# Turn common termination signals into a normal exit so the EXIT trap
# above also runs if the program is interrupted.
trap 'exit 1' HUP INT TERM

# transform FILE
#
# Write a normalised version of the C source in FILE to stdout.
#
# s/\/\/.*// removes // comments (to end of line)
#
# s/"[^"]*"/s/g changes strings to the letter 's'
# It won't match a few C strings (e.g. ones containing \")
# which is OK for our purposes
#
# s/[a-zA-Z_][a-zA-Z0-9_]*/v/g changes variable names to 'v'
# It will also change function names, keywords etc.
# (including the 's' left by the previous command)
# which is OK for our purposes.
transform() {
    sed '
        s/\/\/.*//
        s/"[^"]*"/s/g
        s/[a-zA-Z_][a-zA-Z0-9_]*/v/g
        ' "$1"
}

for file1 in "$@"
do
    for file2 in "$@"
    do
        # stop at file1 itself, so each file is only compared with the
        # files before it on the command line:
        # this avoids comparing pairs of assignments twice
        test "$file1" = "$file2" && break

        transform "$file1" >"$TMP_FILE1"
        transform "$file2" >"$TMP_FILE2"

        # -i ignore case, -B ignore blank lines, -w ignore all whitespace
        if diff -iBw "$TMP_FILE1" "$TMP_FILE2" >/dev/null
        then
            # printf rather than echo so backslashes in filenames
            # are printed unchanged
            printf '%s is a copy of %s\n' "$file1" "$file2"
        fi
    done
done