#!/bin/dash

# written by andrewt@unsw.edu.au for COMP(2041|9044)
# Improved version of plagiarism_detection.identifiers.sh
#
# Usage: <script> file...
#
# Every file named on the command line is compared with each file that
# precedes it in the argument list.  Before comparison both files are
# normalised by transform(), so changes to comments, string contents,
# identifier names, line order, letter case, blank lines and white space
# do not hide a copy.  For each matching pair one line is printed:
#     <later file> is a copy of <earlier file>

# Scratch files holding the normalised form of the two files being compared.
TMP_FILE1=$(mktemp) || exit 1
TMP_FILE2=$(mktemp) || {
    rm -f -- "$TMP_FILE1"
    exit 1
}
# The trap removes the scratch files on every exit path, so no explicit
# rm is needed at the end of the script.
trap 'rm -f -- "$TMP_FILE1" "$TMP_FILE2"' EXIT

# Write a normalised form of the program in file $1 to stdout.
#   1. remove // comments
#   2. replace every double-quoted string (of any length) with s
#   3. replace every identifier with v  (this also rewrites the s
#      placeholder from step 2, so strings and identifiers both end up as v)
# Note the use of sort so line reordering won't prevent detection of plagiarism.
transform() {
    sed '
        s/\/\/.*//
        s/"[^"]*"/s/g
        s/[a-zA-Z_][a-zA-Z0-9_]*/v/g
        ' -- "$1" |
        sort
}

# Compare each argument with the arguments before it and report copies.
main() {
    for file1 in "$@"
    do
        # file1 is normalised lazily, and only once per outer iteration.
        file1_ready=
        for file2 in "$@"
        do
            # Stop at file1 itself: avoids comparing pairs of assignments twice.
            test "$file1" = "$file2" && break

            if test -z "$file1_ready"
            then
                transform "$file1" >"$TMP_FILE1"
                file1_ready=1
            fi
            transform "$file2" >"$TMP_FILE2"

            # -i ignore case, -B ignore blank lines, -w ignore all white space
            if diff -iBw -- "$TMP_FILE1" "$TMP_FILE2" >/dev/null
            then
                printf '%s is a copy of %s\n' "$file1" "$file2"
            fi
        done
    done
    return 0
}

main "$@"