COMP9315 24T1 COMP9315 22T1 Exam
Sample Solutions
Database Systems

These solutions are simply suggestions. In most cases many alternatives
exist which would be equally correct and also worth full marks.

In Q1-Q3, the red code is what you had to write;
the rest of the code was supplied.

Q1

// COMP9315 22T1 Final Exam Q1
// Count tuples in a no-frills file

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include "no-frills.h"

int main(int argc, char **argv)
{
	if (argc < 2) {
		printf("Usage: %s DataFile\n", argv[0]);
		exit(1);
	}
	int fd = open(argv[1],O_RDONLY);
	if (fd < 0) {
		printf("Can't open file %s\n", argv[1]);
		exit(1);
	}
	int ntuples = 0;
	int npages = 0;
	char page[PAGESIZE];
	int nb; // # bytes read by read()

	while ((nb = read(fd,page,PAGESIZE)) == PAGESIZE) {
		ntuples += page[0];
		npages++;
	}

	if (npages > 0 && nb > 0 && nb < PAGESIZE) ntuples = -1;
	if (npages == 0) ntuples = -1; 
	printf("%d\n",ntuples);
	return 0;
}

Q2

// COMP9315 22T1 Final Exam Q2
// Find longest tuple in a no-frills file

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include "no-frills.h"

int main(int argc, char **argv)
{
	if (argc < 2) {
		printf("Usage: %s DataFile\n", argv[0]);
		exit(1);
	}
	int fd = open(argv[1],O_RDONLY);
	if (fd < 0) {
		printf("Can't open file %s\n", argv[1]);
		exit(1);
	}
	char longest[MAXTUPLEN];
	char page[PAGESIZE];
	char *cur; // pointer to current tuple
	int  nb; // #bytes from read()
	int  npages = 0;

	strcpy(longest,"");
	while ((nb = read(fd,page,PAGESIZE)) == PAGESIZE) {
		npages++;
		cur = &page[1];
		while (*cur != '\0') {
			int length = strlen(cur);
			if (length == 0) break;
			if (length > strlen(longest))
				strcpy(longest,cur);
			cur = cur + length + 1;
		}
	}

	if (npages > 0 && nb > 0 && nb < PAGESIZE) strcpy(longest,"");
	if (npages == 0) strcpy(longest,"");
	if (strcmp(longest,"") == 0) strcpy(longest,"");
	printf("%s\n",longest);
	return 0;
}

Q3

// COMP9315 22T1 Final Exam Q3
// Read tuples from stdin and store in no-frills file
// Start from empty file, add new pages as needed

#include 
#include 
#include 
#include 
#include 
#include 
#include "no-frills.h"

int main(int argc, char **argv)
{
	if (argc < 3) {
		fprintf(stderr, "Usage: %s DataFile TupleFile\n", argv[0]);
		exit(1);
	}
	unsigned int mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH;
	int fd = open(argv[1],O_RDWR|O_CREAT|O_TRUNC,mode);
	if (fd < 0) {
		fprintf(stderr, "Can't open data file.\n");
		exit(1);
	}
	FILE *input = fopen(argv[2],"r");
	if (input == NULL) {
		fprintf(stderr, "Can't open data file.\n");
		exit(1);
	}

	char page[PAGESIZE];
	bzero(page,PAGESIZE);
	char *cur = &page[1];  // position in page
	char *top = &page[PAGESIZE-1];
	char tuple[MAXTUPLEN];
	int npages = 0;

	while (fgets(tuple,MAXTUPLEN,input) != NULL) {
		tuple[strlen(tuple)-1] = '\0';
		int tuplen = strlen(tuple)+1;
		// printf("%s\n",tuple);
		// no room in page for this tuple
		if (cur+tuplen > top) { 
			write(fd,page,PAGESIZE);
			bzero(page,PAGESIZE);
			cur = &page[1];
			npages++;
		}
		strcpy(cur,tuple);
		cur += tuplen;
		page[0]++;
	}
	if (page[0] > 0 || npages == 0) { write(fd,page,PAGESIZE); }
	return 0;
}

Q4

# COMP9315 22T1 Final Exam Q4

Type your answer(s) to replace the xxx's
Submit this file as your answer for Q4


a.

4   size of header
4   size of integer
8   size of char(8), no alignment padding needed
8   size of real

R = 4 + 4 + 8 + 8 = 24

b.

4096 - 96 = 4000 bytes for tuples per page
c = floor(4000/24) = 166

c.

b = ceil(10000/166) = 61

Q5

# COMP9315 22T1 Final Exam Q5
  
Type your answer(s) to replace the xxx's
Submit this file as your answer for Q5

a.  cost of projection using sorting for duplicate removal

projection phase:
    read 200 pages of original tuples 
    write 100 pages of projected tuples 
    cost = 200 + 100 = 300

sort phase (of projected tuples):
    1st round:
        31 buffers for 4 sorted runs of 31 (last run < 31)
        cost = 100 + 100 = 200
    2nd round: 
        reserve 1 buffer for output, have 30 available
        do 30-way merge (with only 4 sorted runs, this is really a 4-way merge)
        cost = 100 + 100 = 200
    cost = 200 + 200 = 400

duplicate removal:
    read 100 sorted pages
    write 80 result pages 
    cost = 100 + 80 = 180

total cost = 300 + 400 + 180 = 880


b.  cost of projection using hashing for duplicate removal

projection phase (same as before):
    output pages = 100
    cost = 300 

1st partioning phase: 
    reserve 1 buffer for input 
    30 buffers ⇒ 30 partitions 
    use assumption given in question on size of partitions
    read 100 pages of projected tuples
    write 100 pages of partitions 
    cost = 100 + 100 = 200

2nd partioning phase (duplicate removal):
    reserve 1 buffer for input
    30 buffers leftover ⇒ 30 partitions
    assume no 2nd-parititon buffers will be full, hence no re-reading 
    read 100 partition pages (from 1st phase)
    write 80 result pages
    cost = 100 + 80 = 180

total cost = 300 + 200 + 180 = 680

Q6

# COMP9315 22T1 Final Exam Q6

Type your answer(s) to replace the xxx's
Submit this file as your answer for Q6

State before inserting 6

[0] 2,4
[1] 1,3 → 5

d = 1, sp = 0

State after splitting and inserting 6

[0] 4
[1] 1,3 → 5
[2] 2,6

d = 1, sp = 1

State before inserting 12

[0] 4,8
[1] 1,3 → 5,7 → 9,11
[2] 2,6 → 10

d = 1, sp = 1

State after splitting and inserting 12

[0] 4,8 → 12
[1] 1,5 → 9
[2] 2,6 → 10
[3] 3,7 → 11

d = 2, sp = 0

State before inserting 18

[0] 4,8 → 12,16
[1] 1,5 → 9,13 → 17
[2] 2,6 → 10,14
[3] 3,7 → 11,15

d = 2, sp = 0

State after splitting and inserting 18

[0] 8,16
[1] 1,5 → 9,13 → 17
[2] 2,6 → 10,14 → 18
[3] 3,7 → 11,15
[4] 4,12 

d = 2, sp = 1

State before inserting 24

[0] 8,16
[1] 1,5 → 9,13 → 17,21
[2] 2,6 → 10,14 → 18,22
[3] 3,7 → 11,15 → 19,23
[4] 4,12 → 20

d = 2, sp = 1

State after splitting and inserting 24

[0] 8,16 → 24
[1] 1,9 → 17
[2] 2,6 → 10,14 → 18,22
[3] 3,7 → 11,15 → 19,23
[4] 4,12 → 20
[5] 5,13 → 21

d = 2, sp = 2

State before inserting 30

[0] 8,16 → 24
[1] 1,9 → 17,25
[2] 2,6 → 10,14 → 18,22 → 26
[3] 3,7 → 11,15 → 19,23 → 27
[4] 4,12 → 20,28
[5] 5,13 → 21,29

d = 2, sp = 2

State after splitting and inserting 30

[0] 8,16 → 24
[1] 1,9 → 17,25
[2] 2,10 → 18,26
[3] 3,7 → 11,15 → 19,23 → 27
[4] 4,12 → 20,28
[5] 5,13 → 21,29
[6] 6,14 → 22,30

d = 2, sp = 3

Q7

# COMP9315 22T1 Final Exam Q7
  
Type your answer(s) to replace the xxx's
Submit this file as your answer for Q7

a. 

Res = Sel[c=5 and d=8] S

b.

Tmp1 = R Join[R.c=S.c] S
Res  = Proj[a,d] Tmp1


c.

Tmp1 = Sel[d=3] S
Tmp2 = Proj[c] Tmp1
Res  = Sel[R.c in Tmp2] R


d.

Simple/standard version:

Tmp1 = Sel[b=2] R 
Tmp2 = Sel[c=5] S 
Tmp3 = Tmp1 Join[Tmp1.c=Tmp2.c] Tmp2
Tmp4 = Sel[e=10] T 
Tmp5 = Tmp3 Join[Tmp3.d=Tmp4.d] Tmp4
Res  = Proj[a,f,g] Tmp5


Extreme version:
(minmise intermediate tuple size):

Tmp1 = Sel[b=2] R
Tmp2 = Proj[a,c] Tmp1
Tmp3 = Sel[c=5] S
Tmp4 = Proj[c] Tmp3
Tmp5 = Sel[e=10] T
Tmp6 = Proj[d,f,g] Tmp4
Tmp7 = Tmp2 Join[Tmp2.c=Tmp4.c] Tmp4
Tmp8 = Tmp7 Join[Tmp7.d=Tmp6.d] Tmp6
Res  = Proj[a,f,g] Tmp8

Q8

# COMP9315 22T1 Final Exam Q8
  
Type your answer(s) to replace the xxx's
Submit this file as your answer for Q8

a. 
query descriptor, q = 0100010100
examine slices 1, 5, 7

initially, matches = 11111111

query-bits[1]
    old matches  = 11111111
    bit-slice[1] = 10100101
    bitwise AND
    new matches  = 10100101

query-bits[5]
    old matches  = 10100101
    bit-slice[5] = 11101001
    bitwise AND
    new matches  = 10100001

query-bits[7]
    old matches  = 10100001
    bit-slice[7] = 10100001
    bit-wise AND
    new matches  = 10100001

result matches = 10100001


b. 

result match slice is 10100001...

potentially matching pages are [0], [2], [7]