| COMP9315 24T1 |
COMP9315 22T1 Exam
Sample Solutions |
Database Systems |
In Q1-Q3, the red code is what you had to write;
the rest of the code was supplied.
// COMP9315 22T1 Final Exam Q1 // Count tuples in a no-frills file #include#include #include #include #include #include #include #include "no-frills.h" int main(int argc, char **argv) { if (argc < 2) { printf("Usage: %s DataFile\n", argv[0]); exit(1); } int fd = open(argv[1],O_RDONLY); if (fd < 0) { printf("Can't open file %s\n", argv[1]); exit(1); } int ntuples = 0; int npages = 0; char page[PAGESIZE]; int nb; // # bytes read by read() while ((nb = read(fd,page,PAGESIZE)) == PAGESIZE) { ntuples += page[0]; npages++; } if (npages > 0 && nb > 0 && nb < PAGESIZE) ntuples = -1; if (npages == 0) ntuples = -1; printf("%d\n",ntuples); return 0; }
// COMP9315 22T1 Final Exam Q2 // Find longest tuple in a no-frills file #include#include #include #include #include #include #include #include "no-frills.h" int main(int argc, char **argv) { if (argc < 2) { printf("Usage: %s DataFile\n", argv[0]); exit(1); } int fd = open(argv[1],O_RDONLY); if (fd < 0) { printf("Can't open file %s\n", argv[1]); exit(1); } char longest[MAXTUPLEN]; char page[PAGESIZE]; char *cur; // pointer to current tuple int nb; // #bytes from read() int npages = 0; strcpy(longest,""); while ((nb = read(fd,page,PAGESIZE)) == PAGESIZE) { npages++; cur = &page[1]; while (*cur != '\0') { int length = strlen(cur); if (length == 0) break; if (length > strlen(longest)) strcpy(longest,cur); cur = cur + length + 1; } } if (npages > 0 && nb > 0 && nb < PAGESIZE) strcpy(longest," "); if (npages == 0) strcpy(longest," printf("%s\n",longest); return 0; }"); if (strcmp(longest,"") == 0) strcpy(longest," ");
// COMP9315 22T1 Final Exam Q3 // Read tuples from stdin and store in no-frills file // Start from empty file, add new pages as needed #include#include #include #include #include #include #include "no-frills.h" int main(int argc, char **argv) { if (argc < 3) { fprintf(stderr, "Usage: %s DataFile TupleFile\n", argv[0]); exit(1); } unsigned int mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH; int fd = open(argv[1],O_RDWR|O_CREAT|O_TRUNC,mode); if (fd < 0) { fprintf(stderr, "Can't open data file.\n"); exit(1); } FILE *input = fopen(argv[2],"r"); if (input == NULL) { fprintf(stderr, "Can't open data file.\n"); exit(1); } char page[PAGESIZE]; bzero(page,PAGESIZE); char *cur = &page[1]; // position in page char *top = &page[PAGESIZE-1]; char tuple[MAXTUPLEN]; int npages = 0; while (fgets(tuple,MAXTUPLEN,input) != NULL) { tuple[strlen(tuple)-1] = '\0'; int tuplen = strlen(tuple)+1; // printf("%s\n",tuple); // no room in page for this tuple if (cur+tuplen > top) { write(fd,page,PAGESIZE); bzero(page,PAGESIZE); cur = &page[1]; npages++; } strcpy(cur,tuple); cur += tuplen; page[0]++; } if (page[0] > 0 || npages == 0) { write(fd,page,PAGESIZE); } return 0; }
# COMP9315 22T1 Final Exam Q4 Type your answer(s) to replace the xxx's Submit this file as your answer for Q4 a. 4 size of header 4 size of integer 8 size of char(8), no alignment padding needed 8 size of real R = 4 + 4 + 8 + 8 = 24 b. 4096 - 96 = 4000 bytes for tuples per page c = floor(4000/24) = 166 c. b = ceil(10000/166) = 61
# COMP9315 22T1 Final Exam Q5
Type your answer(s) to replace the xxx's
Submit this file as your answer for Q5
a. cost of projection using sorting for duplicate removal
projection phase:
read 200 pages of original tuples
write 100 pages of projected tuples
cost = 200 + 100 = 300
sort phase (of projected tuples):
1st round:
31 buffers for 4 sorted runs of 31 (last run < 31)
cost = 100 + 100 = 200
2nd round:
reserve 1 buffer for output, have 30 available
do 30-way merge (with only 4 sorted runs, this is really a 4-way merge)
cost = 100 + 100 = 200
cost = 200 + 200 = 400
duplicate removal:
read 100 sorted pages
write 80 result pages
cost = 100 + 80 = 180
total cost = 300 + 400 + 180 = 880
b. cost of projection using hashing for duplicate removal
projection phase (same as before):
output pages = 100
cost = 300
1st partioning phase:
reserve 1 buffer for input
30 buffers ⇒ 30 partitions
use assumption given in question on size of partitions
read 100 pages of projected tuples
write 100 pages of partitions
cost = 100 + 100 = 200
2nd partioning phase (duplicate removal):
reserve 1 buffer for input
30 buffers leftover ⇒ 30 partitions
assume no 2nd-parititon buffers will be full, hence no re-reading
read 100 partition pages (from 1st phase)
write 80 result pages
cost = 100 + 80 = 180
total cost = 300 + 200 + 180 = 680
# COMP9315 22T1 Final Exam Q6 Type your answer(s) to replace the xxx's Submit this file as your answer for Q6 State before inserting 6 [0] 2,4 [1] 1,3 → 5 d = 1, sp = 0 State after splitting and inserting 6 [0] 4 [1] 1,3 → 5 [2] 2,6 d = 1, sp = 1 State before inserting 12 [0] 4,8 [1] 1,3 → 5,7 → 9,11 [2] 2,6 → 10 d = 1, sp = 1 State after splitting and inserting 12 [0] 4,8 → 12 [1] 1,5 → 9 [2] 2,6 → 10 [3] 3,7 → 11 d = 2, sp = 0 State before inserting 18 [0] 4,8 → 12,16 [1] 1,5 → 9,13 → 17 [2] 2,6 → 10,14 [3] 3,7 → 11,15 d = 2, sp = 0 State after splitting and inserting 18 [0] 8,16 [1] 1,5 → 9,13 → 17 [2] 2,6 → 10,14 → 18 [3] 3,7 → 11,15 [4] 4,12 d = 2, sp = 1 State before inserting 24 [0] 8,16 [1] 1,5 → 9,13 → 17,21 [2] 2,6 → 10,14 → 18,22 [3] 3,7 → 11,15 → 19,23 [4] 4,12 → 20 d = 2, sp = 1 State after splitting and inserting 24 [0] 8,16 → 24 [1] 1,9 → 17 [2] 2,6 → 10,14 → 18,22 [3] 3,7 → 11,15 → 19,23 [4] 4,12 → 20 [5] 5,13 → 21 d = 2, sp = 2 State before inserting 30 [0] 8,16 → 24 [1] 1,9 → 17,25 [2] 2,6 → 10,14 → 18,22 → 26 [3] 3,7 → 11,15 → 19,23 → 27 [4] 4,12 → 20,28 [5] 5,13 → 21,29 d = 2, sp = 2 State after splitting and inserting 30 [0] 8,16 → 24 [1] 1,9 → 17,25 [2] 2,10 → 18,26 [3] 3,7 → 11,15 → 19,23 → 27 [4] 4,12 → 20,28 [5] 5,13 → 21,29 [6] 6,14 → 22,30 d = 2, sp = 3
# COMP9315 22T1 Final Exam Q7 Type your answer(s) to replace the xxx's Submit this file as your answer for Q7 a. Res = Sel[c=5 and d=8] S b. Tmp1 = R Join[R.c=S.c] S Res = Proj[a,d] Tmp1 c. Tmp1 = Sel[d=3] S Tmp2 = Proj[c] Tmp1 Res = Sel[R.c in Tmp2] R d. Simple/standard version: Tmp1 = Sel[b=2] R Tmp2 = Sel[c=5] S Tmp3 = Tmp1 Join[Tmp1.c=Tmp2.c] Tmp2 Tmp4 = Sel[e=10] T Tmp5 = Tmp3 Join[Tmp3.d=Tmp4.d] Tmp4 Res = Proj[a,f,g] Tmp5 Extreme version: (minmise intermediate tuple size): Tmp1 = Sel[b=2] R Tmp2 = Proj[a,c] Tmp1 Tmp3 = Sel[c=5] S Tmp4 = Proj[c] Tmp3 Tmp5 = Sel[e=10] T Tmp6 = Proj[d,f,g] Tmp4 Tmp7 = Tmp2 Join[Tmp2.c=Tmp4.c] Tmp4 Tmp8 = Tmp7 Join[Tmp7.d=Tmp6.d] Tmp6 Res = Proj[a,f,g] Tmp8
# COMP9315 22T1 Final Exam Q8
Type your answer(s) to replace the xxx's
Submit this file as your answer for Q8
a.
query descriptor, q = 0100010100
examine slices 1, 5, 7
initially, matches = 11111111
query-bits[1]
old matches = 11111111
bit-slice[1] = 10100101
bitwise AND
new matches = 10100101
query-bits[5]
old matches = 10100101
bit-slice[5] = 11101001
bitwise AND
new matches = 10100001
query-bits[7]
old matches = 10100001
bit-slice[7] = 10100001
bit-wise AND
new matches = 10100001
result matches = 10100001
b.
result match slice is 10100001...
potentially matching pages are [0], [2], [7]