COMP9315 24T1 |
COMP9315 22T1 Exam
Sample Solutions |
Database Systems |
In Q1-Q3, the red code is what you had to write;
the rest of the code was supplied.
// COMP9315 22T1 Final Exam Q1 // Count tuples in a no-frills file #include#include #include #include #include #include #include #include "no-frills.h" int main(int argc, char **argv) { if (argc < 2) { printf("Usage: %s DataFile\n", argv[0]); exit(1); } int fd = open(argv[1],O_RDONLY); if (fd < 0) { printf("Can't open file %s\n", argv[1]); exit(1); } int ntuples = 0; int npages = 0; char page[PAGESIZE]; int nb; // # bytes read by read() while ((nb = read(fd,page,PAGESIZE)) == PAGESIZE) { ntuples += page[0]; npages++; } if (npages > 0 && nb > 0 && nb < PAGESIZE) ntuples = -1; if (npages == 0) ntuples = -1; printf("%d\n",ntuples); return 0; }
// COMP9315 22T1 Final Exam Q2 // Find longest tuple in a no-frills file #include#include #include #include #include #include #include #include "no-frills.h" int main(int argc, char **argv) { if (argc < 2) { printf("Usage: %s DataFile\n", argv[0]); exit(1); } int fd = open(argv[1],O_RDONLY); if (fd < 0) { printf("Can't open file %s\n", argv[1]); exit(1); } char longest[MAXTUPLEN]; char page[PAGESIZE]; char *cur; // pointer to current tuple int nb; // #bytes from read() int npages = 0; strcpy(longest,""); while ((nb = read(fd,page,PAGESIZE)) == PAGESIZE) { npages++; cur = &page[1]; while (*cur != '\0') { int length = strlen(cur); if (length == 0) break; if (length > strlen(longest)) strcpy(longest,cur); cur = cur + length + 1; } } if (npages > 0 && nb > 0 && nb < PAGESIZE) strcpy(longest," "); if (npages == 0) strcpy(longest," printf("%s\n",longest); return 0; }"); if (strcmp(longest,"") == 0) strcpy(longest," ");
// COMP9315 22T1 Final Exam Q3 // Read tuples from stdin and store in no-frills file // Start from empty file, add new pages as needed #include#include #include #include #include #include #include "no-frills.h" int main(int argc, char **argv) { if (argc < 3) { fprintf(stderr, "Usage: %s DataFile TupleFile\n", argv[0]); exit(1); } unsigned int mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH; int fd = open(argv[1],O_RDWR|O_CREAT|O_TRUNC,mode); if (fd < 0) { fprintf(stderr, "Can't open data file.\n"); exit(1); } FILE *input = fopen(argv[2],"r"); if (input == NULL) { fprintf(stderr, "Can't open data file.\n"); exit(1); } char page[PAGESIZE]; bzero(page,PAGESIZE); char *cur = &page[1]; // position in page char *top = &page[PAGESIZE-1]; char tuple[MAXTUPLEN]; int npages = 0; while (fgets(tuple,MAXTUPLEN,input) != NULL) { tuple[strlen(tuple)-1] = '\0'; int tuplen = strlen(tuple)+1; // printf("%s\n",tuple); // no room in page for this tuple if (cur+tuplen > top) { write(fd,page,PAGESIZE); bzero(page,PAGESIZE); cur = &page[1]; npages++; } strcpy(cur,tuple); cur += tuplen; page[0]++; } if (page[0] > 0 || npages == 0) { write(fd,page,PAGESIZE); } return 0; }
# COMP9315 22T1 Final Exam Q4 Type your answer(s) to replace the xxx's Submit this file as your answer for Q4 a. 4 size of header 4 size of integer 8 size of char(8), no alignment padding needed 8 size of real R = 4 + 4 + 8 + 8 = 24 b. 4096 - 96 = 4000 bytes for tuples per page c = floor(4000/24) = 166 c. b = ceil(10000/166) = 61
# COMP9315 22T1 Final Exam Q5 Type your answer(s) to replace the xxx's Submit this file as your answer for Q5 a. cost of projection using sorting for duplicate removal projection phase: read 200 pages of original tuples write 100 pages of projected tuples cost = 200 + 100 = 300 sort phase (of projected tuples): 1st round: 31 buffers for 4 sorted runs of 31 (last run < 31) cost = 100 + 100 = 200 2nd round: reserve 1 buffer for output, have 30 available do 30-way merge (with only 4 sorted runs, this is really a 4-way merge) cost = 100 + 100 = 200 cost = 200 + 200 = 400 duplicate removal: read 100 sorted pages write 80 result pages cost = 100 + 80 = 180 total cost = 300 + 400 + 180 = 880 b. cost of projection using hashing for duplicate removal projection phase (same as before): output pages = 100 cost = 300 1st partioning phase: reserve 1 buffer for input 30 buffers ⇒ 30 partitions use assumption given in question on size of partitions read 100 pages of projected tuples write 100 pages of partitions cost = 100 + 100 = 200 2nd partioning phase (duplicate removal): reserve 1 buffer for input 30 buffers leftover ⇒ 30 partitions assume no 2nd-parititon buffers will be full, hence no re-reading read 100 partition pages (from 1st phase) write 80 result pages cost = 100 + 80 = 180 total cost = 300 + 200 + 180 = 680
# COMP9315 22T1 Final Exam Q6 Type your answer(s) to replace the xxx's Submit this file as your answer for Q6 State before inserting 6 [0] 2,4 [1] 1,3 → 5 d = 1, sp = 0 State after splitting and inserting 6 [0] 4 [1] 1,3 → 5 [2] 2,6 d = 1, sp = 1 State before inserting 12 [0] 4,8 [1] 1,3 → 5,7 → 9,11 [2] 2,6 → 10 d = 1, sp = 1 State after splitting and inserting 12 [0] 4,8 → 12 [1] 1,5 → 9 [2] 2,6 → 10 [3] 3,7 → 11 d = 2, sp = 0 State before inserting 18 [0] 4,8 → 12,16 [1] 1,5 → 9,13 → 17 [2] 2,6 → 10,14 [3] 3,7 → 11,15 d = 2, sp = 0 State after splitting and inserting 18 [0] 8,16 [1] 1,5 → 9,13 → 17 [2] 2,6 → 10,14 → 18 [3] 3,7 → 11,15 [4] 4,12 d = 2, sp = 1 State before inserting 24 [0] 8,16 [1] 1,5 → 9,13 → 17,21 [2] 2,6 → 10,14 → 18,22 [3] 3,7 → 11,15 → 19,23 [4] 4,12 → 20 d = 2, sp = 1 State after splitting and inserting 24 [0] 8,16 → 24 [1] 1,9 → 17 [2] 2,6 → 10,14 → 18,22 [3] 3,7 → 11,15 → 19,23 [4] 4,12 → 20 [5] 5,13 → 21 d = 2, sp = 2 State before inserting 30 [0] 8,16 → 24 [1] 1,9 → 17,25 [2] 2,6 → 10,14 → 18,22 → 26 [3] 3,7 → 11,15 → 19,23 → 27 [4] 4,12 → 20,28 [5] 5,13 → 21,29 d = 2, sp = 2 State after splitting and inserting 30 [0] 8,16 → 24 [1] 1,9 → 17,25 [2] 2,10 → 18,26 [3] 3,7 → 11,15 → 19,23 → 27 [4] 4,12 → 20,28 [5] 5,13 → 21,29 [6] 6,14 → 22,30 d = 2, sp = 3
# COMP9315 22T1 Final Exam Q7 Type your answer(s) to replace the xxx's Submit this file as your answer for Q7 a. Res = Sel[c=5 and d=8] S b. Tmp1 = R Join[R.c=S.c] S Res = Proj[a,d] Tmp1 c. Tmp1 = Sel[d=3] S Tmp2 = Proj[c] Tmp1 Res = Sel[R.c in Tmp2] R d. Simple/standard version: Tmp1 = Sel[b=2] R Tmp2 = Sel[c=5] S Tmp3 = Tmp1 Join[Tmp1.c=Tmp2.c] Tmp2 Tmp4 = Sel[e=10] T Tmp5 = Tmp3 Join[Tmp3.d=Tmp4.d] Tmp4 Res = Proj[a,f,g] Tmp5 Extreme version: (minmise intermediate tuple size): Tmp1 = Sel[b=2] R Tmp2 = Proj[a,c] Tmp1 Tmp3 = Sel[c=5] S Tmp4 = Proj[c] Tmp3 Tmp5 = Sel[e=10] T Tmp6 = Proj[d,f,g] Tmp4 Tmp7 = Tmp2 Join[Tmp2.c=Tmp4.c] Tmp4 Tmp8 = Tmp7 Join[Tmp7.d=Tmp6.d] Tmp6 Res = Proj[a,f,g] Tmp8
# COMP9315 22T1 Final Exam Q8 Type your answer(s) to replace the xxx's Submit this file as your answer for Q8 a. query descriptor, q = 0100010100 examine slices 1, 5, 7 initially, matches = 11111111 query-bits[1] old matches = 11111111 bit-slice[1] = 10100101 bitwise AND new matches = 10100101 query-bits[5] old matches = 10100101 bit-slice[5] = 11101001 bitwise AND new matches = 10100001 query-bits[7] old matches = 10100001 bit-slice[7] = 10100001 bit-wise AND new matches = 10100001 result matches = 10100001 b. result match slice is 10100001... potentially matching pages are [0], [2], [7]