CS267
Chris Pollett
Oct. 16, 2019
// rankBM25_TermAtATime: term-at-a-time BM25 ranking.
//   (t[1], ..., t[n]) -- the query terms
//   k                 -- number of results to return
// Processes one term's posting list per round, merging it with the
// accumulator list produced by the previous round. Both the posting list
// and the accumulator list are walked in increasing docid order, and each
// accumulator list is terminated by an entry whose docid is infty.
// Returns the top k items of the final accumulator list.
// NOTE(review): N[t] is presumably the number of documents containing t and
// N the collection size; TFBM25 is defined elsewhere -- confirm.
rankBM25_TermAtATime((t[1], t[2], ..., t[n]), k) {
    sort(t) in increasing order of N[t[i]];
    acc := {}, acc' := {}; // initialize accumulators
    // acc holds the previous round's results, acc' receives the next round's
    acc[0].docid := infty; // end-of-list marker
    for i := 1 to n do {
        inPos := 0;  // current position in acc
        outPos := 0; // current position in acc'
        foreach document d in t[i]'s posting list do {
            while acc[inPos].docid < d do {
                // copy previous round to current for docs not containing t[i]
                acc'[outPos++] := acc[inPos++];
            }
            acc'[outPos].docid := d;
            acc'[outPos].score := log(N/N[t[i]]) * TFBM25(t[i], d);
            if (acc[inPos].docid == d) {
                // Add the old score and consume the old accumulator; without
                // advancing inPos it would be copied again (as a duplicate)
                // by the while loop for the next posting.
                acc'[outPos].score += acc[inPos++].score;
            }
            outPos++;
        }
        while acc[inPos].docid < infty do { // copy remaining acc to acc'
            acc'[outPos++] := acc[inPos++];
        }
        acc'[outPos].docid := infty; // end-of-list marker
        swap acc and acc';
    }
    return the top k items of acc; // select using heap
}
// rankBM25_TermAtATimeWithPruning: term-at-a-time BM25 ranking with a cap
// on the number of accumulators.
//   (t[1], ..., t[n]) -- the query terms
//   k                 -- number of results to return
//   amax              -- accumulator limit
//   u                 -- how many postings to process between updates of
//                        the threshold T
// For each term, one of three strategies is chosen from the remaining quota:
//   1. enough quota for every posting: plain merge, as in the unpruned
//      term-at-a-time algorithm;
//   2. no quota left: only update the scores of existing accumulators;
//   3. some quota left: merge, but create a new accumulator only when the
//      posting's term frequency f[t[i],d] is at least the threshold T.
// Returns the top k items of the final accumulator list.
// NOTE(review): N[t] is presumably the number of postings for t and N the
// collection size; TFBM25 and f are defined elsewhere -- confirm.
rankBM25_TermAtATimeWithPruning((t[1], t[2], ..., t[n]), k, amax, u) {
    sort(t) in increasing order of N[t[i]];
    acc := {}, acc' := {}; // initialize accumulators
    acc[0].docid := infty; // end-of-list marker
    for i := 1 to n do {
        // the remaining accumulator quota
        // NOTE(review): confirm length(acc) excludes the end-of-list marker
        quotaLeft := amax - length(acc);
        if (N[t[i]] <= quotaLeft) { // plenty of accumulators
            // do as we did in rankBM25_TermAtATime
            inPos := 0;  // current position in acc
            outPos := 0; // current position in acc'
            foreach document d in t[i]'s posting list do {
                while acc[inPos].docid < d do {
                    // copy previous round to current for docs not containing t[i]
                    acc'[outPos++] := acc[inPos++];
                }
                acc'[outPos].docid := d;
                acc'[outPos].score := log(N/N[t[i]]) * TFBM25(t[i], d);
                if (acc[inPos].docid == d) {
                    // consume the old accumulator so it is not copied again
                    acc'[outPos].score += acc[inPos++].score;
                }
                outPos++;
            }
            while acc[inPos].docid < infty do { // copy remaining acc to acc'
                acc'[outPos++] := acc[inPos++];
            }
            acc'[outPos].docid := infty; // end-of-list marker
            swap acc and acc';
        } else if (quotaLeft == 0) { // no accumulators left
            // Update existing accumulators in place. Nothing is merged into
            // acc', so there is no copy/swap step in this branch.
            for (j := 0; acc[j].docid < infty; j++) {
                acc[j].score := acc[j].score
                    + log(N/N[t[i]]) * TFBM25(t[i], acc[j].docid);
            }
        } else { // still have some accumulators
            // initialize TF stats
            // NOTE(review): tfStats is indexed by term frequency below;
            // confirm that k is the intended upper bound here.
            for j := 1 to k do { tfStats[j] := 0 }
            T := 1; // init threshold for new accumulators
            postingsSeen := 0;
            inPos := 0;  // current position in acc
            outPos := 0; // current position in acc'
            foreach document d in t[i]'s posting list do {
                while acc[inPos].docid < d do {
                    // copy previous round to current for docs not containing t[i]
                    acc'[outPos++] := acc[inPos++];
                }
                if (acc[inPos].docid == d) {
                    // existing accumulator: always updated
                    acc'[outPos].docid := d;
                    acc'[outPos++].score := acc[inPos++].score
                        + log(N/N[t[i]]) * TFBM25(t[i], d);
                } else if (quotaLeft > 0) {
                    if (f[t[i],d] ≥ T) { // if happens, make new accumulator
                        acc'[outPos].docid := d;
                        acc'[outPos++].score := log(N/N[t[i]]) * TFBM25(t[i], d);
                        quotaLeft--;
                    }
                    tfStats[f[t[i],d]]++;
                }
                postingsSeen++;
                if (postingsSeen % u == 0) {
                    // ratio of postings still to come to postings seen so far
                    q := (N[t[i]] - postingsSeen)/postingsSeen;
                    T := argmin_x{x in Nat | sum_(j=1)^x (tfStats[j] * q) ≥ quotaLeft};
                }
            }
            while acc[inPos].docid < infty do { // copy remaining acc to acc'
                acc'[outPos++] := acc[inPos++];
            }
            acc'[outPos].docid := infty; // end-of-list marker
            swap acc and acc';
        }
    }
    return the top k items of acc; // select using heap
}
Given a list of ordered pairs, `S`, suggest pseudo-code to compute `G(S)`. What is the runtime of your code?
Post your solutions to the Oct 16 In-Class Exercise.
// tau(t, k): returns an interval [u, u] whose two endpoints are equal.
//   k == infty  -> [infty, infty]
//   k == -infty -> [-infty, -infty]
//   otherwise   -> u is next(t, k-1)
// NOTE(review): next is defined elsewhere; presumably next(t, p) is the
// first position of t after p -- confirm.
tau(t, k) :=
    if (k == infty) {
        return [infty, infty]
    }
    if (k == -infty) {
        return [-infty, -infty]
    }
    u := next(t, k-1);
    return [u, u]