CS267
Chris Pollett
Oct. 24, 2011
Which of the following is true?
// Term-at-a-time BM25 ranking with accumulators.
// Input:  query terms t[1..n] and the number k of results wanted.
// Output: the k accumulators (docid, score) with the highest scores.
// Terms are processed one at a time; after term i, acc holds one entry per
// document seen in the posting lists of t[1..i], ordered by docid and
// terminated by an entry whose docid is infty.
// Assumes each posting list is traversed in increasing docid order (the merge
// below relies on it) -- TODO confirm against the posting-list definition.
rankBM25_TermAtATime((t[1], t[2], ..., t[n]), k) {
    sort(t) in increasing order of `N_t_i`;
    acc := {}, acc' := {}; // initialize accumulators
    acc[0].docid := infty; // end-of-list marker
    for i := 1 to n do {
        inPos := 0;  // current position in acc  (read side)
        outPos := 0; // current position in acc' (write side)
        foreach document d in t[i]'s posting list do {
            while acc[inPos].docid < d do {
                // copy accumulators that came from earlier t[j] and precede d
                acc'[outPos++] := acc[inPos++];
            }
            acc'[outPos].docid := d;
            acc'[outPos].score := log(N/N_t_i) * TFBM25(t[i], d);
            if (acc[inPos].docid == d) {
                // d already has an accumulator: fold its score in and consume
                // it, so it is not copied into acc' a second time.
                acc'[outPos].score += acc[inPos++].score;
            }
            outPos++;
        }
        while acc[inPos].docid < infty do { // copy remaining acc to acc'
            acc'[outPos++] := acc[inPos++];
        }
        acc'[outPos].docid := infty; // end-of-list marker
        swap acc and acc';
    }
    return the top k items of acc; // select using heap
}
The worst-case complexity of this algorithm is `Theta(N_q cdot n + N_q cdot log(k))`, where `N_q = N_(t_1) + ... + N_(t_n)` is the total number of postings for all query terms.