*** parser-1.22.2.6.cc Fri Aug 25 09:12:43 2000 --- parser.cc Fri Aug 25 13:57:19 2000 *************** *** 10,16 **** // or the GNU Public License version 2 or later // // - // $Id: parser.cc,v 1.22.2.5 2000/08/09 18:07:48 ghutchis Exp $ // #ifdef HAVE_CONFIG_H --- 10,15 ---- *************** *** 101,107 **** --- 100,108 ---- term(output); if (output) { + if(debug) cerr << "or--" << endl; perform_or(); + if(debug) cerr << "stack:" << stack.Size() << endl; } } else *************** *** 117,137 **** void Parser::term(int output) { - int isand; factor(output); while (1) { ! if ((isand = match('&')) || match('!')) { ! factor(output); ! if (output) ! { ! perform_and(isand); ! } } else ! break; } } --- 118,152 ---- void Parser::term(int output) { factor(output); + if(debug) cerr << "term:factor" << endl; while (1) { ! if(match('&')) { ! factor(output); ! if(output) ! { ! if(debug) cerr << "and--" << endl; ! perform_and(); ! if(debug) cerr << "stack:" << stack.Size() << endl; ! } ! } ! else if(match('!')) ! { ! factor(output); ! if(output) ! { ! if(debug) cerr << "not--" << endl; ! perform_not(); ! if(debug) cerr << "stack:" << stack.Size() << endl; ! } } else ! { ! break; ! } } } *************** *** 139,147 **** void Parser::factor(int output) { ! phrase(output); ! ! if (match('(')) { expr(output); if (match(')')) --- 154,164 ---- void Parser::factor(int output) { ! if(match('"')) ! { ! phrase(output); ! } ! else if (match('(')) { expr(output); if (match(')')) *************** *** 161,181 **** } lookahead = lexan(); } ! // else ! // { ! // setError("a search word"); ! // } } //***************************************************************************** void Parser::phrase(int output) { ! int skipRest = 0; ! ! if (match('"')) ! { ! List *wordList = new List; double weight = 1.0; while (1) --- 178,194 ---- } lookahead = lexan(); } ! else ! { ! setError("a search word, a quoted phrase, a boolean expression between ()"); ! } } //***************************************************************************** void Parser::phrase(int output) { ! List *wordList = 0; double weight = 1.0; while (1) *************** *** 183,213 **** if (match('"')) { if (output) score(wordList, weight); break; } else if (lookahead == WORD) { weight *= current->weight; ! if (output && !skipRest) ! { ! perform_phrase(*wordList); ! if (wordList->Count() == 0) ! // just the start of the phrase has no results => skipRest ! skipRest = 1; ! } ! else if (lookahead == DONE) ! { ! setError("'\"'"); ! break; ! } ! lookahead = lexan(); } } // end while ! delete wordList; ! } // end if } //***************************************************************************** --- 196,224 ---- if (match('"')) { if (output) + { + if(!wordList) wordList = new List; + if(debug) cerr << "scoring phrase" << endl; score(wordList, weight); + } break; } else if (lookahead == WORD) { weight *= current->weight; ! if (output) ! perform_phrase(wordList); ! lookahead = lexan(); } + else if (lookahead == DONE) + { + setError("missing quote"); + break; + } } // end while ! if(wordList) delete wordList; } //***************************************************************************** *************** *** 260,270 **** --- 271,287 ---- String temp = current->word.get(); char *p; + if(debug) + cerr << "perform_push @"<< stack.Size() << ": " << temp << endl; if (current->isIgnore) { + if(debug) cerr << "ignore: " << temp << " @" << stack.Size() << endl; // // This word needs to be ignored. Make it so. // + ResultList *list = new ResultList; + list->isIgnore = 1; + stack.push(list); return; } *************** *** 280,286 **** //***************************************************************************** void ! Parser::perform_phrase(List &oldWords) { static int maximum_word_length = config.Value("maximum_word_length", 12); String temp = current->word.get(); --- 297,303 ---- //***************************************************************************** void ! Parser::perform_phrase(List * &oldWords) { static int maximum_word_length = config.Value("maximum_word_length", 12); String temp = current->word.get(); *************** *** 288,298 **** --- 305,324 ---- List *newWords = 0; HtWordReference *oldWord, *newWord; + // if the query is empty, no further effort is needed + if(oldWords && oldWords->Count() == 0) + { + if(debug) cerr << "phrase not found, skip" << endl; + return; + } + + if(debug) cerr << "phrase current: " << temp << endl; if (current->isIgnore) { // // This word needs to be ignored. Make it so. // + if(debug) cerr << "ignoring: " << temp << endl; return; } *************** *** 302,322 **** p[maximum_word_length] = '\0'; newWords = words[p]; // If we don't have a prior list of words, we want this one... ! if (oldWords.Count() == 0) { newWords->Start_Get(); while ((newWord = (HtWordReference *) newWords->Get_Next())) ! oldWords.Add(newWord); return; } // OK, now we have a previous list in wordList and a new list List *results = new List; ! oldWords.Start_Get(); ! while ((oldWord = (HtWordReference *) oldWords.Get_Next())) { newWords->Start_Get(); while ((newWord = (HtWordReference *) newWords->Get_Next())) --- 328,354 ---- p[maximum_word_length] = '\0'; newWords = words[p]; + if(debug) cerr << "new words count: " << newWords->Count() << endl; // If we don't have a prior list of words, we want this one... ! if (!oldWords) { + oldWords = new List; + if(debug) cerr << "phrase adding first: " << temp << endl; newWords->Start_Get(); while ((newWord = (HtWordReference *) newWords->Get_Next())) ! { ! oldWords->Add(newWord); ! } ! if(debug) cerr << "old words count: " << oldWords->Count() << endl; return; } // OK, now we have a previous list in wordList and a new list List *results = new List; ! oldWords->Start_Get(); ! while ((oldWord = (HtWordReference *) oldWords->Get_Next())) { newWords->Start_Get(); while ((newWord = (HtWordReference *) newWords->Get_Next())) *************** *** 334,355 **** } } ! oldWords.Destroy(); results->Start_Get(); while ((newWord = (HtWordReference *) results->Get_Next())) ! oldWords.Add(newWord); results->Release(); delete results; newWords->Destroy(); delete newWords; } //***************************************************************************** void Parser::score(List *wordList, double weight) { - ResultList *list = new ResultList; DocMatch *dm; HtWordReference *wr; static double text_factor = config.Double("text_factor", 1); --- 366,392 ---- } } ! if(debug) cerr << "old words count: " << oldWords->Count() << endl; ! if(debug) cerr << "results count: " << results->Count() << endl; ! oldWords->Destroy(); results->Start_Get(); while ((newWord = (HtWordReference *) results->Get_Next())) ! { ! oldWords->Add(newWord); ! } ! if(debug) cerr << "old words count: " << oldWords->Count() << endl; results->Release(); delete results; newWords->Destroy(); delete newWords; + } //***************************************************************************** void Parser::score(List *wordList, double weight) { DocMatch *dm; HtWordReference *wr; static double text_factor = config.Double("text_factor", 1); *************** *** 364,378 **** int docanchor; int word_count; - stack.push(list); if (!wordList || wordList->Count() == 0) { ! // We can't score an empty list, so this should be ignored... ! > // (setting isIgnore as well would cause errors with AND) return; } // We're now guaranteed to have a non-empty list // We'll use the number of occurences of this word for scoring word_count = wordList->Count(); --- 401,419 ---- int docanchor; int word_count; if (!wordList || wordList->Count() == 0) { ! // We can't score an empty list, so push a null pointer... ! if(debug) cerr << "score: empty list, push 0 @" << stack.Size() << endl; ! ! stack.push(0); return; } + ResultList *list = new ResultList; + if(debug) cerr << "score: push @" << stack.Size() << endl; + stack.push(list); // We're now guaranteed to have a non-empty list // We'll use the number of occurences of this word for scoring word_count = wordList->Count(); *************** *** 417,476 **** //***************************************************************************** // The top two entries in the stack need to be ANDed together. // void ! Parser::perform_and(int isand) { ResultList *l1 = (ResultList *) stack.pop(); ResultList *l2 = (ResultList *) stack.pop(); - ResultList *result = new ResultList; int i; DocMatch *dm, *dm2, *dm3; HtVector *elements; ! // ! // If either of the arguments is not present, we will use the other as ! // the result. ! // ! if (!l1 && l2) ! { ! stack.push(l2); ! return; ! } ! else if (l1 && !l2) { ! stack.push(l1); return; } ! else if (!l1 && !l2) ! { ! stack.push(result); ! return; ! } ! // // If either of the arguments is set to be ignored, we will use the // other as the result. ! // ! if (l1->isIgnore) { stack.push(l2); delete l1; return; } else if (l2->isIgnore) { ! stack.push(isand ? l1 : result); delete l2; return; } stack.push(result); elements = l2->elements(); for (i = 0; i < elements->Count(); i++) { dm = (DocMatch *) (*elements)[i]; dm2 = l1->find(dm->id); ! if (dm2 ? isand : (isand == 0)) { // // Duplicate document. We just need to add the scored together. --- 458,532 ---- //***************************************************************************** // The top two entries in the stack need to be ANDed together. // + // a b a and b + // 0 0 0 + // 0 1 0 + // 0 x 0 + // 1 0 0 + // 1 1 intersect(a,b) + // 1 x a + // x 0 0 + // x 1 b + // x x x + // void ! Parser::perform_and() { ResultList *l1 = (ResultList *) stack.pop(); ResultList *l2 = (ResultList *) stack.pop(); int i; DocMatch *dm, *dm2, *dm3; HtVector *elements; ! if(!(l2 && l1)) { ! if(debug) cerr << "and: at least one empty operator, pushing 0 @" << stack.Size() << endl; ! stack.push(0); ! if(l1) delete l1; ! if(l2) delete l2; return; } ! // // If either of the arguments is set to be ignored, we will use the // other as the result. ! // remember l2 and l1, l2 not l1 ! ! if (l1->isIgnore && l2->isIgnore) { + if(debug) cerr << "and: ignoring all, pushing ignored list @" << stack.Size() << endl; + ResultList *result = new ResultList; + result->isIgnore = 1; + delete l1; delete l2; + stack.push(result); + } + else if (l1->isIgnore) + { + if(debug) cerr << "and: ignoring l1, pushing l2 @" << stack.Size() << endl; stack.push(l2); delete l1; return; } else if (l2->isIgnore) { ! if(debug) cerr << "and: ignoring l2, pushing l2 @" << stack.Size() << endl; ! stack.push(l1); delete l2; return; } + ResultList *result = new ResultList; stack.push(result); elements = l2->elements(); + + if(debug) + cerr << "perform and: " << elements->Count() << " " << l1->elements()->Count() << " "; + for (i = 0; i < elements->Count(); i++) { dm = (DocMatch *) (*elements)[i]; dm2 = l1->find(dm->id); ! if (dm2) { // // Duplicate document. We just need to add the scored together. *************** *** 484,489 **** --- 540,616 ---- result->add(dm3); } } + if(debug) + cerr << result->elements()->Count() << endl; + + elements->Release(); + delete elements; + delete l1; + delete l2; + } + + // a b a not b + // 0 0 0 + // 0 1 0 + // 0 x 0 + // 1 0 a + // 1 1 intersect(a,not b) + // 1 x a + // x 0 x + // x 1 x + // x x x + void + Parser::perform_not() + { + ResultList *l1 = (ResultList *) stack.pop(); + ResultList *l2 = (ResultList *) stack.pop(); + int i; + DocMatch *dm, *dm2, *dm3; + HtVector *elements; + + + if(!l2) + { + if(debug) cerr << "not: no positive term, pushing 0 @" << stack.Size() << endl; + stack.push(0); + if(l1) delete l1; + return; + } + if(!l1 || l1->isIgnore || l2->isIgnore) + { + if(debug) cerr << "not: no negative term, pushing positive @" << stack.Size() << endl; + stack.push(l2); + if(l1) delete l1; + return; + } + + ResultList *result = new ResultList; + if(debug) cerr << "not: pushing result @" << stack.Size() << endl; + stack.push(result); + elements = l2->elements(); + + if(debug) + cerr << "perform not: " << elements->Count() << " " << l1->elements()->Count() << " "; + + for (i = 0; i < elements->Count(); i++) + { + dm = (DocMatch *) (*elements)[i]; + dm2 = l1->find(dm->id); + if (!dm2) + { + // + // Duplicate document. We just need to add the scored together. + // + dm3 = new DocMatch; + dm3->score = dm->score; + dm3->id = dm->id; + dm3->anchor = dm->anchor; + result->add(dm3); + } + } + if(debug) + cerr << result->elements()->Count() << endl; + elements->Release(); delete elements; delete l1; *************** *** 508,523 **** // if (!l1 && result) { ! return; } else if (l1 && !result) { stack.push(l1); return; } else if (!l1 & !result) { ! stack.push(new ResultList); return; } --- 635,655 ---- // if (!l1 && result) { ! if(debug) cerr << "or: no 2nd operand" << endl; ! return; // result in top of stack } else if (l1 && !result) { + if(debug) cerr << "or: no 1st operand" << endl; + stack.pop(); stack.push(l1); return; } else if (!l1 & !result) { ! if(debug) cerr << "or: no operands" << endl; ! stack.pop(); ! stack.push(0); // empty result return; } *************** *** 539,544 **** --- 671,678 ---- } elements = l1->elements(); + if(debug) + cerr << "perform or: " << elements->Count() << " " << result->elements()->Count() << " "; for (i = 0; i < elements->Count(); i++) { dm = (DocMatch *) (*elements)[i]; *************** *** 561,566 **** --- 695,702 ---- result->add(dm2); } } + if(debug) + cerr << result->elements()->Count() << endl; elements->Release(); delete elements; delete l1; *************** *** 578,586 **** ResultList *result = (ResultList *) stack.pop(); if (!result) // Ouch! { ! valid = 0; error = 0; ! error << "Expected to have something to parse!"; return; } HtVector *elements = result->elements(); --- 714,722 ---- ResultList *result = (ResultList *) stack.pop(); if (!result) // Ouch! { ! // valid = 0; error = 0; ! // error << "Expected to have something to parse!"; return; } HtVector *elements = result->elements();