Re: [htdig3-dev] Re: Moving towards release


Subject: Re: [htdig3-dev] Re: Moving towards release
From: Gilles Detillieux (grdetil@scrc.umanitoba.ca)
Date: Mon Jan 31 2000 - 14:54:52 PST


According to Geoff Hutchison:
> It is inefficient for both reasons. I wrote this on deadline about 3AM.
> I'll clean it up a bit, or if you want to do it, feel free.

How does this look?

*** htsearch/parser.cc.scorebug Sun Jan 30 07:43:18 2000
--- htsearch/parser.cc Mon Jan 31 16:42:24 2000
*************** Parser::score(List *wordList, double wei
*** 335,340 ****
--- 335,350 ----
      ResultList *list = new ResultList;
      DocMatch *dm;
      HtWordReference *wr;
+ static double text_factor = config.Double("text_factor", 1);
+ static double caps_factor = config.Double("caps_factor", 1);
+ static double title_factor = config.Double("title_factor", 1);
+ static double heading_factor = config.Double("heading_factor", 1);
+ static double keywords_factor = config.Double("keywords_factor", 1);
+ static double meta_description_factor = config.Double("meta_description_factor", 1);
+ static double author_factor = config.Double("author_factor", 1);
+ static double description_factor = config.Double("description_factor", 1);
+ double wscore;
+ int docanchor;
  
      stack.push(list);
  
*************** Parser::score(List *wordList, double wei
*** 348,401 ****
      wordList->Start_Get();
      while ((wr = (HtWordReference *) wordList->Get_Next()))
        {
          dm = list->find(wr->DocID());
          if (dm)
            {
!
! unsigned int prevAnchor;
! double prevScore;
! prevScore = dm->score;
! prevAnchor = dm->anchor;
              // We wish to *update* this, not add a duplicate
              list->remove(wr->DocID());
-
- dm = new DocMatch;
-
- dm->score = (wr->Flags() & FLAG_TEXT) * config.Double("text_factor", 1);
- dm->score += (wr->Flags() & FLAG_CAPITAL) * config.Double("caps_factor", 1);
- dm->score += (wr->Flags() & FLAG_TITLE) * config.Double("title_factor", 1);
- dm->score += (wr->Flags() & FLAG_HEADING) * config.Double("heading_factor", 1);
- dm->score += (wr->Flags() & FLAG_KEYWORDS) * config.Double("keywords_factor", 1);
- dm->score += (wr->Flags() & FLAG_DESCRIPTION) * config.Double("meta_description_factor", 1);
- dm->score += (wr->Flags() & FLAG_AUTHOR) * config.Double("author_factor", 1);
- dm->score += (wr->Flags() & FLAG_LINK_TEXT) * config.Double("description_factor", 1);
- dm->id = wr->DocID();
- dm->score = weight * dm->score + prevScore;
- if (prevAnchor > wr->Anchor())
- dm->anchor = wr->Anchor();
- else
- dm->anchor = prevAnchor;
-
            }
- else
- {
  
! //
! // ******* Compute the score for the document
! //
! dm = new DocMatch;
! dm->score = (wr->Flags() & FLAG_TEXT) * config.Double("text_factor", 1);
! dm->score += (wr->Flags() & FLAG_CAPITAL) * config.Double("caps_factor", 1);
! dm->score += (wr->Flags() & FLAG_TITLE) * config.Double("title_factor", 1);
! dm->score += (wr->Flags() & FLAG_HEADING) * config.Double("heading_factor", 1);
! dm->score += (wr->Flags() & FLAG_KEYWORDS) * config.Double("keywords_factor", 1);
! dm->score += (wr->Flags() & FLAG_DESCRIPTION) * config.Double("meta_description_factor", 1);
! dm->score += (wr->Flags() & FLAG_AUTHOR) * config.Double("author_factor", 1);
! dm->score += (wr->Flags() & FLAG_LINK_TEXT) * config.Double("description_factor", 1);
! dm->score *= weight;
! dm->id = wr->DocID();
! dm->anchor = wr->Anchor();
! }
          list->add(dm);
        }
  }
--- 358,391 ----
      wordList->Start_Get();
      while ((wr = (HtWordReference *) wordList->Get_Next()))
        {
+ //
+ // ******* Compute the score for the document
+ //
+ wscore = 0.0;
+ if (wr->Flags() == FLAG_TEXT) wscore += text_factor;
+ if (wr->Flags() & FLAG_CAPITAL) wscore += caps_factor;
+ if (wr->Flags() & FLAG_TITLE) wscore += title_factor;
+ if (wr->Flags() & FLAG_HEADING) wscore += heading_factor;
+ if (wr->Flags() & FLAG_KEYWORDS) wscore += keywords_factor;
+ if (wr->Flags() & FLAG_DESCRIPTION) wscore += meta_description_factor;
+ if (wr->Flags() & FLAG_AUTHOR) wscore += author_factor;
+ if (wr->Flags() & FLAG_LINK_TEXT) wscore += description_factor;
+ wscore *= weight;
+ docanchor = wr->Anchor();
          dm = list->find(wr->DocID());
          if (dm)
            {
! wscore += dm->score;
! if (dm->anchor < docanchor)
! docanchor = dm->anchor;
              // We wish to *update* this, not add a duplicate
              list->remove(wr->DocID());
            }
  
! dm = new DocMatch;
! dm->id = wr->DocID();
! dm->score = wscore;
! dm->anchor = docanchor;
          list->add(dm);
        }
  }

-- 
Gilles R. Detillieux              E-mail: <grdetil@scrc.umanitoba.ca>
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930

------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
htdig3-dev-unsubscribe@htdig.org
You will receive a message to confirm this.



This archive was generated by hypermail 2b28 : Mon Jan 31 2000 - 14:56:36 PST