[htdig] Re: 3.2.0b2 - problem with either no stars, or infinite loop writing out (PR#846)


Subject: [htdig] Re: 3.2.0b2 - problem with either no stars, or infinite loop writing out (PR#846)
From: Gilles Detillieux (grdetil@scrc.umanitoba.ca)
Date: Mon May 15 2000 - 12:12:00 PDT


According to Joe.Sanderson@ecora.com:
> I've downloaded the 3.2.0b2 beta, and built on Linux. I have run into a
> problem involving the stars in the output html produced by htsearch.
>
> For all searches, the output html contains a number of star_blank
> references for all matches - I never get the star.gif references, no
> matter how good the match is.
>
> Another problem that may be related is that for some searches (have not
> yet seen a pattern of what search string causes this) htsearch goes into
> an infinite loop outputting <img src = "/htdig/star_blank.gif" alt = "
> "> to the output file.

Yes, Terry Luedtke reported this problem and posted a patch for it, to
htdig3-dev@htdig.org, back on May 3rd. It didn't seem to make it into
Joe's patch archive, so I'll repost it here for those who missed it.
The patch fixes a few bugs in the score calculation which cause the
problems in star generation.

Date: Wed, 03 May 2000 17:56:50 -0400
From: "Terry Luedtke" <LuedtkT@mail.nlm.nih.gov>
To: <grdetil@scrc.umanitoba.ca>
Cc: <htdig3-dev@htdig.org>
Subject: Re: [htdig3-dev] Too many stars

Gilles Detillieux <grdetil@scrc.umanitoba.ca> 03-May-00 17:24:00 >>>
>According to Terry Luedtke:
>> I'm not sure how to extract a patch, so here are the diffs for each file.
>
> diff -rup dir1 dir2 or diff -rc3p dir1 dir2
>
> Add an "N" to the options if you created new files, which you want included
>in the patch.

Hmm, that doesn't work with Solaris's diff, so I built GNU's. Here are the changes for scoring.

Terry Luedtke
National Library of Medicine

diff -rup htdig-3.2.0b2.orig/htcommon/DocumentRef.h htdig-3.2.0b2/htcommon/DocumentRef.h
--- htdig-3.2.0b2.orig/htcommon/DocumentRef.h Tue Apr 11 18:53:19 2000
+++ htdig-3.2.0b2/htcommon/DocumentRef.h Wed May 3 15:13:39 2000
@@ -64,7 +64,7 @@ class DocumentRef : public Object
     ReferenceState DocState() {return docState;}
     int DocSize() {return docSize;}
     List *DocAnchors() {return &docAnchors;}
- int DocScore() {return docScore;}
+ double DocScore() {return docScore;}
     int DocSig() {return docSig;}
     int DocAnchor() {return docAnchor;}
     int DocHopCount() {return docHopCount;}
@@ -89,7 +89,7 @@ class DocumentRef : public Object
     void DocSig(int s) {docSig = s;}
     void DocAnchors(List &l) {docAnchors = l;}
     void AddAnchor(const char *a);
- void DocScore(int s) {docScore = s;}
+ void DocScore(double s) {docScore = s;}
     void DocAnchor(int a) {docAnchor = a;}
     void DocHopCount(int h) {docHopCount = h;}
     void DocEmail(const char *e) {docEmail = e;}
@@ -156,7 +156,7 @@ class DocumentRef : public Object
     //
     
     // This is the current score of this document.
- int docScore;
+ double docScore;
     // This is the nearest anchor for the search word.
     int docAnchor;
 
diff -rup htdig-3.2.0b2.orig/htsearch/ResultMatch.cc htdig-3.2.0b2/htsearch/ResultMatch.cc
--- htdig-3.2.0b2.orig/htsearch/ResultMatch.cc Tue Apr 11 18:53:21 2000
+++ htdig-3.2.0b2/htsearch/ResultMatch.cc Wed May 3 15:32:28 2000
@@ -79,8 +79,17 @@ ScoreMatch::compare(const void *a1, cons
 {
     ResultMatch *m1 = *((ResultMatch **) a1);
     ResultMatch *m2 = *((ResultMatch **) a2);
+ double score1 = m1->getScore();
+ double score2 = m2->getScore();
 
- return m2->getScore() - m1->getScore();
+ if(score1 == score2)
+ return 0;
+ else if(score1 < score2)
+ return 1;
+ else
+ return -1;
+
+ // return m2->getScore() - m1->getScore();
 }
 
 ResultMatch::CmpFun

diff -rup htdig-3.2.0b2.orig/htsearch/ResultMatch.h htdig-3.2.0b2/htsearch/ResultMatch.h
--- htdig-3.2.0b2.orig/htsearch/ResultMatch.h Tue Apr 11 18:53:21 2000
+++ htdig-3.2.0b2/htsearch/ResultMatch.h Wed May 3 15:08:53 2000
@@ -38,10 +38,10 @@ public:
         //
         void setAnchor(int a) {anchor = a;}
         void setID(int i) {id = i;}
- void setScore(float s) {score = s;}
+ void setScore(double s) {score = s;}
         
         int getAnchor() {return anchor;}
- int getScore() {return (int) score;}
+ double getScore() {return score;}
         int getID() {return id;}
 
         // Multiple database support
@@ -74,7 +74,7 @@ private:
             SortByID
         };
 
- float score;
+ double score;
         int anchor;
         int id;
         Collection *collection;

diff -rup htdig-3.2.0b2.orig/htsearch/Display.cc htdig-3.2.0b2/htsearch/Display.cc
--- htdig-3.2.0b2.orig/htsearch/Display.cc Tue Apr 11 18:53:21 2000
+++ htdig-3.2.0b2/htsearch/Display.cc Wed May 3 17:15:05 2000
@@ -293,7 +293,7 @@ Display::displayMatch(ResultMatch *match
         vars.Remove("ANCHOR");
       }
     
- vars.Add("SCORE", new String(form("%d", ref->DocScore())));
+ vars.Add("SCORE", new String(form("%f", ref->DocScore())));
     vars.Add("CURRENT", new String(form("%d", current)));
     char *title = ref->DocTitle();
     if (!title || !*title)
@@ -860,9 +860,12 @@ Display::generateStars(DocumentRef *ref,
     const String blank = config["star_blank"];
     double score;
 
+
+
     if (maxScore != 0)
     {
         score = (ref->DocScore() - minScore) / (maxScore - minScore);
+ if(debug) cerr << "generateStars: doc, min, max " << ref->DocScore() << ", " << minScore << ", " << maxScore <<endl;
     }
     else
     {
@@ -871,6 +874,8 @@ Display::generateStars(DocumentRef *ref,
     }
     int nStars = int(score * (maxStars - 1) + 0.5) + 1;
 
+ if(debug) cerr << "generateStars: nStars " << nStars << " of " << maxStars <<endl;
+
     if (right)
     {
         for (i = 0; i < maxStars - nStars; i++)
@@ -1121,19 +1126,20 @@ Display::buildMatchList()
         // We want older docs to have smaller values and the
         // ultimate values to be a reasonable size (max about 100)
 
- if (date_factor != 0.0 || backlink_factor != 0.0)
+ if (date_factor != 0.0)
         {
             score += date_factor *
               ((thisRef->DocTime() * 1000 / (double)time(0)) - 900);
+ }
   
+ if (backlink_factor != 0.0)
+ {
             int links = thisRef->DocLinks();
             if (links == 0)
               links = 1; // It's a hack, but it helps...
   
             score += backlink_factor
               * (thisRef->DocBackLinks() / (double)links);
- if (score <= 1.0)
- score = 1.0;
         }
 
         thisMatch->setTime(thisRef->DocTime());
@@ -1144,18 +1150,28 @@ Display::buildMatchList()
         // Get rid of it to free the memory!
         delete thisRef;
 
- thisMatch->setScore(1.0 + log(score));
+ score = log(1.0 + score);
+ thisMatch->setScore(score);
         thisMatch->setAnchor(dm->anchor);
                 
         //
         // Append this match to our list of matches.
         //
          matches.Add(thisMatch, url.get());
+
+ if (debug)
+ {
+ cerr << "score " << score << "(" << thisMatch->getScore() << "), maxScore " << maxScore <<", minScore " << minScore << endl;
+ }
  
          if (maxScore < score)
- maxScore = score;
+ {if(debug) cerr << "Set maxScore = score" <<endl;
+ maxScore = score;
+ }
          if (minScore > score)
+ {if(debug) cerr << "Set minScore = score" <<endl;
              minScore = score;
+ }
     }
   }
 

-- 
Gilles R. Detillieux              E-mail: <grdetil@scrc.umanitoba.ca>
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930

------------------------------------ To unsubscribe from the htdig mailing list, send a message to htdig-unsubscribe@htdig.org. You will receive a message to confirm this.



This archive was generated by hypermail 2b28 : Mon May 15 2000 - 10:00:04 PDT