[htdig3-dev] Excerpts and Punctuation


Geoff Hutchison (ghutchis@wso.williams.edu)
Fri, 29 Jan 1999 00:39:41 -0400


OK, I promised a patch for the valid_punctuation problem. Here's a patch
that adds the original user input, with punctuation, to the StringMatch
used for excerpts.

However, I just noticed that excerpt highlighting seems broken on my system,
so I can't test it out. :-( I did put in debugging output, so I know it's
setting the StringMatch correctly.

If someone could test this, I'd appreciate it. If someone can figure out
why my excerpt highlighting isn't working, I'd be very, very happy.

-Geoff

Index: htsearch.cc
===================================================================
RCS file: /opt/htdig/cvs/htdig3/htsearch/htsearch.cc,v
retrieving revision 1.22
diff -c -3 -r1.22 htsearch.cc
*** htsearch.cc 1999/01/21 13:41:24 1.22
--- htsearch.cc 1999/01/29 05:30:40
***************
*** 106,113 ****

  ResultList *htsearch(char *, List &, Parser *);

! void setupWords(char *, List &, int, Parser *);
! void createLogicalWords(List &, String &, StringMatch &);
  void reportError(char *);
  void convertToBoolean(List &words);
  void doFuzzy(WeightWord *, List &, List &);
--- 35,42 ----

  ResultList *htsearch(char *, List &, Parser *);

! void setupWords(char *, List &, int, Parser *, String &);
! void createLogicalWords(List &, String &, String &);
  void reportError(char *);
  void convertToBoolean(List &words);
  void doFuzzy(WeightWord *, List &, List &);
***************
*** 133,138 ****
--- 62,69 ----
      StringMatch limit_to;
      StringMatch exclude_these;
      String logicalWords;
+ String origPattern;
+ String logicalPattern;
      StringMatch searchWordsPattern;
      StringList requiredWords;
      int i;
***************
*** 266,280 ****
      originalWords.chop(" \t\r\n");
      setupWords(originalWords, searchWords,
                 strcmp(config["match_method"], "boolean") == 0,
! parser);

      //
      // Convert the list of WeightWord objects to a pattern string
      // that we can compile.
      //
! createLogicalWords(searchWords, logicalWords, searchWordsPattern);

      //
      // If required keywords were given in the search form, we will
      // modify the current searchWords list to include the required
      // words.
--- 197,220 ----
      originalWords.chop(" \t\r\n");
      setupWords(originalWords, searchWords,
                 strcmp(config["match_method"], "boolean") == 0,
! parser, origPattern);

      //
      // Convert the list of WeightWord objects to a pattern string
      // that we can compile.
      //
! createLogicalWords(searchWords, logicalWords, logicalPattern);

+ //
+ // Assemble the full pattern for excerpt matching and highlighting
      //
+ origPattern += logicalPattern;
+ searchWordsPattern.Pattern(origPattern);
+ searchWordsPattern.IgnoreCase();
+ if (debug)
+ cout << "Excerpt pattern: " << origPattern << "\n";
+
+ //
      // If required keywords were given in the search form, we will
      // modify the current searchWords list to include the required
      // words.
***************
*** 336,342 ****

//*****************************************************************************
void
! createLogicalWords(List &searchWords, String &logicalWords, StringMatch &wm)
  {
      String pattern;
      int i;
--- 276,282 ----

//*****************************************************************************
void
! createLogicalWords(List &searchWords, String &logicalWords, String &wm)
  {
      String pattern;
      int i;
***************
*** 368,375 ****
              pattern << ww->word;
          }
      }
! wm.IgnoreCase();
! wm.Pattern(pattern);

      if (debug)
      {
--- 308,314 ----
              pattern << ww->word;
          }
      }
! wm = pattern;

      if (debug)
      {
***************
*** 395,404 ****

//*****************************************************************************
// void setupWords(char *allWords, List &searchWords,
! // int boolean, Parser *parser)
  //
  void
! setupWords(char *allWords, List &searchWords, int boolean, Parser *parser)
  {
      List tempWords;
      int i;
--- 334,344 ----

//*****************************************************************************
// void setupWords(char *allWords, List &searchWords,
! // int boolean, Parser *parser, String &originalPattern)
  //
  void
! setupWords(char *allWords, List &searchWords, int boolean, Parser *parser,
! String &originalPattern)
  {
      List tempWords;
      int i;
***************
*** 456,463 ****
                      word << (char) t;
                      t = *pos++;
                  }
! word.remove(valid_punctuation);
! pos--;
                  if (boolean && mystrcasecmp(word.get(), "and") == 0)
                  {
                      tempWords.Add(new WeightWord("&", -1.0));
--- 396,402 ----
                      word << (char) t;
                      t = *pos++;
                  }
!
                  if (boolean && mystrcasecmp(word.get(), "and") == 0)
                  {
                      tempWords.Add(new WeightWord("&", -1.0));
***************
*** 472,477 ****
--- 411,419 ----
                  }
                  else
                  {
+ // Add word to excerpt matching list
+ originalPattern << word << "|";
+ word.remove(valid_punctuation);
                      WeightWord *ww = new WeightWord(word, 1.0);
                      if (!badWords.IsValid(word) ||
                          word.length() < minimum_word_length)
***************
*** 484,489 ****
--- 426,432 ----
                          tempWords.Add(ww);
                      }
                  }
+ pos--;
                  break;
              }
          }

------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
htdig3-dev@htdig.org containing the single word "unsubscribe" in
the SUBJECT of the message.



This archive was generated by hypermail 2.0b3 on Thu Feb 04 1999 - 22:24:21 PST