htdig: Yet another patch for htsearch.cc: No more modification of WORDS (the dead horse being beaten badly).


Hans-Peter Nilsson (hans-peter.nilsson@axis.com)
Wed, 13 Jan 1999 07:18:40 +0100


Ok, I see no reason to munge the user input (the default to use
in the form on the results-page). No more removal of "bad
words" or on-the-fly modifiers. I may be missing something,
like a valid reason for "parsing" the input, but hopefully
nothing obvious. This, my third patch, is hopefully the last
for this stuff.

Wed Jan 13 07:07:35 1999 Hans-Peter Nilsson <hp@axis.se>

        * htsearch/htsearch.cc (setupWords): Remove parsedWords parameter
        with associated processing of original words - deletion of
        bad_words, spacing and on-the-fly modifiers.
        (main): Create originalWords from input, not via setupWords().

Index: htsearch.cc
===================================================================
RCS file: /opt/htdig/cvs/htdig3/htsearch/htsearch.cc,v
retrieving revision 1.19
diff -p -c -r1.19 htsearch.cc
*** htsearch.cc 1999/01/12 03:59:02 1.19
--- htsearch.cc 1999/01/13 06:10:02
*************** typedef void (*SIGNAL_HANDLER) (...);
*** 96,102 ****
  
  ResultList *htsearch(char *, List &, Parser *);
  
! void setupWords(char *, List &, String&, int, Parser *);
  void createLogicalWords(List &, String &, StringMatch &);
  void reportError(char *);
  void convertToBoolean(List &words);
--- 96,102 ----
  
  ResultList *htsearch(char *, List &, Parser *);
  
! void setupWords(char *, List &, int, Parser *);
  void createLogicalWords(List &, String &, StringMatch &);
  void reportError(char *);
  void convertToBoolean(List &words);
*************** main(int ac, char **av)
*** 122,128 ****
      int pageNumber = 1;
      StringMatch limit_to;
      StringMatch exclude_these;
- String originalWords;
      String logicalWords;
      StringMatch searchWordsPattern;
      StringList requiredWords;
--- 122,127 ----
*************** main(int ac, char **av)
*** 241,249 ****
      // Parse the words to search for from the argument list.
      // This will produce a list of WeightWord objects.
      //
! String tmp = input["words"];
! tmp.chop(" \t\r\n");
! setupWords(tmp, searchWords, originalWords,
                 strcmp(config["match_method"], "boolean") == 0,
                 parser);
  
--- 240,248 ----
      // Parse the words to search for from the argument list.
      // This will produce a list of WeightWord objects.
      //
! String originalWords = input["words"];
! originalWords.chop(" \t\r\n");
! setupWords(originalWords, searchWords,
                 strcmp(config["match_method"], "boolean") == 0,
                 parser);
  
*************** dumpWords(List &words, char *msg = "")
*** 373,384 ****
  }
  
  //*****************************************************************************
! // void setupWords(char *allWords, List &searchWords, String &parsedWords,
  // int boolean, Parser *parser)
  //
  void
! setupWords(char *allWords, List &searchWords, String &parsedWords,
! int boolean, Parser *parser)
  {
      List tempWords;
      int i;
--- 372,382 ----
  }
  
  //*****************************************************************************
! // void setupWords(char *allWords, List &searchWords,
  // int boolean, Parser *parser)
  //
  void
! setupWords(char *allWords, List &searchWords, int boolean, Parser *parser)
  {
      List tempWords;
      int i;
*************** setupWords(char *allWords, List &searchW
*** 400,433 ****
      WordList badWords; // Just used to check for valid words.
      badWords.BadWordFile(config["bad_word_list"]);
  
- //
- // Create a string with the original search words minus any attributes
- // and minus hidden keywords.
- //
- StringList origList(allWords, ' ');
- for (i = 0; i < origList.Count(); i++)
- {
- char *p = origList[i];
- if (mystrncasecmp(p, "hidden:", 7) == 0)
- {
- i++;
- continue;
- }
- if (mystrncasecmp(p, "exact:", 6) == 0)
- p += 6;
- if (mystrncasecmp(p, "hidden:", 7) == 0)
- {
- i++;
- continue;
- }
- if (boolean)
- parsedWords << p << ' ';
- else if (badWords.IsValid(p))
- parsedWords << p << ' ';
- }
-
- parsedWords.chop(' ');
-
      //
      // Convert the string to a list of WeightWord objects. The special
      // characters '(' and ')' will be put into their own WeightWord objects.
--- 398,403 ----

brgds, H-P
----------------------------------------------------------------------
To unsubscribe from the htdig mailing list, send a message to
htdig-request@sdsu.edu containing the single word "unsubscribe" in
the body of the message.



This archive was generated by hypermail 2.0b3 on Wed Jan 13 1999 - 09:13:07 PST