htdig: META Description patch


Geoff Hutchison (Geoffrey.R.Hutchison@williams.edu)
Thu, 16 Apr 1998 19:05:05 -0400


Hi all,

In the spirit of continuing patches, here's a patch against htdig-3.0.8b2 that
I wrote at the request of Brian Kariger. It defines a new config file option,
"use_meta_description", which is false by default. Setting it to true makes
htdig check for <META NAME="description"> tags and use their content as the
excerpt whenever such a tag exists and its content is not empty.

Comments, questions, and bug reports should be directed to me,
-Geoff Hutchison
Williams Students Online
http://wso.williams.edu/

*** htcommon/defaults.cc.orig Tue Jan 6 13:18:12 1998
--- htcommon/defaults.cc Sat Mar 21 10:33:47 1998
***************
*** 112,117 ****
--- 112,118 ----
      {"title_factor", "100"},
      {"url_list", "${database_base}.urls"},
      {"use_star_image", "true"},
+ {"use_meta_description", "false"},
      {"valid_punctuation", ".-_/!#$%^&*'"},
      {"version", HTDIG_VERSION},
      {"word_db", "${database_base}.words.gdbm"},
*** htdig/HTML.h.orig Sat Mar 21 13:31:49 1998
--- htdig/HTML.h Sat Mar 21 10:44:22 1998
***************
*** 45,50 ****
--- 45,51 ----
      int in_ref;
      int in_heading;
      int doindex;
+ int dohead;
      int minimumWordLength;
      URL *base;

*** htdig/HTML.cc.orig Sat Mar 21 21:12:00 1998
--- htdig/HTML.cc Sat Mar 21 20:41:50 1998
***************
*** 66,71 ****
--- 66,72 ----
      in_heading = 0;
      base = 0;
      doindex = 1;
+ dohead = 1;
      minimumWordLength = config.Value("minimum_word_length", 3);
  }

***************
*** 103,108 ****
--- 104,110 ----
      start = position;
      title = 0;
      head = 0;
+ dohead = 1;
      doindex = 1;
      in_heading = 0;
      in_title = 0;
***************
*** 231,237 ****
                  //
                  // Append the word to the head (excerpt)
                  //
! head << word;
              }

              if (word.length() >= minimumWordLength && doindex)
--- 233,240 ----
                  //
                  // Append the word to the head (excerpt)
                  //
! if (dohead)
! head << word;
              }

              if (word.length() >= minimumWordLength && doindex)
***************
*** 260,266 ****
                      //
                      if (!in_space)
                      {
! if (head.length() < max_head_length)
                          {
                              head << ' ';
                          }
--- 263,269 ----
                      //
                      if (!in_space)
                      {
! if (head.length() < max_head_length && dohead)
                          {
                              head << ' ';
                          }
***************
*** 280,286 ****
                      //
                      // Not whitespace
                      //
! if (head.length() < max_head_length)
                      {
                          head << *position;
                      }
--- 283,289 ----
                      //
                      // Not whitespace
                      //
! if (head.length() < max_head_length && dohead)
                      {
                          head << *position;
                      }
***************
*** 503,509 ****
          }

          case 19: // "li"
! if (doindex && head.length() < max_head_length)
                  head << "* ";
              break;

--- 506,512 ----
          }

          case 19: // "li"
! if (doindex && head.length() < max_head_length && dohead)
                  head << "* ";
              break;

***************
*** 588,593 ****
--- 591,608 ----
                  {
                      doindex = 0;
                  }
+ else if (mystrcasecmp(cache, "description") == 0
+ && config.Boolean("use_meta_description")
+ && strlen(conf["content"]) != 0)
+ {
+ head = conf["content"];
+ if (head.length() > max_head_length)
+ head = head.sub(0, max_head_length);
+ if (debug > 0)
+ cout << "META Description: " << conf["content"] << endl;
+ retriever.got_head(head);
+ dohead = 0;
+ }
              }
              else if (conf["name"] &&
                       mystrcasecmp(conf["name"], "htdig-noindex") == 0)

----------------------------------------------------------------------
To unsubscribe from the htdig mailing list, send a message to
htdig-request@sdsu.edu containing the single word "unsubscribe" in
the body of the message.



This archive was generated by hypermail 2.0b3 on Sat Jan 02 1999 - 16:26:02 PST