Geoff Hutchison (Geoffrey.R.Hutchison@williams.edu)
Thu, 16 Apr 1998 19:05:05 -0400
Hi all,
In the spirit of continuing patches, here's a patch against htdig-3.0.8b2 that I
wrote at the request of Brian Kariger. It defines a new config file option,
"use_meta_description", which is false by default. Setting it to true makes
htdig check for <META NAME="description"> tags and, when one is found with
non-empty content, use that content (truncated to max_head_length) as the
document's excerpt instead of the text from the page body.
Comments, questions, and bug reports should be directed to me,
-Geoff Hutchison
Williams Students Online
http://wso.williams.edu/
*** htcommon/defaults.cc.orig Tue Jan 6 13:18:12 1998
--- htcommon/defaults.cc Sat Mar 21 10:33:47 1998
***************
*** 112,117 ****
--- 112,118 ----
{"title_factor", "100"},
{"url_list", "${database_base}.urls"},
{"use_star_image", "true"},
+ {"use_meta_description", "false"},
{"valid_punctuation", ".-_/!#$%^&*'"},
{"version", HTDIG_VERSION},
{"word_db", "${database_base}.words.gdbm"},
*** htdig/HTML.h.orig Sat Mar 21 13:31:49 1998
--- htdig/HTML.h Sat Mar 21 10:44:22 1998
***************
*** 45,50 ****
--- 45,51 ----
int in_ref;
int in_heading;
int doindex;
+ int dohead;
int minimumWordLength;
URL *base;
*** htdig/HTML.cc.orig Sat Mar 21 21:12:00 1998
--- htdig/HTML.cc Sat Mar 21 20:41:50 1998
***************
*** 66,71 ****
--- 66,72 ----
in_heading = 0;
base = 0;
doindex = 1;
+ dohead = 1;
minimumWordLength = config.Value("minimum_word_length", 3);
}
***************
*** 103,108 ****
--- 104,110 ----
start = position;
title = 0;
head = 0;
+ dohead = 1;
doindex = 1;
in_heading = 0;
in_title = 0;
***************
*** 231,237 ****
//
// Append the word to the head (excerpt)
//
! head << word;
}
if (word.length() >= minimumWordLength && doindex)
--- 233,240 ----
//
// Append the word to the head (excerpt)
//
! if (dohead)
! head << word;
}
if (word.length() >= minimumWordLength && doindex)
***************
*** 260,266 ****
//
if (!in_space)
{
! if (head.length() < max_head_length)
{
head << ' ';
}
--- 263,269 ----
//
if (!in_space)
{
! if (head.length() < max_head_length && dohead)
{
head << ' ';
}
***************
*** 280,286 ****
//
// Not whitespace
//
! if (head.length() < max_head_length)
{
head << *position;
}
--- 283,289 ----
//
// Not whitespace
//
! if (head.length() < max_head_length && dohead)
{
head << *position;
}
***************
*** 503,509 ****
}
case 19: // "li"
! if (doindex && head.length() < max_head_length)
head << "* ";
break;
--- 506,512 ----
}
case 19: // "li"
! if (doindex && head.length() < max_head_length && dohead)
head << "* ";
break;
***************
*** 588,593 ****
--- 591,608 ----
{
doindex = 0;
}
+ else if (mystrcasecmp(cache, "description") == 0
+ && config.Boolean("use_meta_description")
+ && strlen(conf["content"]) != 0)
+ {
+ head = conf["content"];
+ if (head.length() > max_head_length)
+ head = head.sub(0, max_head_length);
+ if (debug > 0)
+ cout << "META Description: " << conf["content"] << endl;
+ retriever.got_head(head);
+ dohead = 0;
+ }
}
else if (conf["name"] &&
mystrcasecmp(conf["name"], "htdig-noindex") == 0)
----------------------------------------------------------------------
To unsubscribe from the htdig mailing list, send a message to
htdig-request@sdsu.edu containing the single word "unsubscribe" in
the body of the message.
This archive was generated by hypermail 2.0b3 on Sat Jan 02 1999 - 16:26:02 PST