[htdig] patch to boost performance of HtWordType functions


Gilles Detillieux (grdetil@scrc.umanitoba.ca)
Tue, 24 Aug 1999 13:46:53 -0500 (CDT)


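This patch should speed up the HtWordType class's inline functions by
replacing their isalnum() + strchr() tests with a single lookup in a
256-entry character-type table built at initialization. (That should help
offset the performance penalty of the compound word handling patch I hope
to work on next.)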

--- ../htdig-3.1.2.bak/htlib/HtWordType.h Wed Apr 21 21:47:58 1999
+++ ../htdig-3.1.2/htlib/HtWordType.h Tue Aug 24 13:28:31 1999
@@ -15,8 +15,8 @@
 // Inline friend-functions are used together with an all-statics
 // class (name that pattern!) to spare the user from having
 // to manage the valid_punctuation and extra_word_characters
-// attributes, while in theory still having the runtime
-// performance of strchr() + isalnum().
+// attributes, while in theory still having better runtime
+// performance than strchr() + isalnum().
 //
 
 class HtWordType
@@ -40,6 +40,7 @@ private:
     char *extra_word_characters; // Likewise.
     char *other_chars_in_word; // Attribute "valid_punctuation" plus
                                  // "extra_word_characters".
+ char chrtypes[256]; // quick lookup table for types
   } statics;
 
   // These methods are not supposed to be implemented (or accessed).
@@ -48,19 +49,25 @@ private:
   void operator=(const HtWordType &);
 };
 
+// Bits to set in chrtypes[]:
+#define HtWt_Alpha 0x01
+#define HtWt_Digit 0x02
+#define HtWt_Extra 0x04
+#define HtWt_ValidPunct 0x08
+
 // One for characters that when put together are a word
 // (including punctuation).
 inline int
 HtIsWordChar(int c)
 {
- return isalnum(c) || (c && strchr(HtWordType::statics.other_chars_in_word, c));
+ return (HtWordType::statics.chrtypes[(unsigned char)c] & (HtWt_Alpha|HtWt_Digit|HtWt_Extra|HtWt_ValidPunct)) != 0;
 }
 
 // Similar, but no punctuation characters.
 inline int
 HtIsStrictWordChar(int c)
 {
- return isalnum(c) || (c && strchr(HtWordType::statics.extra_word_characters, c));
+ return (HtWordType::statics.chrtypes[(unsigned char)c] & (HtWt_Alpha|HtWt_Digit|HtWt_Extra)) != 0;
 }
 
 // Let caller get rid of getting and holding a configuration parameter.
--- ../htdig-3.1.2.bak/htlib/HtWordType.cc Wed Apr 21 21:47:58 1999
+++ ../htdig-3.1.2/htlib/HtWordType.cc Tue Aug 24 13:28:52 1999
@@ -23,4 +23,17 @@ HtWordType::Initialize(Configuration &co
   HtWordType::statics.extra_word_characters = extra_word_chars;
   HtWordType::statics.valid_punctuation = valid_punct;
   HtWordType::statics.other_chars_in_word = punct_and_extra;
+ HtWordType::statics.chrtypes[0] = 0;
+ for (int i = 1; i < 256; i++)
+ {
+ HtWordType::statics.chrtypes[i] = 0;
+ if (isalpha(i))
+ HtWordType::statics.chrtypes[i] |= HtWt_Alpha;
+ if (isdigit(i))
+ HtWordType::statics.chrtypes[i] |= HtWt_Digit;
+ if (strchr(extra_word_chars, i))
+ HtWordType::statics.chrtypes[i] |= HtWt_Extra;
+ if (strchr(valid_punct, i))
+ HtWordType::statics.chrtypes[i] |= HtWt_ValidPunct;
+ }
 }

-- 
Gilles R. Detillieux              E-mail: <grdetil@scrc.umanitoba.ca>
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930

------------------------------------
To unsubscribe from the htdig mailing list, send a message to htdig@htdig.org containing the single word "unsubscribe" in the SUBJECT of the message.



This archive was generated by hypermail 2.0b3 on Tue Aug 24 1999 - 11:48:42 PDT