#!/bin/sh
#
# digidir
#
# Creates a standalone HTDIG searchable index of a directory of web or
# text documents that can be put onto CD-ROM or traded with other
# machines.  If put on a CD-ROM, the CD-ROM can be mounted directly
# under /home/httpd/html with no further configuration required.
#
# Usage:
#
#	$ cd /home/httpd/html
#	$ mkdir documents
#	$ [fill "documents" directory with text or HTML files ]
#	$ digdir documents
#
#	At this point, the directory "documents" contains the
#	original documents themselves, as well as a search.html
#	search form, and all "htdig" config and database files
#	(hidden under documents/.htdig).  This directory can
#	be moved to any other machine.
#
#	NOTE: as of htdig 3.1.x, the HTDIG database is architecture
#	dependent, so the database can only be moved to machines
#	of like architecture.  This is a design flaw in htdig.
#
# Requirements:
#	On the machine which is used to initially generate the index,
#	HTDIG 3.1.x must be installed.  Get it from http://www.htdig.org/
#
#	On the machine which is used to search the database, only
#	/home/httpd/html/cgi-bin/htsearch (from HTDIG 3.1.x) must exist.
#
# Author:
#	Rick Richardson, rick@digi.com, November 1999.
#
#	This software is donated to the PUBLIC DOMAIN and may be used for
#	any purpose without restriction.  No warranties expressed or
#	implied.  Your mileage may vary.
#

#
# error MESSAGE...
#	Print a diagnostic on stderr and terminate the script with status 1.
#
error() {
	echo "digdir error: $*" >&2
	exit 1
}

DIR="$1"
PATH=$PATH:/usr/sbin

[ -d "$DIR" ] || error "directory name missing or non-existant"
case "$DIR" in
/*)	error "directory name must be relative to current directory";;
esac

# A trailing slash (e.g. from shell completion) would put "//" into the
# URLs generated below.
DIR=${DIR%/}

# Remember where this script lives before changing directory; a relative
# $0 would no longer resolve once we are inside $DIR.
case "$0" in
/*)	SELF=$0;;
*)	SELF=$(pwd)/$0;;
esac

cd "$DIR" || error "can't chdir to $DIR"
HERE=$(pwd)

#
# Remove the old database and copy in a fresh set of HTDIG
# distribution files.
#
rm -f search.html
rm -rf .htdig
mkdir .htdig || error "can't make directory $HERE/.htdig"
(
	cd /var/lib/htdig &&
	find common ! -name 'db.*' ! -name '*.db' | cpio -pudm "$HERE/.htdig"
) || error "can't copy htdig common files from /var/lib/htdig"
mkdir .htdig/db || error "can't make directory $HERE/.htdig/db"

#
# Make a copy of the matching htsearch binary, so that
# somebody who gets a copy of this index and doesn't
# have a matching htsearch binary handy can just grab
# it from here and stash it in cgi-bin.  Also copy this
# shell script in case somebody wants to regen the index.
#
cp -a /home/httpd/cgi-bin/htsearch "$SELF" .htdig/ ||
	error "can't find htsearch binary"

#
# Create two config files, one for htdig and one for htsearch
#
# Using two config files allows us to eliminate any appearance
# of an absolute URL (one with a domain name, even localhost)
# in the results, thus making the database portable.
#
# We convert the output URL to ../$DIR because the browser's
# idea of the current directory will be cgi-bin.
#
DCONF=$HERE/.htdig/htdig.conf
SCONF=$HERE/.htdig/htsearch.conf
cp /etc/htdig/htdig.conf "$DCONF" || error "can't create $DCONF"
cp /etc/htdig/htdig.conf "$SCONF" || error "can't create $SCONF"

cat <<EOF >> "$DCONF"
database_dir: $HERE/.htdig/db
common_dir: $HERE/.htdig/common
start_url: http://localhost/$DIR/
local_urls: http://localhost/$DIR/=/home/httpd/html/$DIR/
local_user_urls: http:/=/home/,/public_html/
url_part_aliases: http://localhost/$DIR *$DIR
EOF

cat <<EOF >> "$SCONF"
database_dir: $HERE/.htdig/db
common_dir: $HERE/.htdig/common
start_url: http://localhost/$DIR/
local_urls: http://localhost/$DIR/=/home/httpd/html/$DIR/
local_user_urls: http:/=/home/,/public_html/
url_part_aliases: http:../$DIR *$DIR
EOF

#
# Generate the database using HTDIG
#
# The later steps are meaningless without a dug and merged database,
# so stop if either of the first two fails.
#
htdig -v -c "$DCONF" -i || error "htdig failed to index $DIR"
htmerge -c "$DCONF" || error "htmerge failed"
htnotify -c "$DCONF"
htfuzzy -c "$DCONF" endings
htfuzzy -c "$DCONF" synonyms

#
# Create the initial search page
#
# The htsearch config file is handed over as a "-c" argument in the
# query string of the form's action URL, which is why the form must
# use method POST.
#
# NOTE(review): the markup of this page was rebuilt from its visible
# text and the standard htsearch form fields (method, format, sort,
# words); verify it against the stock htdig search.html.
#
CGI="http:/cgi-bin/htsearch?-c$SCONF"
cat <<EOF > search.html
<html>
<head>
<title>ht://Dig WWW Search of $DIR</title>
</head>
<body bgcolor="#eef7ff">
<h1>ht://Dig WWW Site Search</h1>
<hr noshade size="4">
This search will allow you to search the contents of
all documents under this directory.
<br>
<p>
<form method="post" action="$CGI">
<font size="-1">
Match: <select name="method">
<option value="and">All
<option value="or">Any
<option value="boolean">Boolean
</select>
Format: <select name="format">
<option value="builtin-long">Long
<option value="builtin-short">Short
</select>
Sort by: <select name="sort">
<option value="score">Score
<option value="time">Time
<option value="title">Title
<option value="revscore">Reverse Score
<option value="revtime">Reverse Time
<option value="revtitle">Reverse Title
</select>
</font>
<br>
Search:
<input type="text" size="30" name="words" value="">
<input type="submit" value="Search">
</form>
<hr noshade size="4">
</body>
</html>
EOF

#
# Fixup the templates that create the refine page, etc.
#
# Change these from method GET to POST so that the
# -c$SCONF option will work.
#
# In the ex script below, ".(CGI)" matches the literal text "$(CGI)"
# in the templates; it cannot be spelled with a "$" here because the
# here-document would treat it as a command substitution.
#
for i in header nomatch syntax wrapper
do
	ex ".htdig/common/$i.html" <<EOF
g#.(CGI)#s##$CGI#
g#method=.get.#s##method="post"#
w
q
EOF
done