#!/usr/bin/perl -w

# toplog
# From: Stephane Bortzmeyer
#
# Summarise the search requests recorded by a program named "htsearch" in
# a syslog-style log file.
#
# Usage: toplog logfile
#
# Two reports are printed on standard output:
#   * how many first-page requests mentioned each search word, and
#   * how many results each distinct query returned (the last occurrence
#     of a query in the log wins).
# Both reports are sorted by decreasing count; ties are listed in
# alphabetical order so that the output is reproducible between runs.
# Lines that do not match the expected layout are reported on standard
# error and otherwise ignored.

# -no_match_vars keeps the long names ($OS_ERROR, ...) without pulling in
# the match variables that slow down every regex on older perls.
use English qw(-no_match_vars);
use strict;

my $logfile = shift @ARGV;
if (!defined $logfile or $logfile eq '') {
    die "Usage: $0 logfile";
}

my %results;    # query string => number of results reported for it
my %numbers;    # search word  => number of first-page requests using it

# Characters accepted inside a query, in addition to the punctuation listed
# in the pattern below.
# NOTE(review): the accented letters are matched as whatever bytes this
# source file stores them as; confirm the log uses the same encoding.
my $characters = '\wéèçàâôêöïÉÈ';

# One htsearch log entry.  Capture groups: 1 date, 2 machine, 3 program and
# PID, 4 client, 5 configuration, 6 operator, 7 words, 8 logical words,
# 9 "found/total" pair, 10 found count, 11 page number.
my $entry_re = qr/^([a-z]{3}\ \d+\ \d+:\d+:\d+)\                # Date
    ([a-z0-9]+)\                                                # Machine name
    (htsearch\[\d+\]:)\                                         # Program name and PID
    ([a-z0-9\-\.]+)\                                            # Client name or address
    (\[[a-z\-]+\])\                                             # Configuration file
    \((and|or|boolean)\)\                                       # Operator
    \[([$characters\'\"\-\?\!\&,;\+\* ]+)\]\                    # Words
    \[([$characters\'\"\-\?\!\&,;\+\*\(\) ]+)\]\                # Logical words
    \(((\d+)\/\d+)\)\                                           # Results
    \-\                                                         # Separator
    (\d+)\                                                      # Page number
    /xi;

# Three-argument open with a lexical handle: the file name can never be
# mistaken for an open mode or a pipe.
open(my $log, '<', $logfile) or die "Cannot read $logfile: $OS_ERROR";

while (my $line = <$log>) {
    chomp $line;

    if ($line !~ $entry_re) {
        warn "Cannot parse \"$line\"";
        next;
    }

    # Copy the captures immediately, before any other match can clobber them.
    my ($words, $results, $page) = ($7, $10, $11);

    # Count a request only once: follow-up pages repeat the same query.
    next if $page != 1;

    $results{$words} = $results;

    foreach my $word (split /\s|,/, $words) {
        # Consecutive separators yield empty fields; compare against ''
        # rather than testing truth so that a search for "0" is kept.
        next if $word eq '';
        # Skip boolean operators and a few very common short words.
        next if $word =~ /^(and|et|or|ou|de|le)$/i;
        $numbers{$word}++;
    }
}

close($log);

print "\n-- NUMBER OF REQUESTS PER WORD --\n";
foreach my $word (reverse sort by_numbers keys %numbers) {
    print "$word: $numbers{$word}\n";
}

print "\n-- NUMBER OF RESULTS PER QUESTION --\n";
foreach my $request (reverse sort by_results keys %results) {
    print "$request: $results{$request}\n";
}

# Sort comparator: ascending number of results.  The tie-break is inverted
# on purpose, because callers apply "reverse" and equal counts should then
# come out in ascending alphabetical order.
sub by_results {
    $results{$a} <=> $results{$b} or $b cmp $a;
}

# Sort comparator: ascending number of requests, with the same inverted
# alphabetical tie-break as by_results.
sub by_numbers {
    $numbers{$a} <=> $numbers{$b} or $b cmp $a;
}