Sure, most forums are 99% lurkers, and the code took a bit of work, just curious. I see how many people read my post, and they can study source to figure out who I am.
Here's a simple Perl script that can extract this kind of information from my apache server. No idea how portable the code is, but it works for me:
Code:
#!/usr/bin/perl
use warnings;
use strict;
# apachelog.pl -- Perl script for extracting reports from /usr/local/apache/logs/access_log
#
# Copyright (c) 2006 Dave Bayer
# Subject to the terms and conditions of the MIT License.
(my $help = <<'EOF') =~ s/^: ?//mg;
:
: Usage:
:
: apachelog.pl count path
: count distinct accesses
: (in all, how many distinct visitors?)
:
: apachelog.pl who path
: list distinct accesses by caller
: (who were the visitors?)
:
: apachelog.pl when path
: list distinct accesses by count for each date
: (for each date, how many distinct visitors?)
:
: apachelog.pl what path
: list distinct accesses by ascending count
: (for each distinct visitor, what did they access?)
:
: apachelog.pl first path
: list distinct first accesses by ascending count
: (for each distinct visitor, what did they access first?)
:
: Examples:
:
: apachelog.pl what ''
: apachelog.pl when bayer/coffee
:
EOF
my $get = ' "GET /(?:~|%[0-9a-fA-F]{2})*';
sub openlog {
open LOG, "</usr/local/apache/logs/access_log" or die "unable to read /usr/local/apache/logs/access_log\n";
}
sub convertdate {
local $_;
($_) = @_;
s/(J|j)an[a-z]*/01/;
s/(F|f)eb[a-z]*/02/;
s/(M|m)ar[a-z]*/03/;
s/(A|a)pr[a-z]*/04/;
s/(M|m)ay[a-z]*/05/;
s/(J|j)un[a-z]*/06/;
s/(J|j)ul[a-z]*/07/;
s/(A|a)ug[a-z]*/08/;
s/(S|s)ep[a-z]*/09/;
s/(O|o)ct[a-z]*/10/;
s/(N|n)ov[a-z]*/11/;
s/(D|d)ec[a-z]*/12/;
s:(\d*)/(\d*)/(\d*):$3-$2-$1:;
$_;
}
sub who {
my ($match) = @_;
my (%who, $addr);
openlog();
while (<LOG>) {
/$get$match/ or next;
($addr = $_) =~ s/^(\S*)\s.*\n/$1/;
$who{$addr}++;
}
close LOG;
%who;
}
sub when {
my ($match) = @_;
my (%when, $date, $addr);
openlog();
while (<LOG>) {
/$get$match/ or next;
($date = $_) =~ s/^[^[]*\[([^:]*):.*\n/$1/;
($addr = $_) =~ s/^(\S*)\s.*\n/$1/;
$when{convertdate($date)}{$addr}++;
}
close LOG;
%when;
}
sub what {
my ($match) = @_;
my (%what, $file, $addr);
openlog();
while (<LOG>) {
/$get$match/ or next;
($file = $_) =~ s/^.*?$get($match\/?[^&#%\?\/\s]*).*\n/$1/;
$file =~ s/\/$//;
($addr = $_) =~ s/^(\S*)\s.*\n/$1/;
$what{$file}{$addr}++;
}
close LOG;
%what;
}
sub first {
my ($match) = @_;
my (%start, %first, $file, $addr);
openlog();
while (<LOG>) {
/$get$match/ or next;
($file = $_) =~ s/^.*?$get($match\/?[^&#%\?\/\s]*).*\n/$1/;
$file =~ s/\/$//;
($addr = $_) =~ s/^(\S*)\s.*\n/$1/;
if (defined $start{$addr}) { next; }
$start{$addr} = $file;
}
close LOG;
foreach $addr (keys %start) {
$first{$start{$addr}}++;
}
%first;
}
sub count_cmd {
my %who = who(@_);
printf "%d\n", scalar keys %who;
}
sub who_cmd {
my %who = who(@_);
foreach my $addr (sort keys %who) {
print "$addr\n";
}
}
sub when_cmd {
my %when = when(@_);
foreach my $date (sort keys %when) {
printf "%8d %s\n", scalar keys %{$when{$date}}, $date;
}
}
sub what_cmd {
my %what = what(@_);
my @list;
foreach my $file (sort keys %what) {
push @list, sprintf "%8d %s\n", scalar keys %{$what{$file}}, $file;
}
print sort @list;
}
sub first_cmd {
my %first = first(@_);
my @list;
foreach my $file (sort keys %first) {
push @list, sprintf "%8d %s\n", $first{$file}, $file;
}
print sort @list;
}
my %run = (
'count' => \&count_cmd,
'who' => \&who_cmd,
'when' => \&when_cmd,
'what' => \&what_cmd,
'first' => \&first_cmd,
);
my ($do, $match) = @ARGV;
if ($#ARGV != 1 or not $run{$do}) {
print $help;
exit(1);
}
$run{$do}($match);