#!/usr/bin/perl # # uastat - gain User Agent Statistics from Newsserver via NNTP & XHDR # Well, it startet with the User Agent, now you can do much # more... # # (c) 1997,98 Gerd Knorr # ######################################################################## require 5.002; require "timelocal.pl"; use FileHandle; use Socket; ######################################################################## # config $newsserver = "$ENV{NNTPSERVER}"; $newsserver = "localhost" unless $newsserver; ######################################################################## sub read_answer() { local($i,$code,$line); undef $response; undef @lines; $response = ; ($code) = split(' ',$response); print STDERR "DEBUG: $response"; if ($code > 400) { print STDERR "Oops: $response"; exit; } if ($code ne "200" && $code ne "205" && $code ne "211") { for ($i = 0;; $i++) { $line = ; print STDERR $line; last if $line =~ /^\./; $lines[$i] = $line; printf STDERR "%6d\r",$i if ($i % 100) == 0; } } } sub print_answer() { local($i); print "$response"; return unless @lines; for ($i = 0; $i <= $#lines; $i++) { print "$lines[$i]"; } } ######################################################################## sub get_headerline () { local($nr,$hdr) = @_; print SOCK "xhdr $hdr $nr\r\n"; read_answer; $lines[0] =~ s/([0-9]*) //; $lines[0] =~ s/[\r\n]*$//; return $lines[0]; } sub trim_ua () { local($ua) = $_[0]; $ua =~ s/\(.*\)//; # cut off ( anything ) $ua =~ s/\[.*\]//; # cut off [ anything ] $ua =~ s/([vV]?)([-0-9.\/#]{2,})//g; # cut off version numbers $ua =~ s/ [Vv]ersion//; # cut off "version" $ua =~ s/ [Vv]er//; # same for "ver" $ua =~ s/ [Bb]eta//; # ... "beta" $ua =~ s/ [Aa]lpha//; # ... "alpha" $ua =~ s/ \S / /; # ... single chars $ua =~ s/ \S$//; # ... single chars # trim $ua =~ s/^\s*//; $ua =~ s/\s*$//; $ua =~ s/\s+/ /g; # special hacks for some user agents $ua =~ s/(Mozilla).*/$1/; $ua =~ s/(CrossPoint).*/$1/; $ua =~ s/(Gnus X?Emacs).*/$1/; return $ua; } sub trim_subject () { local($ua) = $_[0]; $ua =~ s/^R[Ee]: //; return $ua; } sub trim_from () { local($ua) = $_[0]; $ua =~ s/([^<])\s+<[^>]+>/$1/; $ua =~ s/.*\(([^)]+)\)/$1/; return $ua; } sub trim_date () { local($ua) = $_[0]; # cut time, weekday $ua =~ s/..:..:...*//; $ua =~ s/\w+,//; # trim $ua =~ s/^\s*//; $ua =~ s/\s*$//; $ua =~ s/\s+/ /g; # bring them to the same format: dd mon yyyy $ua =~ s/^(\d )/0$1/; $ua =~ s/ (\d\d)$/ 19$1/; return $ua; } sub trim_time () { local($ua) = $_[0]; $ua = sprintf("%02d:00 - %02d:00",$1,$1+1) if $ua =~ /(..):..:../; return $ua; } sub trim_ct () { local($ua) = $_[0]; $ua =~ tr/A-Z/a-z/; $ua =~ s/=\"(\S*)\"/=$1/g; $ua =~ s/boundary=\S*//; return $ua; } ############################################################################# # mime stuff $base64="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; @tobase64 = split(//,$base64); for ($i = 0; $i < 64; $i++) { $frombase64{$tobase64[$i]} = $i }; sub un_mime () { local($ua) = $_[0]; local($a,$b,$c); # quoted-printable if ($ua =~ s/=\?[^?]+\?[Qq]\?([^?]+)\?=/$1/g) { $ua =~ s/=([A-F0-9]{2})/sprintf("%c",hex($1))/ge; } # base64 if ($ua =~ s/=\?[^?]+\?[Bb]\?([^?]+)\?=/$1/) { for($a=$b=0,$c=""; $ua=~s/([A-Za-z0-9+\/])//;) { $a=($a<<6)|$frombase64{$1}; $b+=6; $c = $c . sprintf("%c",($a>>($b-=8)) & 0xff) if ($b>=8); } $ua = "$c"; } return $ua; } ############################################################################# # be user-friendly sub usage() { print <Header< -all print all results (default is to drop everything below 1%) -raw print unmodified results (defaults is to trim the lines a bit, remove version numbers etc.). -f Header Value use only articles where the Content of >Header< matches >Value<. >Value< is a regex. If it is'nt, perl will barf. Your fault. You probably want to use -all together with this one. -help print this text author: Gerd Knorr copying: GPL PLONK exit; } ######################################################################## # defaults $top = 1; $trim = 1; $filter = 0; $cut = 200; # parse args while ($ARGV[0] =~ /^-(.+)/) { if ($1 eq "all") { $top = 0; shift; } elsif ($1 eq "raw") { $trim = 0; shift; } elsif ($1 eq "from") { @headers = ( "Reply-To", "From"); $cut = 100; $trim_func = \&trim_from; shift; } elsif ($1 eq "subject") { @headers = ( "Subject"); $trim_func = \&trim_subject; shift; } elsif ($1 eq "date") { @headers = ( "Date"); $trim_func = \&trim_date; shift; } elsif ($1 eq "time") { @headers = ( "Date"); $trim_func = \&trim_time; shift; } elsif ($1 eq "ct") { @headers = ( "Content-Type"); $trim_func = \&trim_ct; shift; } elsif ($1 eq "h") { @headers = split / /,$ARGV[1]; shift; shift; } elsif ($1 eq "f") { $filter = 1; $fheader = $ARGV[1]; $fval = $ARGV[2]; shift; shift; shift; } elsif ($1 eq "help") { &usage(); } else { printf STDERR "unknown option $ARGV[0]\n"; exit 1; } } unless (@headers) { @headers = ( "X-Newsreader", "X-Mailer", "User-Agent"); $trim_func = \&trim_ua; } $group = $ARGV[0]; ######################################################################## unless ($group) { &usage(); } $port = getservbyname("nntp", 'tcp'); $iaddr = inet_aton($newsserver); $paddr = sockaddr_in($port, $iaddr); $proto = getprotobyname('tcp'); socket(SOCK, PF_INET, SOCK_STREAM, $proto) || die "socket: $!"; connect(SOCK, $paddr) || die "connect: $!"; SOCK->autoflush(1); # greeting read_answer; print SOCK "mode reader\r\n"; read_answer; # select group print SOCK "group $group\r\n"; read_answer; ($code,$dummy,$low,$high) = split (/\s+/,$response); printf STDERR "$group: $low-$high (%d articles)\n",$high-$low+1; # get from headers... foreach $header (@headers) { print SOCK "xhdr $header $low-$high\r\n"; read_answer; $n=0; $i=0; $j=0; for $line (@lines) { die unless $line =~ /([0-9]+)\s+([^\r\n]*)/; $n++; next if $ua[$n-1]; $i++; next if $2 eq "(none\)"; $j++; $ua[$n-1] = $2; } print STDERR "$header: got $j/$i\n"; } # filtering (well, this is the easy-to-implement way, not the fast one...) if ($filter) { print SOCK "xhdr $fheader $low-$high\r\n"; read_answer; $n=0; $i=0; $j=0; for $line (@lines) { die unless $line =~ /([0-9]+)\s+([^\r\n]*)/; $n++; $i++ unless $2 eq "(none\)"; if ($2 =~ /$fval/) { $j++; } else { $ua[$n-1] = "*** filtered ***"; } } print STDERR "$fheader: got $i/$n, $j/$i matches ($fval)\n"; } for ($i = 0; $i < $n; $i++) { $ua[$i] = "*** unknown ***" unless $ua[$i]; $ua[$i] = &un_mime($ua[$i]); $ua[$i] = &$trim_func($ua[$i]) if $trim_func and $trim == 1; } foreach $prog (@ua) { $stats{$prog}++; } $others=0; foreach $prog (sort { $stats{$b} <=> $stats{$a} } keys %stats) { if ($stats{$prog}*$cut > $n || $top == 0) { printf "%5.2f%% (%4d) %s\n",$stats{$prog}*100/$n,$stats{$prog},$prog; } else { $others += $stats{$prog}; } } printf "%5.2f%% (%4d) *** others ***\n",$others*100/$n,$others if $others; print SOCK "quit\r\n"; read_answer; close (SOCK); exit;