############################################
## ##
## WebTester ##
## by Darryl Burgdorf ##
## (e-mail burgdorf@awsd.com) ##
## ##
## last modified: 5/24/97 ##
## copyright (c) 1997 ##
## ##
## latest version is available from ##
## http://awsd.com/scripts/ ##
## ##
############################################
# COPYRIGHT NOTICE:
#
# Copyright 1997 Darryl C. Burgdorf. All Rights Reserved.
#
# This program may be used and modified free of charge by anyone, so
# long as this copyright notice and the header above remain intact. By
# using this program you agree to indemnify Darryl C. Burgdorf from any
# liability.
#
# Selling the code for this program without prior written consent is
# expressly forbidden. Obtain permission before redistributing this
# program over the Internet or in any other medium. In all cases
# copyright and header must remain intact.
# VERSION HISTORY:
#
# 1.04 05/24/97 Added trapping of "automount" paths
# Added handling of subpages contained in frames
# Fixed bug concerning raw directory references
# Bug caught & fixed by Petter Reinholdtsen
# 1.03 05/06/97 Made "site map" report optional
# Added ISDN to download time chart
# Trapped for TITLE tags with no title
# Fixed bug in handling of "exec cgi" calls
# Fixed "errors only / files not referenced" bug
# 1.02 04/19/97 Changed name from "WebMapper" to "WebTester"
# Added computation of total "download" file size
# (Hopefully) Improved speed of file parsing
# "Optionalized" parsing of CGI scripts
# Fixed bug affecting some SSI-based CGI calls
# The usual tweaks and minor bug fixes
# 1.01 02/27/97 Added ability to influence site map organization
# Added parsing of CGIs, SSIs and image maps
# Code adapted with permission from "Gen_Tree"
# Copyright (c) 1996 by Steffen Beyer
# Various minor bug fixes
# 1.00 02/03/97 Initial "public" release
# MainProg -- top-level driver for a WebTester run.
#
# Loads find.pl, sets up the global state, writes the site-check report
# and, when $MapFile is set, the site map as well; then exits.
# Takes no arguments and does not return (it calls exit).
sub MainProg {
    require "find.pl";

    &Initialize;
    &SiteCheck;

    # The site map is optional: it is produced only when $MapFile is set.
    &SiteMap if ($MapFile);

    exit;
}
# Initialize -- set up the global state used by the rest of the program.
#
# Reads:  $LocalPath, $LocalURL, $ParseCGI
# Sets:   $version, @days, @months, $time, the localtime() fields
#         ($sec .. $isdst), $date, @default_files, $temp_file,
#         %OkStatusMsgs, %FailStatusMsgs, $root, $AlsoAvoid and either
#         ($nbdy_uid, $nbdy_gid) or a placeholder $ParseCGI.
# Takes no arguments.
sub Initialize {
    $version = "1.04";

    @days   = ('Sunday','Monday','Tuesday','Wednesday','Thursday',
               'Friday','Saturday');
    @months = ('January','February','March','April','May','June',
               'July','August','September','October','November','December');

    $time = time;
    ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time);

    # localtime() returns the year as "years since 1900", so the century
    # must be added arithmetically; prefixing "19" yields "19100" in 2000.
    # $year itself is left untouched.
    $date = "$days[$wday], $mday $months[$mon] " . ($year + 1900);

    # File names tried, in this order, when a link points at a directory.
    @default_files = ('Welcome.html','welcome.html',
                      'index.html','index.shtml','README.html','index.cgi');

    # $$ (the process id) keeps concurrent runs from sharing a scratch file.
    $temp_file = "/tmp/webtester.$$";

    %OkStatusMsgs = (200,"OK",201,"Created",202,"Accepted",
                     203,"Partial Information",204,"No Response");

    # Negative and 6xx codes are not HTTP status codes; judging by their
    # messages they describe connection-level failures.
    %FailStatusMsgs = (-1,"Could Not Look Up Server",-2,"Could Not Open Socket",
        -3,"Could Not Bind Socket",-4,"Could Not Connect",301,"Found, But Moved",
        302,"Found, But Data Resides Under a Different URL",
        303,"Method",304,"Not Modified",400,"Bad Request",401,"Unauthorized",
        402,"Payment Required",403,"Forbidden",404,"Not Found",
        500,"Internal Error",501,"Not Implemented",
        502,"Service Temporarily Overloaded",503,"Gateway Timeout",
        600,"Bad Request",601,"Not Implemented",602,"Connection Failed",
        603,"Timed Out");

    # Derive $root by removing the path portion of $LocalURL from $LocalPath.
    $root = $LocalPath;
    if ($LocalURL =~ m#^http://([\w.-]+):?(\d*)($|/(.*))#) {
        local($url_path) = $3;
        # Skip the substitution when there is nothing meaningful to remove:
        #  - an empty pattern would make Perl reuse the last successful regex;
        #  - a bare "/" would delete every slash in $root.
        # \Q..\E makes the URL path match literally instead of as a regex.
        if ($url_path ne "" && $url_path ne "/") {
            $root =~ s/\Q$url_path\E//g;
        }
    }

    if ($ParseCGI) {
        # uid/gid of the "nobody" account.
        ($nbdy_uid,$nbdy_gid) = (getpwnam('nobody'))[2,3];
    }
    else {
        # Placeholder pattern for when CGI parsing is off; presumably chosen
        # so that no real path matches it.
        $ParseCGI = "xxxxxxxxxx";
    }

    # Extension pattern; callers append "$" to anchor it at the end of a name.
    $AlsoAvoid = "[\.](gif|jpg|jpeg|tif|tiff|mpg|mpeg|gz|Z|tar|zip|txt)";
}
# SiteCheck -- write the site-check report to $OutFile.
#
# Reads:  $InFile, $OutFile, $SiteName, $version, $date, $LocalPath,
#         $LocalURL, $ShowOnlyErrors, $IgnoreExternals, $MissingLinks and
#         the link tables filled in by Get_Refs.
# Writes: the report through the LOG filehandle; sets the global $fileref.
# Dies if $InFile does not exist or $OutFile cannot be opened for writing.
# Takes no arguments.
#
# NOTE(review): the quoted words inside the report text (e.g. "Errors Only")
# are written with escaped double quotes so that the literals parse.
sub SiteCheck {
    # Kept from the original; the -e test below performs its own stat.
    stat($InFile);
    die "Cannot find file $InFile\n" unless -e $InFile;

    # Fail loudly instead of silently discarding the whole report.
    open (LOG,">$OutFile") || die "Could not create $OutFile: $!\n";

    # --- Report header -------------------------------------------------
    print LOG "\n",
        "Site Check: $SiteName\n",
        "\n",
        "Site Check:\n$SiteName\n\n",
        "This report was generated by ",
        "",
        "WebTester $version\n\n";

    # Show the key file by its URL rather than its local path.
    $fileref = $InFile;
    $fileref =~ s/$LocalPath/$LocalURL/g;
    print LOG "Key File: ",
        "$fileref\n\n$date\n",
        "\n";

    if ($ShowOnlyErrors == 1) { print LOG "\"Errors Only\" Report"; }
    else { print LOG "Full Report"; }
    print LOG "";
    if ($IgnoreExternals == 1) {
        print LOG "\n(External URLs Not Checked)";
    }
    print LOG "\n\n";

    # --- Index of report sections --------------------------------------
    print LOG "\nIndex\n\n";
    unless ($ShowOnlyErrors == 1) {
        print LOG "- General Site Map ",
            "Information\n\n",
            "- Local Documents\n",
            "\n- \"Download\" ",
            "File Sizes & Times\n",
            "\n- Directories\n",
            "\n- Images & Binaries\n",
            "\n- \"File:\" Links\n",
            "\n- \"Mailto:\" Links\n",
            "\n- \"FTP:\" Links\n",
            "\n- \"Telnet:\" Links\n",
            "\n- \"Gopher:\" Links\n",
            "\n- \"News:\" Links\n",
            "\n- External URLs\n";
    }
    print LOG "\n- ",
        "Bad Links & Other Problems\n\n\n";

    # --- Collect links starting from the key file ----------------------
    &Get_Refs($InFile,"");

    # --- Informational sections (omitted in an errors-only report) -----
    # Each Print_List call passes a flattened table followed by a
    # "Heading::tag" string.
    unless ($ShowOnlyErrors == 1) {
        &Print_List(%FileList,"Local Documents::document");
        &Print_Sizes;
        &Print_List(%DirList,"Directories::directory");
        &Print_List(%ImageFileList,"Images & Binaries::image");
        &Print_List(%ExtFileList,"\"File:\" Links::file");
        &Print_List(%MailList,"\"Mailto:\" Links::mailto");
        &Print_List(%FTPList,"\"FTP:\" Links::ftp");
        &Print_List(%TelnetList,"\"Telnet:\" Links::telnet");
        &Print_List(%GopherList,"\"Gopher:\" Links::gopher");
        &Print_List(%NewsList,"\"News:\" Links::news");
        &Print_List(%HTTPList,"External URLs::url");
    }

    # --- Problem sections (always printed) -----------------------------
    &Print_List(%LostFileList,"Files Not Found::nofile");
    &Print_List(%UnreadableList,"Files Not World Readable::badfile");
    if ($MissingLinks == 1) {
        &PrintMissingLinks;
    }
    &Print_List(%DirNotFoundList,"Directories Not Found::nodirectory");
    &Print_List(%LostAnchorList,"Name Anchors Not Found::noanchor");
    unless ($IgnoreExternals == 1) {
        &Check_External_URLs(%HTTPList);
    }

    print LOG "\n\n";
    close (LOG);
}
# SiteMap -- write the site map to $MapFile.
#
# Starting from $InFile (level 1), assigns a level to every eligible local
# document reachable through %RefsTo, up to ten passes deep, then prints
# the resulting tree with BuildMap.
#
# Reads:  $InFile, $MapFile, $LocalPath, $AlsoAvoid, $ParseCGI, %RefsTo,
#         %MinLevel
# Writes: @SiteMap, %SiteLevel, @references, and the MAP filehandle.
# Dies if $MapFile cannot be opened for writing.
# Takes no arguments.
sub SiteMap {
    # Fail loudly instead of silently producing no map.
    open (MAP,">$MapFile") || die "Could not create $MapFile: $!\n";

    push (@SiteMap, $InFile);
    $SiteLevel{$InFile} = 1;

    # One pass per level: pass $key expands the files sitting at level $key.
    # Files added during a pass are recorded at level $key+1, so the
    # "ne $key" test leaves them for the following pass.
    foreach $key (1..10) {
        foreach $filename (@SiteMap) {
            next if ($SiteLevel{$filename} ne $key);
            # %RefsTo values are space-separated lists of referenced names.
            @references = split(/ /,$RefsTo{$filename});
            foreach $reference (@references) {
                # Already placed in the map.
                next if ($SiteLevel{$reference});
                # Not under the local tree.
                next if (!($reference =~ /$LocalPath/));
                # Extension on the avoid list.
                next if ($reference =~ /$AlsoAvoid$/);
                # CGI-looking names are kept only if they match $ParseCGI.
                next if (($reference =~ /cgi-bin|cgi$|pl$/i) && !($reference =~ /$ParseCGI/));
                # Directories are not map entries.
                next if (-d $reference);
                # Entries with a larger %MinLevel are held back to a deeper level.
                next if ($MinLevel{$reference} > ($key+1));
                push (@SiteMap, $reference);
                $SiteLevel{$reference} = $SiteLevel{$filename}+1;
            }
        }
    }

    print MAP "\n";
    &BuildMap($InFile);
    print MAP "\n\n";
    close (MAP);
}
# BuildMap -- recursively print one site-map entry and its children to MAP.
#
# Argument: $_[0] is the file name of the entry to print.
# Reads:    $InFile, $LocalPath, $LocalURL, %title, %RefsTo, %SiteLevel
# Writes:   the MAP filehandle; the globals $fileref and @references.
#
# An entry's children are the names listed in $RefsTo{entry} whose
# %SiteLevel is exactly one more than the entry's own level.
sub BuildMap {
    local($node) = $_[0];
    local(@sortrefs, $lastref);

    # URL form of the entry; used as the label when no title is recorded.
    $fileref = $node;
    $fileref =~ s/$LocalPath/$LocalURL/g;

    print MAP "" if ($node ne $InFile);
    print MAP "";
    local($label) = $title{$node} ? $title{$node} : $fileref;
    print MAP "$label";
    print MAP "\n";
    print MAP "\n" if ($node eq $InFile);
    print MAP "\n\n";

    # Sorting puts duplicate references next to each other, so comparing
    # against the previously printed one is enough to print each child once.
    @references = split(/ /, $RefsTo{$node});
    @sortrefs = sort @references;
    foreach $reference (@sortrefs) {
        if ($SiteLevel{$reference}
            && $SiteLevel{$reference} eq ($SiteLevel{$node} + 1)
            && $reference ne $lastref) {
            &BuildMap($reference);
            $lastref = $reference;
        }
    }

    print MAP "\n\n";
}
sub Get_Refs {
local(%newlist);
local(%bytesizer);
local($file);
local($dir);
local($Old_Dir);
local($filename);
$dir=&Dir_Name($_[0]);
if ($dir eq "") { $dir = &Get_PWD; }
$file=&Base_Name($_[0]);
if ($_[0] =~ m/.*(http:.*)/i) {
if (!defined($HTTPList{$1})) { $HTTPList{$1} = $_[1]; }
else { $HTTPList{$1} = "$HTTPList{$1} $_[1]"; }
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $1; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $1"; }
return;
}
if ($_[0] =~ m/.*(ftp:.*)/i) {
if (!defined($FTPList{$1})) { $FTPList{$1} = $_[1]; }
else { $FTPList{$1} = "$FTPList{$1} $_[1]"; }
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $1; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $1"; }
return;
}
if ($_[0] =~ m/.*(telnet:.*)/i) {
if (!defined($TelnetList{$1})) { $TelnetList{$1} = $_[1]; }
else { $TelnetList{$1} = "$TelnetList{$1} $_[1]"; }
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $1; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $1"; }
return;
}
if ($_[0] =~ m/.*(gopher:.*)/i) {
if (!defined($GopherList{$1})) { $GopherList{$1} = $_[1]; }
else { $GopherList{$1} = "$GopherList{$1} $_[1]"; }
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $1; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $1"; }
return;
}
if ($_[0] =~ m/.*(mailto:.*)/i) {
if (!defined($MailList{$1})) { $MailList{$1} = $_[1]; }
else { $MailList{$1} = "$MailList{$1} $_[1]"; }
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $1; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $1"; }
return;
}
if ($_[0] =~ m/.*(news:.*)/i) {
if (!defined($NewsList{$1})) { $NewsList{$1} = $_[1]; }
else { $NewsList{$1} = "$NewsList{$1} $_[1]"; }
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $1; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $1"; }
return;
}
if ($_[0] =~ m/.*(file:.*)/i) {
if (!defined($ExtFileList{$1})) { $ExtFileList{$1} = $_[1]; }
else { $ExtFileList{$1} = "$ExtFileList{$1} $_[1]"; }
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $1; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $1"; }
return;
}
if ($file eq "") {
if (-d $_[0]) {
if (!defined($DirList{$_[0]})) { $DirList{$_[0]} = $_[1]; }
else { $DirList{$_[0]} = "$DirList{$_[0]} $_[1]"; }
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $_[0]; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $_[0]"; }
}
else {
if (!defined($DirNotFoundList{$_[0]})) {
$DirNotFoundList{$_[0]} = $_[1];
}
else {
$DirNotFoundList{$_[0]} = "$DirNotFoundList{$_[0]} $_[1]";
}
if (!defined($BadRefsTo{$_[1]})) { $BadRefsTo{$_[1]} = $_[0]; }
else { $BadRefsTo{$_[1]} = "$BadRefsTo{$_[1]} $_[0]"; }
return;
}
}
$Old_Dir = &Get_PWD;
chdir($dir);
$dir = &Get_PWD;
if ($_[0] =~ m#(^/cgi-bin/.*)#i) {
$filename = $_[0];
$filename =~ s#^/cgi-bin#$CGIPath#oi;
}
else { $filename = $dir . $file; }
$filename =~ s/\?.*$//o;
return if ($filename eq $_[1]);
if ($filename =~ m/(.+)#(.+)/) {
$filename = "$1#$2";
if (!(&CheckAnchor($1,$2))) {
if (!defined($LostAnchorList{$filename})) {
$LostAnchorList{$filename} = $_[1];
}
else {
$LostAnchorList{$filename} = "$LostAnchorList{$filename} $_[1]";
}
if (!defined($BadRefsTo{$_[1]})) { $BadRefsTo{$_[1]} = $filename; }
else { $BadRefsTo{$_[1]} = "$BadRefsTo{$_[1]} $filename"; }
}
return;
}
if (-d $filename) {
$found = 0;
foreach $default_file (@default_files) {
if ((-f ($filename.'/'.$default_file)) || (-f ($filename.$default_file))) {
$dirname=$filename;
$file= $default_file;
$found = 1;
last;
}
}
if (! $found) {
if (!defined($LostFileList{$filename})) {
$LostFileList{$filename} = $_[1];
}
else {
$LostFileList{$filename} = "$LostFileList{$filename} $_[1]";
}
if (!defined($BadRefsTo{$_[1]})) { $BadRefsTo{$_[1]} = $filename; }
else { $BadRefsTo{$_[1]} = "$BadRefsTo{$_[1]} $filename"; }
return;
}
chdir($dirname);
$dir = &Get_PWD;
$filename = $dir . $file;
}
$fileseen{$filename} = 1;
if (! -f _) {
if (!defined($LostFileList{$filename})) {
$LostFileList{$filename} = $_[1];
}
else {
$LostFileList{$filename} = "$LostFileList{$filename} $_[1]";
}
if (!defined($BadRefsTo{$_[1]})) { $BadRefsTo{$_[1]} = $filename; }
else { $BadRefsTo{$_[1]} = "$BadRefsTo{$_[1]} $filename"; }
return;
}
if (-B _) {
if (!defined($ImageFileList{$filename})) { $ImageFileList{$filename} = $_[1]; }
else { $ImageFileList{$filename} = "$ImageFileList{$filename} $_[1]"; }
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $filename; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $filename"; }
return;
}
if (!defined($FileList{$filename})) {
$FileList{$filename} = $_[1];
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $filename; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $filename"; }
}
else {
$FileList{$filename} = "$FileList{$filename} $_[1]";
if (!defined($RefsTo{$_[1]})) { $RefsTo{$_[1]} = $filename; }
else { $RefsTo{$_[1]} = "$RefsTo{$_[1]} $filename"; }
return;
}
$mode = (stat($filename))[2];
$readmode = ($mode & 4);
if ($readmode == 0) {
if (!defined($UnreadableList{$filename})) {
$UnreadableList{$filename} = $_[1];
}
else {
$UnreadableList{$filename} = "$UnreadableList{$filename} $_[1]";
}
}
return if ($filename =~ /$AlsoAvoid$/);
return if ($Avoid && ($filename =~ /$Avoid/));
return unless (($filename =~ /$LocalPath/) | ($filename =~ /$CGIPath/));
return if (($filename =~ /cgi-bin|cgi$|pl$/i) && !($filename =~ /$ParseCGI/));
$err = 0;
if ($filename =~ /cgi-bin|cgi$|pl$/i) {
$( = $nbdy_gid;
$) = $nbdy_gid;
$< = $nbdy_uid;
$> = $nbdy_uid;
$cgifile = $filename;
$cgifile =~ s/$CGIPath/$CGIURL/g;
$cgifile =~ s/$LocalPath/$LocalURL/g;
$ENV{'HTTP_USER_AGENT'} = 'Mozilla/3.0';
$ENV{'SCRIPT_NAME'} = $cgifile;
open(HTML, "$filename |") || ($err = 1);
if ($err) { return; }
}
else {
open(HTML, $filename) || ($err = 1);
if ($err) { return; }
}
open(TEMP,">$temp_file") || die "Could not create $temp_file\n";
$offset=0;
do {
$size=read(HTML,$html_text,32768,$offset);
$offset=$offset+$size;
} until $size != 32768;
$bytesize{$filename} = $offset;
close(HTML);
if ($filename =~ /cgi-bin|cgi$|pl$/i) {
$< = 0;
$> = 0;
$( = 0;
$) = 0;
}
$html_text =~ s/\n/ /g;
if ($html_text =~ /([^<>]+)<\/TITLE>/i) { $title{$filename} = $1; }
$html_text =~ s/[^<]*//;
$html_text =~ s/(<[^>]*>)[^<]*/$1\n/g;
print TEMP "$html_text";
$html_text="";
close(TEMP);
open(HTML, $temp_file) || die "Could not open $temp_file\n";
while () {
chop;
if (/