#!/bin/perl
# validate.pl - a CGI script to validate changed HTML & CSS files
# on a Web site
#
#
# The purpose of this script is to make checking the validity of a
# site's HTML & CSS easier. It employs the online HTML & CSS
# validators offered by the World Wide Web consortium at w3.org to
# do the validation.
#
# This script:
#
# 1. Finds all files that have changed since the last time
# the script was run.
#
# 2. Submits each file to the appropriate validator.
#
# 3. Saves each file's results, after cleaning them up a
# little.
#
# 4. Returns an HTML page with a list of valid files and a list
# of files with errors. If there are files with errors, the
# details of each error are listed.
#
# If there are no files that have changed since the last time the
# script was run, a message to that effect is printed.
#
# If there are
#
# Requirements:
#
# 1. This script must be run from a Web server's CGI directory. It
# returns an HTML page viewable in any browser.
#
# 2. A dummy file whose timestamp is used to record the last time
# the script was run. This file must be created manually. See
# the "globals" section.
#
# 3. The availability of the Unix utilities 'touch' and 'find'.
# Free versions for Windows are available from CygWin.
#
# Customization: the lines with !!! comments can be customized.
#
# This script has been tested with several browsers, Windows 95,
# the CygWin utilities, Perl 5.6.0, and the TinyWeb server.
#
# Mark L. Irons
# 2 August 2002
use LWP::Simple;
#----------------------------------------------------------------------
# globals
#----------------------------------------------------------------------
# Site-specific settings: customize the lines marked !!! for your site.
# $WebHome is the directory tree that is searched for changed files, and
# the timestamp file is expected directly inside it.
$WebHome = "/www/"; # !!!
$validationTimestampFileName = "timestamp.of.last.validation"; # !!!
# Build the full path from the two settings above, so that changing the
# file name in one place is enough.
$validationTimestampFile = $WebHome . $validationTimestampFileName;
# define validators & associated text strings to search their output for
# (these shouldn't need modification)
#
# For each file type ("HTML", "CSS"):
#   %Validator      - URL prefix; the address to check is appended to it
#   %GoodMessage    - pattern whose presence in the report means "valid"
#   %OpenDelimiter  - start of the error section of the report
#   %CloseDelimiter - end of the error section of the report
#
# NOTE(review): the HTML delimiters below are bare newlines and the CSS
# open delimiter is empty, which suggests the markup these strings were
# meant to match is missing -- verify against the validators' current
# output before relying on them.
# HTML
$Validator{"HTML"} = "http://validator.w3.org/check?uri=";
$GoodMessage{"HTML"} = "No errors found!";
$OpenDelimiter{"HTML"} = "
";
$CloseDelimiter{"HTML"} = "
";
# CSS
$Validator{"CSS"} = "http://jigsaw.w3.org/css-validator/validator?uri=";
$GoodMessage{"CSS"} = "
No error or warning found
";
$OpenDelimiter{"CSS"} = "";
$CloseDelimiter{"CSS"} = "\n\n
\n\n";
#----------------------------------------------------------------------
# Exit with message if the timestamp file doesn't exist.
#----------------------------------------------------------------------
# Without the timestamp file there is no reference point for "changed
# since the last run", so report the problem as a complete HTML page
# and stop before any validation is attempted.
if (!-e $validationTimestampFile) {
    printPreamble();
    # NOTE(review): the here-document opener was truncated to "print <"
    # in the text this was restored from; the message wording is kept,
    # the surrounding tags are a reconstruction -- confirm if the exact
    # markup matters.
    print <<EndOfNoTimestamp;
<h1>Validation Failed</h1>
<p>The timestamp file <code>$validationTimestampFile</code>
doesn't exist.
You need to create it.</p>
EndOfNoTimestamp
    printPostamble();
    exit;
}
#----------------------------------------------------------------------
# Get list of files that have changed since the last time this script
# was run
#----------------------------------------------------------------------
# Walk the tree under $WebHome with the core File::Find module and
# collect, in @URLsToValidate, the full name of every entry ending in
# .html or .css whose modification time is later than that of the
# timestamp file. Doing the search in Perl avoids building a shell
# command from interpolated paths (the unquoted parentheses in the old
# 'find' command line are also special to most shells).
use File::Find;

# Modification time of the reference file; stat() returns an empty list
# on failure, which leaves this undefined.
my $lastValidationTime = (stat($validationTimestampFile))[9];
defined($lastValidationTime)
    || die "Couldn't find changed files: $!\n";

find(
    sub {
        # $_ is the entry's base name, $File::Find::name its full path
        return unless /\.(?:html|css)\z/;
        my $modified = (lstat($_))[9];
        push(@URLsToValidate, $File::Find::name)
            if defined($modified) && $modified > $lastValidationTime;
    },
    $WebHome
);
#----------------------------------------------------------------------
# If there's nothing to validate, tell the user so, update the
# timestamp, and exit.
#----------------------------------------------------------------------
# updateTimestamp is called after the preamble because it may print an
# error message of its own, which has to land inside the page.
if (scalar(@URLsToValidate) == 0) {
    printPreamble();
    updateTimestamp();
    # NOTE(review): the here-document opener was truncated to "print <"
    # in the text this was restored from; the heading tag around the
    # preserved wording is a reconstruction.
    print <<EndOfNothingToValidate;
<h1>No Files Need Validation</h1>
EndOfNothingToValidate
    printPostamble();
    exit;
}
#----------------------------------------------------------------------
# Submit each URL to its appropriate validator, and save the results
#----------------------------------------------------------------------
# For every entry of @URLsToValidate (in sorted order): fetch the
# matching validator's report and file the URL under @goodURLs or
# @badURLs. For bad URLs the error part of the report is stored in
# %URLerrors, keyed by URL, for the results page.
foreach my $url (sort @URLsToValidate) {
    # determine the file type, HTML or CSS; skip anything else instead
    # of reusing the type left over from the previous iteration
    my $filetype;
    if ($url =~ /\.html$/) { $filetype = "HTML"; }
    elsif ($url =~ /\.css$/) { $filetype = "CSS"; }
    else { next; }

    # submit to validator; LWP::Simple's get() returns undef on failure
    my $report = get($Validator{$filetype}.$url);
    if (!defined($report)) {
        # No report at all: list the URL as bad and say why, rather
        # than showing an empty error section.
        $URLerrors{$url} = "<p>Couldn't retrieve the validation results.</p>\n";
        push(@badURLs,$url); # add to bad list
        next;
    }

    # process the results
    if ($report =~ /$GoodMessage{$filetype}/) {
        push(@goodURLs,$url); # add to good list
        next;
    }

    # pull out the error messages and save them: they are everything
    # from the file type's open delimiter to its close delimiter
    my $pattern = "($OpenDelimiter{$filetype}.*?$CloseDelimiter{$filetype})";
    my ($errorHTML) = ($report =~ /$pattern/s);
    # if the delimiters weren't found there is nothing to extract
    $errorHTML = "" unless defined($errorHTML);

    # at the top of each HTML error section, print the URL for clarity
    # NOTE(review): the original assigned an empty string here, which
    # contradicts the comment above; the heading markup is a
    # reconstruction -- confirm.
    $URLerrors{$url} = ($filetype eq "HTML") ? "<h3>$url</h3>\n" : "";

    # remove the now-redundant "Errors" header from the CSS results
    $errorHTML =~ s#Errors\n##;

    # remove line links from HTML error sections
    # NOTE(review): the original pattern used \1 without any capture
    # group, which Perl rejects at compile time. This version assumes a
    # line link is an anchor whose text is just the line number and
    # replaces it with that number -- confirm against validator output.
    $errorHTML =~ s{<a\b[^>]*>(\d+)</a>}{$1}gi;

    # save the error report for each URL
    $URLerrors{$url} .= $errorHTML;
    push(@badURLs,$url); # add to bad list
}
#----------------------------------------------------------------------
# Create the page of results
#----------------------------------------------------------------------
# Prints the report page: first the valid files (@goodURLs), then the
# files with errors (@badURLs) as a list of links, followed by each
# file's saved error report from %URLerrors.
#
# NOTE(review): the visible wording ("Validation Results", "Okay",
# "Files Containing Errors", "details", "Back") is kept from the
# original; the tags around it are a reconstruction. The original
# incremented $counter per bad URL without ever using it, which
# suggests numbered anchors linking "details" to each error section --
# confirm the exact markup if it matters.
printPreamble();
updateTimestamp();
print "<h1>Validation Results</h1>\n";

if (@goodURLs) {
    print "<h2>Okay</h2>\n\n<ul>\n";
    foreach my $url (@goodURLs) {
        print "<li>$url</li>\n";
    }
    print "</ul>\n";
}

if (@badURLs) {
    print "<h2><a name=\"errors\">Files Containing Errors</a></h2>\n\n<ul>\n";
    my $counter = 0;   # numbers the anchor of each error section
    my $urllist = "";  # list items linking to the error sections
    my $errors  = "";  # the error sections themselves
    foreach my $url (@badURLs) {
        $urllist .= "<li>$url <a href=\"#error$counter\">details</a></li>\n";
        $errors .= "<a name=\"error$counter\"></a>";
        $errors .= $URLerrors{$url};
        $errors .= "<p><a href=\"#errors\">Back</a></p>\n";
        $counter++;
    }
    print "$urllist\n</ul>\n<hr>\n$errors\n";
}
printPostamble();
#======================================================================
# SUB updateTimestamp
#======================================================================
# Touch the validation timestamp file, so that the next run only picks
# up files changed after this one.
#
# Takes no arguments; reads the global $validationTimestampFile.
# If 'touch' fails, an HTML error message is printed to STDOUT (the page
# being generated). Nothing useful is returned either way.
sub updateTimestamp {
    # The list form of system() runs 'touch' directly instead of going
    # through a shell, so spaces or shell metacharacters in the path
    # cannot split or alter the command.
    my $result = system("touch", $validationTimestampFile);
    if ($result != 0) {
        print "<p>Couldn't touch the timestamp file ",
              "<code>$validationTimestampFile</code>. ",
              "Please touch this file manually.</p>\n";
    }
    return;
}
#======================================================================
# SUB printPreamble
#======================================================================
sub printPreamble {
    # Prints the start of the report page to STDOUT: the CGI
    # Content-type header, the blank line that ends the headers, and
    # the opening HTML up to <body>. Takes no arguments.
    #
    # NOTE(review): the here-document opener was truncated to "print <"
    # in the text this was restored from. Only the title text survived;
    # the header and tags are a minimal reconstruction -- confirm
    # whether the original page head contained anything more.
    print <<EndPreamble;
Content-type: text/html

<html>
<head>
<title>URL Validation Report</title>
</head>
<body>
EndPreamble
    return;
}
#======================================================================
# SUB printPostamble
#======================================================================
sub printPostamble {
    # Prints the end of the report page to STDOUT. Takes no arguments.
    #
    # NOTE(review): the here-document opener was truncated to "print <"
    # and its body was empty in the text this was restored from; the
    # closing tags are a reconstruction chosen to pair with the opening
    # markup printPreamble is expected to emit -- confirm.
    print <<EndPostamble;
</body>
</html>
EndPostamble
    return;
}