#!/usr/local/bin/perl # # A simple CGI script that reads a list of words and produces # a page with some suggested word-based URLs to go look at. # # script by David Chess, http://www.davidchess.com/, September 1999 # this code is hereby committed to the public domain, for the # furtherance of the general happiness of humankind. # use strict; # Perhaps track referers, to see how famous we are. my $reflog = ""; if ($reflog) { if (defined $ENV{HTTP_REFERER}) { if (open FH,">>$reflog") { print FH timestamp()." WordURL.cgi: ".$ENV{HTTP_REFERER}." (".$ENV{REMOTE_HOST}.")\n"; close FH } } } srand(); my @wordlist; # A naughty global variable print <<'EOD'; Content-type: text/html Toys -- Random URLs to try EOD my $bodytext = getBodyText(); # bodytext() does all the hard work if (not $bodytext) { # we just apologize if it fails print <<'EOD';
Ooops! Something seems to have gone wrong. Most likely the list of words is missing or damaged. Sorry about that...
EOD exit; } my $now = gmtime(); # and we write little headers and footers print <<"EOD";

Some random URLs to try

$bodytext


This page generated dynamically on $now,
by a script from the toys page on DavidChess.com. Not a fat-free food.

EOD exit; # and that's all! # # This is where the real work gets done # sub getBodyText { my $answer; local *FH; # read the words from "words.txt" open FH, "; close FH; # Write the introduction $answer .= <<'EOD'; Most/all common English words have already been registered as domain names in the COM top-level (by professional collectors like Digimedia or Telepathy.com if noone else). So have many combinations and obvious extensions of words. This is one sometimes-amusing way of finding new sites to read and absorb HTML formatting tricks from. Here are a few randomly-generated ones; reload this page to get some more.

(Note that while someone probably owns many/most/all of the domains pointed to below, that doesn't mean that they're all correctly resolved today, or that they all have HTTP daemons listening on them. But many will. Try and see! To see if a domain-name is taken, click on the little "NSI" or "all" by the URL, or go somewhere like this or (for more names but less info) this.)

www.[word].com; almost certainly taken

EOD # Two table columns, each with five simple URLs foreach (1..2) { $answer .= "\n"; } $answer .= <<'EOD';
\n"; foreach (1..5) { $answer .= word_URL_link()."
\n"; } $answer .= "

More complex; perhaps taken, perhaps not

EOD # Then two columns with complex URLs foreach (1..2) { $answer .= "\n"; } $answer .= <<'EOD';
\n"; foreach (1..5) { $answer .= word_URL_link(1)."
\n"; } $answer .= "

EOD return $answer; } # # Well, OK, *this* is where the real work gets done! No? # sub word_URL_link { my $domain = word_domain(@_); my $url = "http://www.$domain/"; my $answer = qq#$url#; my $nsiquery = "http://www.networksolutions.com/cgi-bin/whois/whois?STRING=$domain"; my $allquery = "http://www.nsiregistry.net/cgi-bin/whois?whois_nic=$domain"; $answer .= qq#  NSI#; $answer .= qq# all#; return $answer; } # # This time for sure... # sub word_domain { my $kind = shift; my $baseword = random_word(); # Pick a basic word return "$baseword.com" if not $kind; # The simple case return "$baseword.net" if rand()<0.04; # The simple word.net return "$baseword.org" if rand()<0.05; # and word.org cases my $top = (rand()<0.04) ? "net" : (rand()<0.05) ? "org" : "com"; my $bigword; # "foo" => "fooer", "wire" => "wirer". rarely. if (rand()<0.02) { $baseword .= "er"; $baseword =~ s/eer$/er/; return "$baseword.$top" if rand()<0.1; } # "door" => "dooring", "wire" => "wiring". rarely. if (rand()<0.01) { $baseword .= "ing"; $baseword =~ s/eing$/ing/; return "$baseword.$top" if rand()<0.1; } # "boy" => "boys", "dress" => "dresses". rarely. if (rand()<0.02) { $baseword .= "s"; $baseword =~ s/ss$/ses/; return "$baseword.$top" if rand()<0.1; } # "xyz.whatever", rarely if (rand()<0.03) { $baseword = ('a'..'z')[int(rand(26))]. ('a'..'z')[int(rand(26))]. ('a'..'z')[int(rand(26))]; return "$baseword.$top" if rand()<0.5; } my $roll = rand(); # Stick another word before, or after, or both, the base word if ($roll<0.45) { $bigword = random_prefix()."\u$baseword"; } elsif ($roll<0.9) { $bigword = "\u$baseword".random_suffix(); } else { $bigword = random_prefix()."\u$baseword".random_suffix(); } # and we're done! return "$bigword.$top"; } sub random_word { my $answer = @wordlist[int(rand($#wordlist))]; chomp $answer; $answer = lc($answer); $answer =~ s/\W//g; return $answer; } sub random_prefix { if (rand()<0.5) { my $newword = random_word(); return "\u$newword"; } my @list = qw"Live e i Extreme The Big Hot for Cyber New Great Buy Open Easy Real Net My Web Super"; return $list[int(rand(@list))]; } sub random_suffix { if (rand()<0.3) { my $newword = random_word(); return "\u$newword"; } my @list = qw"Live Now City World Online Net Deals Street Central News Mart Web House Boy Girl Bot"; return $list[int(rand(@list))]; } sub timestamp { my @a = gmtime(); return sprintf "%04d/%02d/%02d %02d:%02d:%02d GMT", 1900+$a[5], $a[4]+1, $a[3], @a[2,1,0]; }