|
The Mangler
0 #!/usr/bin/perl
1 # File: mangler.cgi
2
3 use LWP::UserAgent;
4 use HTML::Parse;
5 use HTTP::Status;
6 use CGI qw(:standard :html3);
7 $ICON = "pow.gif";
8
9 srand();
10
11 $url_to_mangle = param(’mangle’) if request_method() eq ’POST’;
12
13
14 print header();
15
16 if ($url_to_mangle && mangle($url_to_mangle)) {
17 ; # nothing to do
18 } else {
19 prompt_for_url();
20 }
21
22 # ---------------------------------------------------
23 # THIS SECTION IS WHERE URLs ARE FETCHED AND MANGLED
24 # ---------------------------------------------------
25 sub mangle {
26 my $url = shift;
27 my $agent = new LWP::UserAgent;
28 my $request = new HTTP::Request(’GET’,$url);
29 my $response = $agent->request($request);
30
31 unless ($response->isSuccess) {
32 print h1(’Error Fetching URL’),
33 "An error occurred while fetching the document located at ",
34 a({href=>$url},"$url."),
35 p(),
36 "The error was ",strong(statusMessage($response->code)),".",
37 hr();
38 return undef;
39 }
40
41 # make sure that it’s an HTML document!
42 my $type = $response->header(’Content-type’);
43 unless ($type eq ’text/html’) {
44 print h1("Document isn’t an HTML File!"),
45 "The URL ",a({href=>$url},"$url"),
46 " is a document of type ",em($type),". ",
47 "Please choose an HTML file to mangle.",
48 hr();
49 return undef;
50 }
51
52 print start_html(-title=>’Mangled Document’,
53 -xbase=>$url),
54 div({-align=>CENTER},
55 h1("The Mangler"),
56 strong(a({-href=>$url},$url))
57 ),
58 p(),
59 a({-href=>self_url()},"Mangle another page"),hr();
60
61 my $parse_tree = parse_html($response->content);
62 $parse_tree->traverse(\&swallow);
63 $parse_tree->traverse(\®urgitate);
64 $parse_tree->delete();
65 1;
66 }
67
68 sub swallow {
69 my ($node,$start,$depth) = @_;
70 return 1 if ref($node);
71 return &Travesty::swallow($node);
72 }
73
74 sub regurgitate {
75 my ($node,$start,$depth) = @_;
76 if (ref($node)) {
77 return 1 if $node->tag =~ /^(html|head|body)/i;
78 return 0 if $node->isInside(’head’);
79 &Travesty::reset() if $start;
80 print $node->starttag if $start;
81 print $node->endtag unless $start;
82 } else {
83 my @words = split(/\s+/,$node);
84 print &Travesty::regurgitate(scalar(@words));
85 }
86 1;
87 }
88
89 # ---------------------------------------------------
90 # THIS SECTION IS WHERE THE PROMPT IS CREATED
91 # ---------------------------------------------------
92 sub prompt_for_url {
93 print start_html(’The Mangler’),
94 -e $ICON ? img({-src=>$ICON,-align=>LEFT}): ’’,
95 h1(’The Mangler’),
96 "Enter the URL of an HTML page and press ",em("Mangle. "),
97 "For best results, choose a document containing several pages of text.",
98 "Very large documents may take a long time to process, so have patience.",
99
100 start_form(),
101 textfield(-name=>’mangle’, -size=>60),
102 submit(-value=>’Mangle’),
103 end_form(),
104 hr(),
105 address(
106 "Author: ",
107 a({-href=>’http://www.genome.wi.mit.edu/~lstein/’},’Lincoln D. Stein’),
108 ),
109 end_html();
110 }
111
112 # derived from the code in Perl’s eg/ directory
113 package Travesty;
114
115 sub swallow {
116 my $string = shift;
117 $string =~ tr/\n/ /s;
118
119 push(@ary, split(/\s+/, $string));
120 while ($#ary > 1) {
121 $a = $p;
122 $p = $n;
123 $w = shift(@ary);
124 $n = $num{$w};
125 if ($n eq ’’) {
126 push(@word, $w);
127 $n = pack(’S’, $#word);
128 $num{$w} = $n;
129 }
130 $lookup{$a . $p} .= $n;
131 }
132 1;
133 }
134
135 sub reset {
136 my($key) = each(%lookup);
137 ($a,$p) = (substr($key,0,2),substr($key,2,2));
138 }
139
140 sub regurgitate {
141 my $words = shift;
142 my $result = ’’;
143 while (--$words >= 0) {
144
145 $n = $lookup{$a . $p};
146 ($foo,$n) = each(%lookup) if $n eq ’’;
147 $n = substr($n,int(rand(length($n))) & 0177776,2);
148 $a = $p;
149 $p = $n;
150 ($w) = unpack(’S’, $n);
151 $w = $word[$w];
152
153 # Most of this formatting is only for <PRE> text.
154 # We’ll leave it in for that purpose.
155 $col += length($w) + 1;
156 if ($col >= 65) {
157 $col = 0;
158 $result .= "\n";
159 } else {
160 $result .= ’ ’;
161 }
162 $result .= $w;
163 if ($w =~ /\.$/) {
164 if (rand() < .1) {
165 $result .= "\n";
166 $col = 80;
167 }
168 }
169
170 }
171 return $result;
172 }
|
|