Listing 1.
The Mangler
Lincoln D. Stein

Create Surreal HTML Pages with The Mangler
The Perl Journal, Spring 1997
  The Mangler


0 #!/usr/bin/perl 

1 # File: mangler.cgi 

2 

3 use LWP::UserAgent; 

4 use HTML::Parse; 

5 use HTTP::Status; 

6 use CGI qw(:standard :html3); 

7 $ICON = "pow.gif"; 

8 

9 srand(); 

10 

11 $url_to_mangle = param(’mangle’) if request_method() eq ’POST’; 

12 

13 

14 print header(); 

15 

16 if ($url_to_mangle && mangle($url_to_mangle)) { 

17     ;				# nothing to do 

18 } else { 

19     prompt_for_url(); 

20 } 

21 

22 # --------------------------------------------------- 

23 # THIS SECTION IS WHERE URLs ARE FETCHED AND MANGLED 

24 # --------------------------------------------------- 

25 sub mangle { 

26   my $url = shift; 

27   my $agent = new LWP::UserAgent; 

28   my $request = new HTTP::Request(’GET’,$url); 

29   my $response = $agent->request($request); 

30 

31   unless ($response->isSuccess) { 

32     	print h1(’Error Fetching URL’), 

33 	   "An error occurred while fetching the document located at ", 

34 	     a({href=>$url},"$url."), 

35 	     p(), 

36   "The error was ",strong(statusMessage($response->code)),".", 

37 	     hr(); 

38 	     return undef; 

39   } 

40 

41   # make sure that it’s an HTML document! 

42   my $type = $response->header(’Content-type’); 

43   unless ($type eq ’text/html’) { 

44     	print h1("Document isn’t an HTML File!"), 

45 	    "The URL ",a({href=>$url},"$url"), 

46 	    " is a document of type ",em($type),". ", 

47 	    "Please choose an HTML file to mangle.", 

48 	    hr(); 

49     	return undef; 

50   } 

51 

52   print start_html(-title=>’Mangled Document’, 

53 		                   -xbase=>$url), 

54         div({-align=>CENTER}, 

55 	            h1("The Mangler"), 

56 	            strong(a({-href=>$url},$url)) 

57 	        ), 

58 	        p(), 

59      a({-href=>self_url()},"Mangle another page"),hr(); 

60 

61   my $parse_tree = parse_html($response->content); 

62   $parse_tree->traverse(\&swallow); 

63   $parse_tree->traverse(\&regurgitate); 

64   $parse_tree->delete(); 

65   1; 

66 } 

67 

68 sub swallow { 

69     my ($node,$start,$depth) = @_; 

70     return 1 if ref($node); 

71     return &Travesty::swallow($node); 

72 } 

73 

74 sub regurgitate { 

75     my ($node,$start,$depth) = @_; 

76     if (ref($node)) { 

77 	        return 1 if $node->tag =~ /^(html|head|body)/i; 

78         	return 0 if $node->isInside(’head’); 

79         	&Travesty::reset() if $start; 

80         	print $node->starttag if $start; 

81     	    print $node->endtag unless $start; 

82     } else { 

83 	        my @words = split(/\s+/,$node); 

84 	        print &Travesty::regurgitate(scalar(@words)); 

85     } 

86     1; 

87 } 

88 

89 # --------------------------------------------------- 

90 # THIS SECTION IS WHERE THE PROMPT IS CREATED 

91 # --------------------------------------------------- 

92 sub prompt_for_url { 

93    print start_html(’The Mangler’), 

94    -e $ICON ? img({-src=>$ICON,-align=>LEFT}): ’’, 

95    h1(’The Mangler’), 

96 "Enter the URL of an HTML page and press ",em("Mangle. "), 

97 "For best results, choose a document containing several pages of text.", 

98 "Very large documents may take a long time to process, so have patience.", 

99 

100   start_form(), 

101   textfield(-name=>’mangle’, -size=>60), 

102   submit(-value=>’Mangle’), 

103   end_form(), 

104   hr(), 

105   address( 

106 		      "Author: ", 

107 		 a({-href=>’http://www.genome.wi.mit.edu/~lstein/’},’Lincoln D. Stein’), 

108 		 ), 

109 end_html(); 

110 } 

111 

112 # derived from the code in Perl’s eg/ directory 

113 package Travesty; 

114 

115 sub swallow { 

116     my $string = shift; 

117     $string =~ tr/\n/ /s; 

118 

119     push(@ary, split(/\s+/, $string)); 

120     while ($#ary > 1) { 

121         	$a = $p; 

122         	$p = $n; 

123 	        $w = shift(@ary); 

124         	$n = $num{$w}; 

125         	if ($n eq ’’) { 

126 	            push(@word, $w); 

127 	            $n = pack(’S’, $#word); 

128 	            $num{$w} = $n; 

129 	        } 

130         	$lookup{$a . $p} .= $n; 

131     } 

132     1; 

133 } 

134 

135 sub reset { 

136     my($key) = each(%lookup); 

137     ($a,$p) = (substr($key,0,2),substr($key,2,2)); 

138 } 

139 

140 sub regurgitate { 

141    my $words = shift; 

142    my $result = ’’; 

143    while (--$words >= 0) { 

144 

145        $n = $lookup{$a . $p}; 

146        	($foo,$n) = each(%lookup) if $n eq ’’; 

147        	$n = substr($n,int(rand(length($n))) & 0177776,2); 

148        	$a = $p; 

149 	       $p = $n; 

150        	($w) = unpack(’S’, $n); 

151        	$w = $word[$w]; 

152 	 

153 	      # Most of this formatting is only for <PRE> text. 

154       	# We’ll leave it in for that purpose.

155 	      $col += length($w) + 1; 

156 	      if ($col >= 65) { 

157 	          $col = 0; 

158 	          $result .= "\n"; 

159       	} else { 

160 	          $result .= ’ ’; 

161       	}

162 	      $result .= $w; 

163       	if ($w =~ /\.$/) { 

164 	          if (rand() < .1) { 

165               		$result .= "\n"; 

166 	              	$col = 80; 

167 	          } 

168       	} 

169 	

170     } 

171     return $result; 

172 }