#!/usr/bin/perl -w
# $Id: setext2html.txt,v 1.9 2007/09/08 $
# setext -> HTML converter
#
# (C) 2002 Erik Oliver
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
# USA.
#
# Usage:
#   setext2html infile.etx outfile.html
#   setext2html inoutfile          (reads inoutfile.etx, writes inoutfile.html)
#
# NOTE(review): the copy of this script that was reviewed had lost everything
# between angle brackets (HTML tag literals, the <INFILE> readline and the
# head of the table-of-contents loop) and had its whitespace collapsed.
# The markup literals and the TOC loop below were reconstructed from the
# surrounding typotag comments and code; each reconstructed spot carries a
# NOTE(review).  Confirm them against a pristine copy before relying on the
# exact output format.
#
# NOTE(review): several patterns below match a single leading space
# (e.g. /^ [^ ]/).  setext body text is conventionally indented by two
# spaces, so these may have been multi-space patterns originally; they are
# kept exactly as they appeared -- confirm.

use strict;
use warnings;
use Carp;
use HTML::Entities;

# Global state shared by the passes below.
my $infile;
my $outfile;
my %href;    # hot-tt key => (entity-encoded) URL, filled in by pass 1
my @toc;     # "LEVEL:headline" strings in document order, filled in by pass 2

# style_encode(STRING)
#
# Entity-encodes <, > and & in STRING, then converts the inline setext
# typotags (**bold**, ~italic~, _underline_, hot_) to markup.  A hot-tt
# word whose key is present in %href becomes a link; otherwise it is
# rendered like underlined text.  Returns the converted string.
sub style_encode($) {
    my $string = shift;

    $string = encode_entities($string, "<>&");

    # NOTE(review): the tag names in the four substitutions below are
    # reconstructed from the typotag names -- confirm.

    # bold-tt
    $string =~ s{\*\*([^\*]*)\*\*}{<b>$1</b>}g;

    # italic-tt: inner "~" characters stand for spaces
    $string =~ s{~(\S*)~}{ (my $text = $1) =~ s/~/ /g; "<i>$text</i>" }eg;

    # underline-tt: inner "_" characters stand for spaces
    $string =~ s{\b_(\S*)_\b}{ (my $text = $1) =~ s/_/ /g; "<u>$text</u>" }eg;

    # hot-tt: "word_" links to the URL registered under "word" by an
    # href-tt line, if there is one
    $string =~ s{\b(\S+)_\b}{
        my $target = $href{$1};
        (my $text = $1) =~ s/_/ /g;
        $target ? qq{<a href="$target">$text</a>} : qq{<u>$text</u>};
    }eg;

    return $string;
}

# encode_only(STRING)
#
# Entity-encodes <, > and & in STRING without interpreting any typotags.
# Returns the encoded string.
sub encode_only($) {
    my $string = shift;
    return encode_entities($string, "<>&");
}

## Command line handling

if (@ARGV == 1) {
    if ($ARGV[0] =~ s/\.etx$//) {
        print STDERR "warning: one argument form used but called with $ARGV[0].etx, argument shortened to $ARGV[0].\n";
    }
    $infile  = "$ARGV[0].etx";
    $outfile = "$ARGV[0].html";
}
elsif (@ARGV == 2) {
    $infile  = $ARGV[0];
    $outfile = $ARGV[1];
}
else {
    print STDERR "usage: $0 infile.etx outfile.html\n";
    print STDERR "usage: $0 inoutfile\n";
    exit -1;
}

if (!-e $infile) {
    print STDERR "error: Input, $infile, does not exist\n";
    exit -1;
}
if (!-r $infile) {
    print STDERR "error: Input, $infile, not readable\n";
    exit -1;
}

# Three-argument open with "or die": with "||" the die attached to the
# file name string (always true), so a failed open went unnoticed.
open my $in, '<', $infile
    or die "Could not open $infile for reading, $!";
my @data = <$in>;    # slurp input
chomp @data;         # strip newlines
close $in;

open my $out, '>', $outfile
    or die "Could not open $outfile for writing, $!";

## Loop 1: Find any href-tt tags and hash the URL against a key
## [Also escape a literal "`" (written "``") with +++.]
for my $loop (0 .. $#data) {
    next if $data[$loop] eq "";    # skip blank lines

    # href-tt finder: .. _href URL
    if ($data[$loop] =~ /^\.\.\s+_(\S*)\s+(.*?)\s*$/) {
        my ($key, $value) = ($1, $2);
        $data[$loop] = "";

        # Lookahead: following ".. " lines that do not start a new key
        # continue the URL.  The whole remainder of the line is appended
        # (the earlier pattern captured a single character only).
        my $pos = $loop + 1;
        while ($pos <= $#data && $data[$pos] =~ /^\.\. ([^_].*?)\s*$/) {
            $value .= $1;
            $data[$pos] = "";
            $pos++;
        }

        # The value ends up inside a double-quoted attribute, so the
        # double quote is escaped as well.
        $href{$key} = encode_entities($value, '<>&"');
    }

    $data[$loop] =~ s/``/+++/g;
}

my ($htmltitle, $htmlauthor, $htmldate) = ("", "", "");

## Loop 2: Find headers and flow paragraphs, etc. together.
## Lines merged into an earlier line are blanked and therefore skipped
## when the loop reaches them.
for my $loop (0 .. $#data) {
    my $line = $data[$loop];
    next if $line eq "";    # skip blank lines

    if ($line =~ /^\.\. /) {
        # suppress-tt
        $data[$loop] = "";
    }
    elsif ($line =~ /^Subject: (.*$)/) {
        $htmltitle = style_encode($1) if $htmltitle eq "";
        $data[$loop] = "";
    }
    elsif ($line =~ /^From: (.*$)/) {
        $htmlauthor = $1 if $htmlauthor eq "";
        $data[$loop] = "";
    }
    elsif ($line =~ /^Date: (.*$)/) {
        $htmldate = $1 if $htmldate eq "";
        $data[$loop] = "";
    }
    elsif ($line =~ /^(===|---)/) {
        # Heading underline: "===" marks a level 1 heading, "---" level 2.
        # The heading text is the previous line.
        my $level = $1 eq '===' ? 1 : 2;
        $data[$loop] = "";

        if ($loop == 0) {
            # No previous line; $data[-1] would silently pick the LAST line.
            carp "Heading underline on the first line has no heading text\n";
            next;
        }

        my $headline = style_encode($data[$loop - 1]);
        $htmltitle = $headline if $htmltitle eq "";

        push @toc, "$level:$headline";
        my $id = 'sh' . $#toc;

        # NOTE(review): heading tags and the id attribute are reconstructed.
        my $tag = $level == 1 ? 'h2' : 'h3';
        $data[$loop - 1] = qq{<$tag id="$id">$headline</$tag>\n};
    }
    elsif ($line =~ /^\s{0,2}\* /) {
        # this is a list
        (my $first = $line) =~ s/^\s*\* //;
        # NOTE(review): list markup reconstructed.
        my $list = "<ul>\n<li>" . style_encode($first);

        my $pos = $loop + 1;
        while ($pos <= $#data) {
            if ($data[$pos] =~ s/^\s{0,2}\* //) {
                # next bullet
                $list .= "</li>\n<li>" . style_encode($data[$pos]);
            }
            elsif ($data[$pos] =~ s/^ ([^ *])/$1/) {
                # continuation of the current bullet
                $list .= " " . style_encode($data[$pos]);
            }
            else {
                last;
            }
            $data[$pos] = "";
            $pos++;
        }

        $data[$loop] = $list . "</li>\n</ul>\n";
    }
    elsif ($line =~ /^\s{0,2}>\s*/) {
        # this is included text with a ">"
        (my $first = $line) =~ s/^\s{0,2}>\s*//;
        my $quote = style_encode($first);

        my $pos = $loop + 1;
        while ($pos <= $#data && $data[$pos] =~ s/^\s{0,2}>\s*//) {
            if ($data[$pos] eq "") {
                # An empty ">" line separates paragraphs inside the quote.
                # NOTE(review): paragraph markup reconstructed.
                $quote .= "</p>\n<p>";
            }
            else {
                $quote .= " " . style_encode($data[$pos]);
                $data[$pos] = "";
            }
            # Advance in both cases; previously an empty ">" line did not
            # advance and so ended the quote prematurely.
            $pos++;
        }

        # NOTE(review): blockquote markup reconstructed.
        $data[$loop] = "<blockquote>\n<p>" . $quote . "</p>\n</blockquote>\n";
    }
    elsif ($line =~ /^ [^ ]/) {
        # this is body text to wrap up
        (my $first = $line) =~ s/^ //;
        my $para = style_encode($first);

        my $pos = $loop + 1;
        while ($pos <= $#data && $data[$pos] =~ s/^ ([^ >])/$1/) {
            $para .= " " . style_encode($data[$pos]);
            $data[$pos] = "";
            $pos++;
        }

        # NOTE(review): paragraph markup reconstructed.
        $data[$loop] = "<p>" . $para . "</p>\n";
    }
    elsif ($line =~ /`/) {
        # if nothing else and there is a ` assume multiline
        # verbatim environment: everything up to and including the line
        # holding the next backtick is copied without typotag processing.
        (my $first = $line) =~ s/`//;
        my @verbatim = (encode_only($first));

        my $pos = $loop + 1;
        while ($pos <= $#data) {
            my $closing = ($data[$pos] =~ s/`//);
            # A closing line that held nothing but the backtick adds no text.
            push @verbatim, encode_only($data[$pos])
                if !$closing || $data[$pos] ne '';
            $data[$pos] = "";
            last if $closing;
            $pos++;
        }

        # NOTE(review): preformatted markup reconstructed.
        $data[$loop] = "<pre>" . join("\n", @verbatim) . "</pre>\n";
    }
    elsif ($line =~ /\$\$/) {
        # NOTE(review): this line was wrapped in a pair of tags that could
        # not be recovered; the line is passed through unchanged.
    }
    elsif ($line =~ /^\s*$/) {
        $data[$loop] = "";    # effectively a blank line
    }
    else {
        # should only be here if at end or next line is the === or ---
        next if $loop == $#data;
        next if $data[$loop + 1] =~ /^(?:===|---)/;
        carp "Unhandled typotag, line = $loop, \"$line\"\n";
    }
}

## Output

# NOTE(review): document skeleton reconstructed.
print {$out} qq|<html>\n<head>\n<title>$htmltitle</title>\n</head>\n<body>\n|;

# header information
# NOTE(review): tags reconstructed.  The date is now entity-encoded like the
# author; it was previously written out raw.
print {$out} "<h1>$htmltitle</h1>\n" if $htmltitle ne "";
print {$out} "<p>By ", encode_only($htmlauthor), "</p>\n" if $htmlauthor ne "";
print {$out} "<p>", encode_only($htmldate), "</p>\n" if $htmldate ne "";

# table of contents
# NOTE(review): the head of this loop was missing; the loop is reconstructed
# from the surviving branches.  Entries link to the "sh<N>" ids assigned to
# the headings in loop 2.  The TOC is skipped when there are no headings.
if (@toc) {
    print {$out} "<h2>Table of Contents</h2>\n";
    print {$out} "<ul>\n";

    my $lastlevel  = 0;    # 0 = no entry written yet
    my $open_lists = 1;    # number of <ul> elements currently open

    for my $index (0 .. $#toc) {
        my ($level, $content) = split /:/, $toc[$index], 2;
        my $entry = qq|<li><a href="#sh$index">$content</a>|;

        if ($lastlevel == 0) {
            # very first entry
            print {$out} $entry;
        }
        elsif ($lastlevel == $level) {
            # stayed same, end prior entry start new one
            print {$out} qq|</li>\n$entry|;
        }
        elsif ($level == 2 && $lastlevel == 1) {
            # starting new level
            print {$out} qq|\n<ul>\n$entry|;
            $open_lists++;
        }
        elsif ($level == 1 && $lastlevel == 2) {
            # finishing a level (unless the document started at level 2,
            # in which case there is no nested list to close)
            if ($open_lists > 1) {
                print {$out} qq|</li>\n</ul>\n</li>\n$entry|;
                $open_lists--;
            }
            else {
                print {$out} qq|</li>\n$entry|;
            }
        }
        else {
            print STDERR "toc error lastlevel vs level mismatch\n";
            die;
        }
        $lastlevel = $level;
    }

    # close the last entry and every list still open
    print {$out} "</li>\n</ul>\n" for 1 .. $open_lists;
}

# Body.  "+++" is the placeholder loop 1 substituted for "``"; it is turned
# back into a single backtick here (a literal "+++" in the input is
# converted as well).
for my $line (@data) {
    next if $line eq "";    # skip blank lines
    $line =~ s/\+\+\+/`/g;
    print {$out} "$line\n";
}

print {$out} qq|</body>\n</html>\n|;

# Buffered write errors only surface at close.
close $out or die "Could not close $outfile, $!";