#!/usr/local/bin/perl -s

# Generate a LaTeX representation of the PNG spec.
# TeX/LaTeX is then used to produce a PostScript file.
# Formatting adheres to RFC requirements when -rfc switch is used.
# Usage: maketex [-rfc] [-w3c] <master >output

# Tom Lane <tgl@sss.pgh.pa.us> and Oliver Fromme <fromme@rz.tu-clausthal.de>

# Emit the LaTeX preamble.  The literal chunks below are written verbatim;
# only three places depend on the -rfc switch: the documentclass sidedness,
# the page style definitions, and the style of the very first page.
print STDOUT <<'END_FORMAT_NOTE';
%Format: LaTeX 2e

% To print on A4 stock, replace 'letterpaper' with 'a4paper',
% and optionally increase \textheight half an inch.

END_FORMAT_NOTE

# RFC headings want the oneside layout; everything else is twoside.
print STDOUT '\\documentclass[letterpaper,' . ($rfc ? 'oneside' : 'twoside')
    . ",11pt]{article}\n";

print STDOUT <<'END_LAYOUT';

\usepackage{times}       % Select Times font family

% Page dimensions: set to meet RFC format requirements on 8.5x11 paper
\textheight=8.15in
\topmargin=0in
\headheight=11pt
\textwidth=6.5in
\oddsidemargin=0in
\evensidemargin=0in
\raggedbottom

% Various formatting hacks

\makeatletter

\setcounter{tocdepth}{3}
% default TOC layout doesn't leave enough space for section numbers
\renewcommand{\l@subsection}{\@dottedtocline{2}{1.5em}{2.8em}}

END_LAYOUT

# Page style used with -rfc: head/foot strings are filled in later from
# the HEADFOOT commands found in the master document.
$rfc_pagestyle = <<'END_RFC_PAGESTYLE';
% Page style: emulation of nroff left/center/right head/foot options
\let\lefthead\@empty
\let\centerhead\@empty
\let\righthead\@empty
\let\leftfoot\@empty
\let\centerfoot\@empty
\def\ps@standardpage{%
  \let\@mkboth\@gobbletwo
  \def\@oddhead{\reset@font\rmfamily\lefthead\hfil\centerhead\hfil\righthead}%
  \def\@oddfoot{\reset@font\rmfamily\leftfoot\hfil\centerfoot\hfil\thepage}%
  \let\@evenhead\@oddhead\let\@evenfoot\@oddfoot
}

\def\ps@firstpage{%
  \let\@mkboth\@gobbletwo
  \let\@oddhead\@empty
  \def\@oddfoot{\reset@font\rmfamily\leftfoot\hfil\centerfoot\hfil\thepage}%
  \let\@evenhead\@oddhead\let\@evenfoot\@oddfoot
}

\pagestyle{standardpage}
END_RFC_PAGESTYLE

# Page style used without -rfc.
$plain_pagestyle = <<'END_PLAIN_PAGESTYLE';
% Page style: somewhat like 'headings', but put section name on right page
\pagestyle{myheadings}
\def\sectionmark#1{\markright{\uppercase{\thesection. \ #1}}}
END_PLAIN_PAGESTYLE

print STDOUT ($rfc ? $rfc_pagestyle : $plain_pagestyle);

print STDOUT <<'END_COMMON';

% Default title formatting is no good for RFC --- overrides pagestyle
\renewcommand\maketitle{\par
  \vskip 1.5em
  \begin{center}
    \LARGE \@title
  \end{center}
  \par
  \vskip 1.5em}

% Eliminate special formatting of the abstract, too
\renewenvironment{abstract}%
  {\section*{\abstractname}}
  {\par}

% Paragraphing: use "block" style with no parindent
\sloppy
\parindent=0pt
% increase parskip to provide some visual separation between paragraphs
\parskip=1.5ex plus0.5ex minus0.25ex
% adjust list formatting to match
\def\@listI{\leftmargin\leftmargini
            \topsep 0pt plus 2pt
            \parsep 1.0ex plus0.2ex minus0.2ex
            \itemsep 0.5ex plus0.3ex}

% If you prefer indented paragraphs, try these values with default parindent:
% \parskip=0.5ex plus0.5ex minus0.25ex
% \def\@listI{\leftmargin\leftmargini
%             \topsep 0pt plus 2pt
%             \parsep 0.4ex plus0.4ex minus0.2ex
%             \itemsep 0.1ex plus0.1ex}

% stuff for translating <PRE> environment
\newcommand{\bverbatim}{%
    \begin{list}{}{\parsep=0ex}\item[]
    \begingroup\tt\frenchspacing
    \obeylines\obeyspaces\raggedright
}
\newcommand{\everbatim}{\endgroup \end{list}}

\makeatother

\begin{document}

END_COMMON

# First page: the special 'firstpage' style with -rfc, no head/foot otherwise.
print STDOUT '\\thispagestyle{' . ($rfc ? 'firstpage' : 'empty') . "}\n\n";


# $debug is left untouched here: it may be set to 1..3 from the command
# line (the script runs under perl -s).

# Converter state shared with process_line and its helpers.
# Everything starts cleared, except that we begin inside the HTML header.
$LineNo = $inquote = $verbatim = $verbatim_tex = 0;
$if_nest_ok = $if_nest_fail = 0;
$in_title = $in_abstract = $in_toc = 0;
$in_header = 1;
$lastline = '';

# Main loop: feed the master document to the converter one line at a time
&process_line($_) while <STDIN>;

# Print end-of-document overhead
print STDOUT "\n\\end{document}\n";

exit;

# Process one line

# Translate one line of the HTML master document and emit the LaTeX
# equivalent on STDOUT.
#
# Argument:
#   the raw input line; a trailing newline, if present, is removed.
#
# Processing steps, in order:
#   1. conditional-inclusion commands (IF RFC / IF W3C / ENDIF / NROFF /
#      TEX / IF !TEX / IF HTML) update the nesting counters; command lines
#      and lines inside a failed conditional are dropped;
#   2. lines inside a "<!-- TEX ... TEX -->" block are copied verbatim;
#   3. END HEADER and HEADFOOT commands are handled, any other
#      "<!-- ... -->" command is ignored;
#   4. tags are replaced, entities translated and TeX special characters
#      escaped, character by character;
#   5. <TITLE> and <Hn> lines get their special treatment (title, abstract,
#      table of contents, sectioning commands);
#   6. the result is either saved for the current <PRE> block or printed.
#
# Reads the -s switches $rfc, $w3c and $debug.  Reads and updates the
# globals $LineNo, $if_nest_ok, $if_nest_fail, $verbatim_tex, $verbatim,
# $in_header, $in_title, $in_abstract, $in_toc, $inquote, $lastline and
# @saved_pre_text.
#
# Dies with "Bogus if/endif matching" when a closing command is seen
# without its opening counterpart.
sub process_line {
    local($line) = @_;
    # Strip the line terminator only.  chop() would also eat the last real
    # character of a final line that has no trailing newline.
    $line =~ s/\n$//;
    $LineNo++;
    warn "Line $LineNo\n" if $debug>=3;

    # Handle conditional exclusion commands, doing nesting correctly.
    # $if_nest_ok counts enclosing conditionals that are satisfied,
    # $if_nest_fail counts those that are not (plus anything nested in them).
    local($skip_this_line) = 0;

    if ($line =~ /^<!-- IF RFC -->/i) {
        if ($rfc && !$if_nest_fail) { $if_nest_ok++; }
        else { $if_nest_fail++; }
        $skip_this_line = 1;
    }
    if ($line =~ /^<!-- IF !RFC -->/i) {
        if (!$rfc && !$if_nest_fail) { $if_nest_ok++; }
        else { $if_nest_fail++; }
        $skip_this_line = 1;
    }
    if ($line =~ /^<!-- IF W3C -->/i) {
        if ($w3c && !$if_nest_fail) { $if_nest_ok++; }
        else { $if_nest_fail++; }
        $skip_this_line = 1;
    }
    if ($line =~ /^<!-- IF !W3C -->/i) {
        if (!$w3c && !$if_nest_fail) { $if_nest_ok++; }
        else { $if_nest_fail++; }
        $skip_this_line = 1;
    }
    if ($line =~ /^<!-- ENDIF -->/i) {
        if ($if_nest_fail) { $if_nest_fail--; }
        else { $if_nest_ok--; }
        $skip_this_line = 1;
    }
    # NROFF-only material is never wanted in the TeX output
    if ($line =~ /^<!-- NROFF/i) {
        $if_nest_fail++;
        $skip_this_line = 1;
    }
    if ($line =~ /^NROFF -->/i) {
        $if_nest_fail--;
        $skip_this_line = 1;
    }
    # TeX-only material is passed through untranslated (see below)
    if ($line =~ /^<!-- TEX/i) {
        $verbatim_tex++;
        $skip_this_line = 1;
    }
    if ($line =~ /^TEX -->/i) {
        $verbatim_tex--;
        $skip_this_line = 1;
    }
    # "IF !TEX" and "IF HTML" sections always fail here; their ENDIF
    # decrements $if_nest_fail again.
    if ($line =~ /^<!-- IF !TEX -->/i) {
        $if_nest_fail++;
        $skip_this_line = 1;
    }
    if ($line =~ /^<!-- IF HTML -->/i) {
        $if_nest_fail++;
        $skip_this_line = 1;
    }

    if ($if_nest_fail || $skip_this_line) {
        # Any counter going negative means a close without a matching open.
        if ($if_nest_fail < 0 || $if_nest_ok < 0 || $verbatim_tex < 0) {
            die "Bogus if/endif matching\n";
        }
        return;
    }

    if ($verbatim_tex) {
        print STDOUT "$line\n";
        return;
    }

    # handle end-header command
    if ($line =~ /<!-- END HEADER -->/i) {
        if ($in_title) {
            # close the \title{ opened by the first H1 line
            print STDOUT "}\n\\author{}\\date{}\\maketitle\n\n";
            $in_title = 0;
        }
        $in_header = 0;
        return;
    }

    # handle head/foot string setting
    if ($line =~ /<!-- HEADFOOT (..) (.+) -->/i) {
        $def = $1;
        $val = $2;
        if ($def eq 'LH') {
            print STDOUT "\\def\\lefthead{$val}\n";
        } elsif ($def eq 'CH') {
            print STDOUT "\\def\\centerhead{$val}\n";
        } elsif ($def eq 'RH') {
            print STDOUT "\\def\\righthead{$val}\n";
        } elsif ($def eq 'LF') {
            print STDOUT "\\def\\leftfoot{$val}\n";
        } elsif ($def eq 'CF') {
            print STDOUT "\\def\\centerfoot{$val}\n";
        } else {
            warn "Bogus headfoot type: $line\n";
        }
        return;
    }

    # ignore other special commands (notably INCLUDE)
    if ($line =~ /<!-- (.+) -->/) {
        warn "Ignoring command $line" if $debug;
        return;
    }

    # remove tags, translate entities and encode TeX special characters.
    # $texline is rewritten in place; $pos always indexes the character
    # being examined and is adjusted after each replacement so that it
    # ends up on the last character of the inserted text.
    local ($texline) = $line;
    local ($tmp, $char, $tag, $origtag, $entity, $subst);
    local ($pos) = 0;
    while ($pos < length($texline)) {
        $char = substr($texline,$pos,1);
        if ($char eq '<') {
            # remove tag: collect everything up to the closing '>'
            $tag = '';
            while ($pos < length($texline)) {
                $pos++;
                $char = substr($texline,$pos,1);
                if ($char eq '>') {
                    last;
                } else {
                    $tag .= $char;
                }
            }
            if ($char eq '>') {
                $origtag = $tag;
                $tag =~ y/a-z/A-Z/;
                $subst = '';
                case: {
                  $tag eq 'UL' && ($subst = '\\begin{itemize}', last case);
                  $tag eq '/UL' && ($subst = '\\end{itemize}', last case);
                  $tag eq 'OL' && ($subst = '\\begin{enumerate}', last case);
                  $tag eq '/OL' && ($subst = '\\end{enumerate}', last case);
                  $tag eq 'LI' && ($subst = '\\item ', last case);
                  $tag eq 'DL' && ($subst = '\\begin{description}', last case);
                  $tag eq '/DL' && ($subst = '\\end{description}', last case);
                  $tag eq 'DT' && ($subst = '\\item[{', last case);
                  $tag eq 'DD' && ($subst = '}]\\leavevmode\\par ', last case);
                  $tag eq 'TT' && ($subst = '\\texttt{', last case);
                  $tag eq '/TT' && ($subst = '}', last case);
                  $tag eq 'EM' && ($subst = '\\textit{', last case);
                  $tag eq '/EM' && ($subst = '}', last case);
                  $tag eq 'STRONG' && ($subst = '\\textbf{', last case);
                  $tag eq '/STRONG' && ($subst = '}', last case);
                  $tag eq 'I' && ($subst = '\\textit{', last case);
                  $tag eq '/I' && ($subst = '}', last case);
                  $tag eq 'B' && ($subst = '\\textbf{', last case);
                  $tag eq '/B' && ($subst = '}', last case);
                  # PRE text is collected line by line and dumped at /PRE
                  $tag eq 'PRE' && ($verbatim = 1, last case);
                  $tag eq '/PRE' && (&dump_pre_text,
                                     $subst = '\\everbatim ',
                                     $verbatim = 0, last case);
                  $tag eq 'CODE' && ($subst = '\\texttt{', last case);
                  $tag eq '/CODE' && ($subst = '}', last case);
                  $tag eq 'P' && ($subst = '\\par ', last case);
                  $tag eq 'BR' && ($subst = '\\newline ', last case);
#                 $tag eq 'HR' && ($subst = $in_abstract?'':'\\par\\vspace{2ex}\\hrule\\vspace{2ex}', last case);
                  # anchors and heading tags are dropped silently;
                  # headings are handled further down from the raw line
                  $tag =~ /^A NAME=/ && (last case);
                  $tag =~ /^A HREF=/ && (last case);
                  $tag eq '/A' && (last case);
                  $tag eq 'H1' && (last case);
                  $tag eq '/H1' && (last case);
                  $tag eq 'H2' && (last case);
                  $tag eq '/H2' && (last case);
                  $tag eq 'H3' && (last case);
                  $tag eq '/H3' && (last case);
                  $tag eq 'H4' && (last case);
                  $tag eq '/H4' && (last case);
                  warn "Ignoring tag $tag" if $debug>=2;
                }
                # replace "<tag>" (tag text plus the two brackets)
                substr($texline, $pos-length($tag)-1, length($tag)+2)
                    = $subst;
                $pos += length($subst)-length($tag)-2;
            } else {
                warn qq(Missing ">" in tag, line $LineNo\n);
            }
        } elsif ($char eq '&') {
            # translate entity, see the tables on these pages:
            # <http://www.w3.org/hypertext/WWW/MarkUp/Entities.html>
            $entity = '';
            while ($pos < length($texline)) {
                $pos++;
                $char = substr($texline,$pos,1);
                if ($char eq ';') {
                    last;
                } else {
                    $entity .= $char;
                }
            }
            if ($char eq ';') {
              case: {
                  $entity eq 'lt'     && ($subst = $verbatim?'<':'$<$', last case);
                  $entity eq 'gt'     && ($subst = $verbatim?'>':'$>$', last case);
                  $entity eq 'amp'    && ($subst = '\\&', last case);
                  $entity eq 'copy'   && ($subst = '\\copyright{}', last case);
                  $entity eq 'quot'   && ($subst = $verbatim?'"':&quotechar, last case);
                  $entity eq 'nbsp'   && ($subst = "~", last case);
                  warn qq(Unknown entity "&$entity;", line $LineNo\n);
                  $subst = '(?)';
              }
                # replace "&entity;" (entity name plus '&' and ';')
                substr($texline, $pos-length($entity)-1, length($entity)+2)
                    = $subst;
                $pos += length($subst)-length($entity)-2;
            } else {
                warn qq(Missing ";" in entity, line $LineNo\n);
            }
        } else {
            # ($char ne '<' && $char ne '&')
            # encode TeX special character, if any
            $subst = '';
          case: {
            $char eq '$' && ($subst = '\\$', last case);
            $char eq '%' && ($subst = '\\%', last case);
            $char eq '#' && ($subst = '\\#', last case);
            $char eq '{' && ($subst = '\\{', last case);
            $char eq '}' && ($subst = '\\}', last case);
            $char eq '_' && ($subst = '\\_', last case);
            $char eq '^' && ($subst = '\\^{ }', last case);
            $char eq '~' && ($subst = '\\~{ }', last case);
            $char eq '"' && ($subst = $verbatim?'"':&quotechar, last case);
            $char eq '\\'&& ($subst = '\\texttt{\\symbol{92}}', last case);
            # allow linebreaks after slashes, but only if between letters.
            # this considerably improves formatting of
            # paragraphs that contain long file paths
            $char eq '/' && $pos > 0 &&
                substr($texline,$pos-1,3) =~ m|[a-z]/[a-z]|i &&
                ($subst = '/\\allowbreak{}', last case);
          }
            if ($subst) {
                substr($texline, $pos, 1) = $subst;
                $pos += length($subst)-1;
            }
        }
        $pos++;
    }

    # Handle title: convert to lefthand page heading if non-RFC headings
    if (!$rfc && $line =~ m|^<TITLE>(.*)</TITLE>|i) {
        print STDOUT "\\markboth{\\uppercase{$1}}{\\uppercase{$1}}\n";
    }

    # handle headings
    if ($line =~ /^<H([123456])>/i) {
        $tag = $1;
      case: {
          $tag eq '1' && ($tmp = 'part', last case);
          $tag eq '2' && ($tmp = 'section', last case);
          $tag eq '3' && ($tmp = 'subsection', last case);
          $tag eq '4' && ($tmp = 'subsubsection', last case);
          $tag eq '5' && ($tmp = 'paragraph', last case);
          $tag eq '6' && ($tmp = 'subparagraph', last case);
          $tmp = '';
      }
        # Add '*' (no section # or TOC entry) if no A NAME
        if (! ($line =~ /^<H.><A NAME=/i)) {
            $tmp = $tmp . '*';
        }
        # H1 lines are output as title; consecutive H1 lines become
        # additional title lines separated by "\\"
        if ($tag eq '1') {
            warn "H1 outside header\n" if ! $in_header;
            if ($in_title) {
                print STDOUT "\\\\ $texline\n";
            } else {
                print STDOUT "\\title{$texline\n";
            }
            $in_title = 1;
        } elsif ($in_title) {
            # first non-H1 heading ends the title
            print STDOUT "}\n\\author{}\\date{}\\maketitle\n\n";
            $in_title = 0;
        }
        # Special hacks for abstract and table-of-contents
        if ($tag eq '2') {
            if ($texline eq 'Abstract') {
                $texline = '\\begin{abstract}';
                $tmp = '';
                $in_abstract = 1;
            } elsif ($in_abstract) {
                # end of abstract
                print STDOUT "\\end{abstract}\n\n";
                $in_abstract = 0;
            }
            if ($texline eq 'Table of Contents') {
                print STDOUT "\n\\newpage\n";
                print STDOUT "\\begingroup\n";
                print STDOUT "\\parskip=0.0ex plus0.25ex minus0.0ex\n";
                print STDOUT "\\tableofcontents\n";
                print STDOUT "\\endgroup\n";
                print STDOUT "\n\\newpage\n";
                $in_toc = 1;
                # set header mode to ignore actual contents of TOC part
                $in_header = 1;
            } elsif ($in_toc) {
                # end of TOC
                $in_toc = 0;
                $in_header = 0;
            }
        }
        if ($debug) {
            if ($tag eq '2') {warn qq(section "$texline"\n);}
            if ($tag eq '3' && $debug >= 2) {
                warn qq( - subsection "$texline"\n);
            }
        }
        # wrap the heading text in the sectioning command, unless it was
        # turned into \begin{abstract} above ($tmp emptied)
        $texline = '\\'.$tmp.'{'.$texline.'}' if $tmp;
    }

    if ($verbatim) {
        # PRE text is shoved into a temp array and dumped later
        push(@saved_pre_text, $texline);
    } else {
        # Special processing for non-PRE text
        # If we have two uppercase letters followed by a period and a
        # space, or question mark and space, assume it is a sentence end.
        $texline =~ s/([A-Z][A-Z])\. /$1\\@. /g;
        $texline =~ s/([A-Z][A-Z])\.$/$1\\@./g;
        $texline =~ s/([A-Z][A-Z])\? /$1\\@? /g;
        $texline =~ s/([A-Z][A-Z])\?$/$1\\@?/g;
        $texline =~ s|([A-Z][A-Z]\})\. |$1\\@. |g;
        $texline =~ s|([A-Z][A-Z]\})\.$|$1\\@.|g;
        # skip HTML head and avoid consecutive empty lines.
        # Compare against '' explicitly: a line consisting of "0" is
        # false as a Perl boolean but is not an empty line.
        print STDOUT "$texline\n"
            if (($lastline ne '' || $texline ne '') && ! $in_header);
    }

    # reset if missing right quote at end of paragraph
    # NOTE(review): <P> is translated to '\par ' (with a trailing space),
    # so the '\par' comparison below looks like it cannot match a bare
    # <P> line -- confirm whether that is intended before changing it.
    if ($inquote && ($texline eq '' || $texline eq '\\par')) {
       warn "Missing right quote, line $LineNo\n";
       $inquote = 0;
    }
    $lastline = $texline;
}


# Translate a plain double quote into the proper TeX quote.
# Flips the global $inquote on every call and returns `` when the call
# opens a quotation, '' when it closes one.
sub quotechar {
    local($opening) = !$inquote;
    $inquote = $opening;
    return "''" unless $opening;
    return '``';
}


# Dump out a block of <PRE> text.

# Print the lines collected in @saved_pre_text as one \bverbatim block and
# empty the array.  Does nothing when no lines have been collected.
#
# - Element 0 is skipped: it is the line that carried the <PRE> tag.
# - Blocks of more than 15 saved lines are set in \small.
# - Leading spaces are wrapped in \verb|...| so that indentation survives.
# - Empty lines become an explicit \vspace; a \nopagebreak hint is emitted
#   between consecutive non-empty lines.
#
# The closing \everbatim is not printed here; the caller emits it as the
# translation of </PRE>.
sub dump_pre_text {
    local ($i, $texline, $lastline, $pos);
    if (@saved_pre_text) {
        if (@saved_pre_text > 15) {
            print STDOUT "\\bverbatim\\small\n";
        } else {
            print STDOUT "\\bverbatim\n";
        }
        $lastline = '';
        # we ignore the first line, which is where the <PRE> was.
        for ($i = 1; $i <= $#saved_pre_text; $i++) {
            $texline = $saved_pre_text[$i];
            # Test for the empty string explicitly: a line that is just
            # "0" is false as a Perl boolean but must still be printed.
            if ($texline ne '') {
                # force indentation if verbatim text
                if ($texline =~ s/^( +)//) {
                    $texline = '\\verb|'.$1.'|'.$texline;
                }
                # discourage page breaking except after empty lines
                print STDOUT "\\nopagebreak[4]\n" if $lastline ne '';
                print STDOUT "$texline\n";
            } else {
                # force empty line if verbatim text
                print STDOUT "\\vspace{\\baselineskip}\n";
            }
            $lastline = $texline;
        }
        # \everbatim is emitted as translation of </PRE>
        # print STDOUT "\\everbatim\n";
        $#saved_pre_text = -1;
    }
}
