#!/usr/pkg/bin/perl
# To do:
# - What if format says one type of line and data says another?
#   (In gtbl, the data takes precedence).
# - Doesn't compute border values properly.  Need function to set particular
#   border value, after clearing value for that border.
# - box/allbox and doublebox aren't mutually exclusive.
# - Error messages aren't great - they often are misleading wrt line number.
#   To do a better job of this, probably would need to do more data line
#   checking as soon as the line is parsed.

# Script type:	perl5

# tblcvt - troffcvt table preprocessor

# Read troff input looking for tables (.TS + .TE and everything in between).
# Parse table options and format lines, then read table data lines and write
# them out in a more easily parsable form.

# The output is intended to be used with troffcvt and a special set of
# request redefinitions.  The output cannot be parsed by troffcvt with
# compatibility mode enabled (i.e., you cannot specify the -C flag to
# troffcvt).

# Table specifications are collected using these variables (among others):
# @format	Each element of this array is itself an array holding the
#		column entries for a row of format specifiers.
# @rawdata	Each element of this array holds one line of table data
#		(after \ continuation collapsing).  The array holds "real"
#		data, and data lines that are actually just troff commands
#		(beginning with a dot).  The variable $datarows is set to
#		the number of data rows that are *not* troff commands.
# @formatidx	Array to map $rawdata[i] to the index of the format row to be
#		used when formatting the data row.  Elements of this array
#		may have the following values:
#		>= 0	Index into @format
#		-1	The data row is a line-drawing row and doesn't need
#			a format row
#		-2	The data row is a troff command

# 05 Feb 1997
# Paul DuBois
# dubois@primate.wisc.edu
# http://www.primate.wisc.edu/people/dubois

# 05 Feb 97 V1.00
# - Created.

use strict refs;

($prog = $0) =~ s|.*//||;		# get script name for messages

@c = ();				# character buffer

# state numbers and initial state

$seek_table = 1;
$seek_options_or_format = 2;
$seek_format = 3;
$seek_table_content = 4;

$state = $seek_table;

$table_num = 0;

# table alignment types
#
# Note that since the values are strings, must compare using
# "eq" rather than "==".

$align_left = "L";
$align_center = "C";

# End per-table values

# Per-column values

# Column entry types.  For the first set, it's important that the
# values be the key letter that signifies the corresponding column type.
# For the second set, the values must be something that isn't any other column
# type specifier.  $col_borderonly is a special fake type used for the case
# that a format line ends in | or || but has no other specifier following
# it.  $col_empty is a special fake type used to indicate in the output that
# a cell's data is empty.  $col_X is used during span calculations (it means
# any non-span, non-line-drawing format type).
#
# Note that since the values are strings, must compare using
# "eq" rather than "==".

$col_left = "L";
$col_right = "R";
$col_center = "C";
$col_numeric = "N";
$col_alpha = "A";
$col_hspan = "S";
$col_vspan = "^";
$col_line = "_";
$col_dbl_line = "=";

$col_borderonly = "|";
$col_empty = "E";
$col_X = "X";
$col_visited = "1";
$col_unknown = "0";

# border specifiers (these are or'ed together as necessary)

$bord_left_s = 1;		# left border, single/double
$bord_left_d = 2;
$bord_right_s = 4;		# right border, single/double
$bord_right_d = 8;
$bord_top_s = 16;		# top border, single/double
$bord_top_d = 32;
$bord_bottom_s = 64;		# bottom border, single/double
$bord_bottom_d = 128;

# masks to pull out parts of border

$bord_left = ($bord_left_s | $bord_left_d);
$bord_right = ($bord_right_s | $bord_right_d);
$bord_top = ($bord_top_s | $bord_top_d);
$bord_bottom = ($bord_bottom_s | $bord_bottom_d);

# End per-column values

# Output request names

$tbl_begin = ".T*table*begin";
$tbl_end = ".T*table*end";
$tbl_col_info = ".T*column*info";
$tbl_row_begin = ".T*row*begin";
$tbl_row_end = ".T*row*end";
$tbl_cell_info = ".T*cell*info";
$tbl_cell_begin = ".T*cell*begin";
$tbl_cell_end = ".T*cell*end";
$tbl_row_line = ".T*row*line";
$tbl_cell_line = ".T*cell*line";
$tbl_empty_cell = ".T*empty*cell";
$tbl_spanned_cell = ".T*spanned*cell";

# Read and process input

while (<>)
{
#warn "state: $state\n";
	chomp;

	if ($state == $seek_table)
	{
		if (!/^\.\s*TS\s*(H)?\s*(\\".*)?$/)	# look for .TS or .TS H
		{
			print "$_\n";
			next;
		}

		++$table_num;
		$section_num = 1;

		$expect_TH = ($1 eq "H");
		$header_rows = 0;		# number of data rows in header

		# set options, format to defaults
		$align = $align_left;
		$expand = "n";
		$box = "n";
		$all_box = "n";
		$double_box = "n";
		$tab = "\t";
		$linesize = 1;
		$eqndelim = "\$\$";
		@format = ();		# format for columns
		@columnglobal = ();	# column global values
		@rawdata = ();		# cell data values
		@formatidx = ();	# data row -> format row map
		$fmtidx = 0;

		$tblcols = 0;

		$state = $seek_options_or_format;
		next;
	}
	if (/^\.\s*TH\s*(\\".*)?$/)	# allow .TH at any point after .TS
	{
		if ($expect_TH)
		{
			# number of header rows is the number of data
			# rows collected so far
			$header_rows = @rawdata;
			$expect_TH = 0;
		}
		else
		{
			Warn ("extraneous .TH?");
		}
		next;
	}
	if (/^\.\s*T\&\s*(\\".*)?$/)	# allow .T& at any point after .TS
	{
		Warn ("missing table data?") if !@rawdata;
		Warn ("missing .TH?") if $expect_TH;

		# begin a new section of the current table
		++$section_num;

		# data rows in this section use format rows in this section
		# so make sure $fmtidx starts with next format line index
		$fmtidx = @format;
		$state = $seek_format;
		next;
	}
	if (/^\.\s*TE\s*(\\".*)?$/)	# allow .TE at any point after .TS
	{
		Warn ("missing .TH?") if $expect_TH;

		DumpTable ();
		$state = $seek_table;
		next;
	}
	if (/^\./ && $state != $seek_table_content)
	{
		Warn ("suspect request?: $_");
	}
	if ($state == $seek_options_or_format)
	{
		$state = $seek_format;
		if (/;\s*$/)		# it's an options line
		{
			ParseOptionsLine ();
			next;
		}
		# current line is a format line, just fall through to next test
	}
	if ($state == $seek_format)
	{
		if (/\.\s*$/)		# last format line
		{
			s/\.\s*$//;	# strip dot
			$state = $seek_table_content;
		}
		# might have multiple format lines, separated by commas
		@line = split (",", $_);
		ParseFormatLine (shift (@line)) while @line;

		# if this was last format line in section (state just changed
		# to $seek_table_content), and this is the initial format
		# section, calculate those values which cannot be changed
		# in subsequent format sections (following .T&).

		CalculateFormatConstants ()
			if $state == $seek_table_content && $section_num == 1;

		next;
	}
	if ($state == $seek_table_content)
	{
		ParseDataRow ();
		next;
	}
	die "$prog: logic error, unknown state\n";
}

Die ("EOF while collecting table (missing .TE?)") if $state != $seek_table;

exit (0);

# ----------------------------------------------------------------------------

# Parse a table options line (first line after .TS)

sub ParseOptionsLine
{
my ($key, $rest);
my ($status, $opt);

#warn "process options: $_\n";
	s/;\s*$//;		# strip semicolon at end
	while (/^[\s,]*(\w+)(.*)/)
	{
		$key = $1;		# strip off keyword
		$_ = $2;
		if ($key eq "center")
		{
			$align = $align_center;
			next;
		}
		if ($key eq "expand")
		{
			$expand = "y";
			next;
		}
		if ($key eq "box")
		{
			$box = "y";
			next;
		}
		if ($key eq "allbox")
		{
			$all_box = $box = "y";	# allbox implies box
			next;
		}
		if ($key eq "doublebox")
		{
			$double_box = $box = "y";	# doublebox implies box
			next;
		}
		if ($key eq "tab")
		{
			($status, $opt) = ExtractParenOption ();
			if ($status == 0)
			{
				Warn ("bad tab setting");
				next;
			}
			$tab = $opt;
			next;
		}
		if ($key eq "linesize")
		{
			($status, $opt) = ExtractParenOption ();
			if ($status == 0)
			{
				Warn ("bad linesize setting");
				next;
			}
			$linesize = $opt;
			next;
		}
		if ($key eq "delim")
		{
			# parse the delimiters, though they're not actually used
			($status, $opt) = ExtractParenOption ();
			if ($status == 0)
			{
				Warn ("bad delim setting");
				next;
			}
			$eqndelim = $opt;
			next;
		}
		Warn ("unrecognized tbl option keyword: $key");
	}
	Warn ("extra ignored on tbl option line: $_") unless /^\s*$/;

# print values here if debugging
}


# Extract an option in parentheses, e.g., for "linesize(2)", "tab(:)".
# Returns a two-element array.  First element is the status (zero = failure,
# non-zero = success), second element is the option if status is non-zero.

sub ExtractParenOption
{
my ($value);

	return (0, "") unless /^\s*\(([^)]+)\)(.*)/;
	$value = $1;
	$_ = $2;
	return (1, $value);
}


# Parse a format line that gives the format for one row of table cells.
# Push each cell structure onto the @row array, then push that array onto
# the @format array.

sub ParseFormatLine
{
my ($key, $coltype);
my (@row) = ();		# array to hold this row's column entries
my ($col);		# pointer to current column entry hash
my ($border);
my ($v);

	$_[0] =~ s/\s+/ /g;	# convert runs of whitespace to single spaces
	@c = split (//, $_[0]);	# split up line, one char per array element

	for (;;)
	{
		SkipWhiteSpace ();
		last if !@c;

		# allocate an "empty" column info structure, using defaults

		$col = NewColumnEntry ();

		# Look for | or || separator.  If present, pull off and change
		# the left border value.  Also change column type to
		# $col_borderonly just in case no "real" column type follows
		# (i.e., if the | or || is the last thing on the line).

		if ($c[0] eq "|")
		{
			shift (@c);
			$border = $bord_left_s;
			if ($c[0] eq "|")
			{
				shift (@c);
				$border = $bord_left_d;
			}
			$col->{TYPE} = $col_borderonly;
		}

		SkipWhiteSpace ();
		if (!@c)
		{
			push (@row, $col) if $col->{TYPE} == $col_borderonly;
			last;
		}

		$key = shift (@c);

		$coltype = ColType ($key);
		if ($coltype eq $col_unknown)
		{
			Warn ("unknown column key: $key");
			return;
		}

		# reset column type in column struct
		$col->{TYPE} = $coltype;

		$col = ParseColumnOptions ($col);
		push (@row, $col);
	}

	# For each column entry that has a left border (single or double)
	# specified, turn on the right border of the previous column entry.
	# Then if the final entry is only a border, remove it (this happens
	# when a format line ends with | or ||).

	for $v (1 .. @row-1)
	{
		$border = ($row[$v]->{BORDER} & $bord_left);
		next unless $border;		# no border specified
		if ($border == $bord_left_s)
		{ $border = $bord_right_s; }
		elsif ($border == $bord_left_d)
		{ $border = $bord_right_d; }
		else
		{ Die ("border calculation logic error"); }
		$row[$v-1]->{BORDER} |= $border;
	}

	pop (@row) if @row && $row[$#row]->{TYPE} eq $col_borderonly;

	# If this format row is not from the initial format section, then
	# it's known how many columns should be in the row.  Add "L" format
	# columns if the row is short or shed excess columns if the row is
	# too long.

	@row = SquareUpFormatRow (@row) if $section_num > 1;

	push (@format, [ @row ]);
}


# Peel off column options that follow the column key character.  Quit when
# end of line or another column key character is found.

sub ParseColumnOptions
{
my ($col) = $_[0];
my ($key, $opt, $have_opts);

	$have_opts = 0;

	for (;;)
	{
		SkipWhiteSpace ();
		last if !@c;
		# exit loop if next char begins a new column spec
		last if ColType ($c[0]) ne $col_unknown;

		$have_opts = 1;
		$key = shift (@c);
		if (Digit ($key))	# number = column separation
		{
			$col->{SEP} = $key . ExtractInteger ();
			next;
		}
		if (uc ($key) eq "T")	# vertically span from top
		{
			$col->{VADJUST} = "T";
			next;
		}
		if (uc ($key) =~ /^[BI]$/)	# bold or italic
		{
			$col->{FONT} = uc ($key);
			next;
		}
		if (uc ($key) eq "F")	# font setting
		{
			if (!@c)
			{
				Warn ("missing font after $key");
				next;
			}
			if (Digit ($c[0]))
			{
				$opt = ExtractInteger ();
				if ($opt eq "")
				{
					Warn ("missing font after $key");
					next;
				}
			}
			else
			{
				# Not numeric; allow one or two alpha
				# characters.  If a single character, must be
				# followed by space if there are other option
				# characters following the font letter; in this
				# case, eat the space as well.
				if ($c[0] !~ /^[A-Z]$/i)
				{
					Warn ("malformed font spec");
					next;
				}
				$opt = shift (@c);
				$opt .= shift (@c) if @c && $c[0] =~ /^[A-Z]$/i;
			}
			$col->{FONT} = $opt;
			next;
		}
		if (uc ($key) eq "P")	# point size change
		{
			$opt = ExtractSignedInteger ();
			Warn ("missing value after $key") if !$opt;
			$col->{PTSIZE} = $opt;
			next;
		}
		if (uc ($key) eq "V")	# vertical space change
		{
			$opt = ExtractSignedInteger ();
			Warn ("missing value after $key") if !$opt;
			$col->{VSPACE} = $opt;
			next;
		}
		if (uc ($key) eq "W")	# width specification
		{
			if (!@c || (!Digit($c[0]) && $c[0] ne "("))
			{
				Warn ("missing width after $key");
				next;
			}
			if (Digit ($c[0]))	# for int, no ()'s
			{
				$opt = ExtractInteger ();
			}
			elsif ($c[0] eq "(")
			{
				shift (@c);
				$opt = "";
				$opt .= shift (@c) while @c && $c[0] ne ")";
				shift (@c) if $c[0] eq ")";
			}
			$col->{WIDTH} = $opt;
			next;
		}
		if (uc ($key) eq "E")	# equal-width columns
		{
			$col->{EQWIDTH} = "y";	
			next;
		}
		Warn ("unknown column option: $key");
	}

	Warn ("options are irrelevant for column type $col->{TYPE}")
		if $have_opts && ($col->{TYPE} eq $col_hspan
			|| $col->{TYPE} eq $col_line
			|| $col->{TYPE} eq $col_dbl_line);

	return ($col);
}


# Test whether or not the argument is a single character representing a digit

sub Digit
{
	return ($_[0] =~ /^\d$/);
}


# Pull an integer from the character buffer and return it, or "" if no
# integer is present.

sub ExtractInteger
{
my ($num) = "";

	$num .= shift (@c) while @c && Digit ($c[0]);
	return ($num);
}


# Like ExtractInteger(), but allows an option sign (+ or -) preceding
# the integer.  Returns "" if there is no number, regardless of whether
# or not there is a sign character.

sub ExtractSignedInteger
{
my ($sign, $num);

	$sign = "";
	$sign = shift (@c) if @c && $c[0] =~ /^[-+]$/;
	$num = ExtractInteger ();
	return ($sign . $num) if $num;
	return ("");
}


sub SkipWhiteSpace
{
	shift (@c) while $c[0] eq " ";
}


sub ColType
{
my ($c) = $_[0];

	$c = uc ($c);
	return ($c) if $c =~ /^[_=^ACLNRS|]$/;
	return ($col_unknown);
}


sub NewColumnEntry
{
	return ( {
		TYPE	=> $col_unknown,
		BORDER	=> 0,	# 0 = no border
		SEP	=> "",	# blank = use default
		FONT	=> 0,	# 0 = use default
		PTSIZE	=> 0,	# 0 = use default
		VSPACE	=> 0,	# 0 = use default
		VADJUST	=> "C",	# C = centered vertical adjustment
		WIDTH	=> "",	# blank = use default
		EQWIDTH	=> "n"	# n = not specified as equal-width
	} );
}


# Make sure format row has $tblcols columns by adding "L" entries to short
# rows or by shedding excess columns as necessary.  Before this is called
# $tblcols should already have been set to the number of column entries in
# a row.

sub SquareUpFormatRow
{
my ($cellinfo);

	# Widen short row as necessary

	while (@_ < $tblcols)
	{
		$cellinfo = NewColumnEntry ();
		$cellinfo->{TYPE} = $col_left;
		push (@_, $cellinfo);
	}

	# Strip excess columns as necessary

	if (@_ > $tblcols)
	{
		Warn ("excess format specifiers discarded");
		pop (@_) while @_ > $tblcols;
	}

	return (@_);
}


# Based on the formats for the column entries seen thus far, calculate those
# values that cannot be changed from now on, even if other format sections are
# given by .T&.  These values include:
# - the number of columns
# - column separations
# - column widths
# - the set of columns to be made equal width
#
# This is called after the initial set of format lines has been collected.

sub CalculateFormatConstants
{
my ($sep, $width, $eqwidth);
my ($colglobal, $cellinfo);
my ($h, $v);

	return if $section_num > 1;	# return if values are already frozen

	# compute maximum number of columns

	$tblcols = 0;
	for $v (0 .. @format-1)
	{
		$tblcols = @{$format[$v]} if $tblcols < @{$format[$v]};
	}

	# Make sure each format line has $tblcols columns

	for $v (0 .. @format-1)
	{
		@{$format[$v]} = SquareUpFormatRow (@{$format[$v]});
	}

	@columnglobal = ();
	for $h (0 .. $tblcols-1)
	{
		$sep = -1;
		$width = "";
		$eqwidth = "n";
		for $v (0 .. @format-1)
		{
			Die ("CalculateFormatConstants: logic error")
				unless defined $format[$v][$h];
			$cellinfo = $format[$v][$h];
			# width is overridden by last explicit width
			$width = $cellinfo->{WIDTH} if $cellinfo->{WIDTH};
			# sep is largest requested separation
			$sep = $cellinfo->{SEP}
				if $cellinfo->{SEP} && $sep < $cellinfo->{SEP};
			# eqwidth is set if any cell in column requests it
			$eqwidth = "y" if $cellinfo->{EQWIDTH} eq "y";
		}
		$sep = 3 if $sep eq -1;	# default 3 if no explicit value given
		$width = 0 if $width eq "";
		$colglobal = {
			SEP	=> $sep,
			WIDTH	=> $width,
			EQWIDTH	=> $eqwidth
		};
		push (@columnglobal, $colglobal);
	}
}


# Parse a row of table data.  Things that need to be handled here:
# - The line might be continued to the next with \ at the end
# - The line might really be a troff command (the rule tbl uses is
#   that troff commands begin with . followed by anything but a number)
# - Data fields may be multi-line if the last thing on the row is T{,
#   in which case the field continues until a line that begins with T}.
# - The data values will be written out (in DumpTable()) on separate
#   output lines, so values that begin with a dot must be escaped with
#   \&.  Have to be somewhat careful here because a multi-line value
#   (between T{ and T}) might begin with a troff command itself, and
#   thus shouldn't be escaped.  (This means that if a line doesn't look
#   like a troff command, escape all fields that begin with a dot,
#   EXCEPT that multi-line fields are escaped only if they begin with
#   a dot followed by a number.  Sheesh.)
# - As it turns out, it's also useful to escape with \& any data field
#   that begins with whitespace, to suppress extraneous break that
#   that troffcvt would produce as a result of a line beginning with
#   whitespace.

sub ParseDataRow
{
my (@d, @d2);	# data values
my ($data);

	# if line is continued (\ at the end), keep reading
	while (/\\$/ && !eof)
	{
		s/\\$//;		# chop trailing backslash
		$_ .= <>;
		chomp;
	}

	# Line is a troff command if it begins with a period followed
	# by anything but a number.

	if (/^\.(\D|$)/)
	{
		push (@rawdata, $_);
		push (@formatidx, -2);	# line isn't associated with format row
		return;
	}


	# Split up data line, then keep reading input lines as long
	# as the current data ends with "T{".

	@d = split ($tab, $_);

	# Prepend \& to any fields that begin with . followed by a number,
	# to prevent a field like ".13" from being interpreted as a troff
	# command by troffcvt.  Also escape fields beginning with whitespace.

	foreach (@d)
	{
		s/^([.\s])/\\\&\1/;
	}

	while (@d && $d[$#d] eq "T{")
	{
		pop (@d);		# drop the "T{"
		$data = "";	# read until T}, use to replace T{
		while (<>)
		{
			chomp;
			# if line is continued (\ at the end), keep reading
			while (/\\$/ && !eof)
			{
				s/\\$//;
				$_ .= <>;
				chomp;
			}
			Die (".TE found while in T{...T}") if /^\.\s*TE\s*$/;
			if (!/^T}(\Q$tab\E|$)/)
			{
				$data .= "\n" if $data;
				$data .= $_;
				next;
			}
			# escape whitespace or dot followed by number
			$data =~ s/^(\.\d)/\\\&\1/;
			$data =~ s/^(\s)/\\\&\1/;
			push (@d, $data);
			@d2 = split ($tab, $_);
			shift (@d2);		# shift off the "T}"
			# escape any fields after the T{...T} field that
			# begin with dot or whitespace
			foreach (@d2)
			{
				s/^([.\s])/\\\&\1/;
			}
			push (@d, @d2);
			last;
		}
		last if eof;
	}

	# Push data row onto data matrix array, remember the index into
	# the format array of the format row that applies to this data row,
	# then (if we haven't reach the end of the formats) bump the index.
	# If the data row is a line-drawing row, don't associate it with a
	# format row.

	push (@rawdata, [ @d ]);
	if (LineDrawingRow (@d))
	{
		push (@formatidx, -1);
	}
	else
	{
		push (@formatidx, $fmtidx);
		++$fmtidx unless $fmtidx == @format - 1;
	}
}


# Check whether or not a row of cells is a line-drawing row, defined here as:
# - A row with one column that is either "_" or "=" ("\_" doesn't count)
# - A row with at least $tblcols columns, where the first $tblcols columns
#   are line-drawing instructions ("_", "\_, or "=").  (If there are more
#   than $tblcols columns, they're ignored.)

sub LineDrawingRow
{
my ($celldata);

	return (0) if !@_;	# completely blank line is not line-drawing
	return (1) if @_ == 1 && $_[0] =~ /^[_=]$/;
	return (0) if @_ > 1 && @_ < $tblcols;	# incomplete line

	foreach $celldata (@_)
	{
		return (0) if !LineDrawingCell ($celldata);
	}
	return (1);
}


sub LineDrawingCell
{
my ($celldata) = $_[0];

	return ($celldata eq "_" || $celldata eq "\\_" || $celldata eq "=");
}


# Dump out table data

# The @type, @vspan, and @hspan arrays are used as matrices, with one
# member for each data cell in the table.  The value of each @type
# element is the format type for the cell.  The value of @vspan and
# @hspan elements is the number of cells that the cell spans.  If the
# value is 1, the cell spans only itself (the usual case).  For values > 1,
# the cell spans down (for @vspan) or to the right (for @hspan) the
# given number of cells (including itself).  If the value is 0, it means
# the cell is spanned by a cell from above (for @vspan) or to its left
# (for @hspan).

# A tricky part is that vertical spans can be indicated in the cell format
# section (^ column type) or in the data section (\^ data value).  This is
# handled by having \^ data values override the column format values as
# necessary.


sub DumpTable
{
my (@type, @vspan, @hspan, $vs, $hs);
my (@fmtrow, $fmttype);
my (@datarow, @ndatarow, $celldata);
my ($nrows, $ncols, $spanidx);
my ($cellinfo, $celltype, $colglobal);
my ($h, $v, $h2, $v2, $i);

	Die ("logic error, \@formatidx != \@rawdata") if @formatidx != @rawdata;

	$ncols = $tblcols;
	if ($ncols == 0)
	{
		Warn ("no table format given, table skipped");
		return;
	}

	# Calculate the @type, @hspan, and @vspan arrows for doing span
	# calculations.  The dimensions will be $nrows x $ncols.  The
	# span values are initialized to "1" (this indicates the default
	# case where each cell spans only itself).  The type values come
	# come from the format entries, possibly overridden by the data
	# values.

	# $nrows is the number of non-troff-command rows
	# $ncols is the number of columns in the table

	@type = ();
	@hspan = ();
	@vspan = ();

	$nrows = 0;

	for $v (0 .. @rawdata-1)	# for each data row
	{
		$fmtidx = $formatidx[$v];
		next if $fmtidx == -2;	# row is a troff command

		++$nrows;

		push (@hspan, [ split (//, "1" x $ncols) ]);
		push (@vspan, [ split (//, "1" x $ncols) ]);

		# Construct types for this row.  If $fmtidx is -1, the row
		# is a line-drawing row, so set the type to "_" for each cell.
		# Otherwise use the assocated format row to determine the
		# types, but allow those to be overridden if data values are
		# "\^" or line-drawing types.

		# Types used in the @type array:
		# $col_vspan	^ columns
		# $col_hspan	s columns
		# $col_line	_ or = columns
		# $col_X	any non-span, non-line column

		# Elements are set to $col_visited as they are visited
		# during span calculations.

		@datarow = @{$rawdata[$v]};
		$fmtidx = $formatidx[$v];

		# If a row is a line-drawing row and it consists of a
		# single data value (meaning it's a table-width row),
		# set up the type row as $col_line followed by $ncols-1
		# spanning columns.  This helps catch attempts by other
		# cells to span vertically through the line.
		if ($fmtidx == -1)		# line-drawing row
		{
			if (@datarow == 1)	# table-width line
			{
				push (@type,
					[ $col_line ,
					split (//, $col_hspan x ($ncols-1)) ]);
			}
			else
			{
				push (@type,
					[ split (//, $col_line x $ncols) ]);
			}
			next;
		}

		# if $fmtidx still < 0, it's a troff command; shouldn't happen
		Die ("logic error, \$fmtidx = $fmtidx") if $fmtidx < 0;

		@fmtrow = ();
		@ndatarow = ();

		for $h (0 .. $ncols-1)
		{
			# Get the type.  Map all line types to $col_line for
			# simplicity, and map all non-line, non-span types
			# to $col_X

			$fmttype = $format[$fmtidx][$h]->{TYPE};
			if ($fmttype eq $col_vspan || $fmttype eq $col_hspan)
			{
				push (@fmtrow, $fmttype);
				next;
			}
			$fmttype = $col_line if $fmttype eq $col_dbl_line;
			$fmttype = $col_X if $fmttype ne $col_line;
			# If there's no more data in row, format determines type
			if (!@datarow)
			{
				push (@fmtrow, $fmttype);
				next;
			}

			# Look at cell data to see if it overrides format
			# (and delete it from data row if it's "\^")
			$celldata = shift (@datarow);

			if (LineDrawingCell ($celldata))	# line
			{
				# lines can only override line and "X"
				if ($fmttype ne $col_line && $fmttype ne $col_X)
				{
					Die ("bad data (row $v, col $h)");
				}
				$fmttype = $col_line;
				push (@ndatarow, $celldata);
			}
			elsif ($celldata eq "\\^")		# vertical span
			{
				# spans can only override "X"
				if ($fmttype ne $col_X)
				{
					Die ("bad data (row $v, col $h)");
				}
				$fmttype = $col_vspan;
			}
			else
			{
				push (@ndatarow, $celldata);
			}
			push (@fmtrow, $fmttype);
		}

		Die ("\@fmtrow logic error") if @fmtrow != $ncols;

		push (@type, [ @fmtrow ]);

		# @ndatarow now contains all non-\^ data values;
		# use it to replace the original data row.
		@{$rawdata[$v]} = @ndatarow;
	}

	#print "cell type:\n";
	#PrintArray (@type);

	if ($nrows == 0)
	{
		Warn ("no table data given, table skipped");
		return;
	}

	# Preliminary check: can't span horizontally in first column or
	# vertically in first row.

	foreach $v (0 .. $nrows-1)
	{
		next if $type[$v][0] ne $col_hspan;
		Die ("cannot span horizontally in first column");
	}
	foreach $h (0 .. $ncols-1)
	{
		next if $type[0][$h] ne $col_vspan;
		Die ("cannot span vertically in first row");
	}

	# Visit each cell of the type matrix, determine its horizontal/vertical
	# span extents to determine the size of the block the cell should span,
	# then verify that all cells in that block are indeed spanned.  This
	# also sets the @hspan and @vspan matrix entries to the correct span
	# count for later when the table data values get written out.

	# If a cell spans down, its vspan value is the number of rows it
	# spans, including itself.  If a cell is spanned by a cell above,
	# its vspan value is zero.  If a cell spans to the right, its hspan
	# value is the number of columns it spans, including itself.  If a
	# cell is spanned by a cell to the left, its hspan value is zero.

	foreach $v (0 .. $nrows-1)
	{
		foreach $h (0 .. $ncols-1)
		{
			# Skip the cell if it has already been visited
			next if $type[$v][$h] eq $col_visited;

			# The upper left corner of a span block can't be a
			# spanning type.
			Die ("illegal span at row ", $v+1, ", column ", $h+1)
					if $type[$v][$h] eq $col_vspan
						|| $type[$v][$h] eq $col_hspan;

			# Mark cell as visited
			$type[$v][$h] = $col_visited;

			# Determine how many cells left and down this cell
			# spans by looking at cells lying to right of and down
			# from it.  Size is always at least 1 x 1.

			$vs = $hs = 1;

			foreach $v2 ($v+1 .. $nrows-1)
			{
				last unless $type[$v2][$h] eq $col_vspan;
				++$vs;
			}
			foreach $h2 ($h+1 .. $ncols-1)
			{
				last unless $type[$v][$h2] eq $col_hspan;
				++$hs;
			}

			# Examine the spanned block, examining for illegal spans
			# and marking all the cells in the block as visited.
			# Set the span counts as follows:
			# - Leave upper left corner as set above
			# - All cells directly below get same hspan as upper
			#   left and vspan of 0
			# - All cells directly to right get same vspan as upper
			#   left and hspan of 0
			# - All other cells in block get hspan, vspan of 0

			# Examine rest of spanned block (don't need to look at
			# first row or column; have already looked at them).
			# All these cells must span up or left.

			foreach $v2 ($v .. $v + $vs-1)
			{
				foreach $h2 ($h .. $h + $hs-1)
				{
					# set the span counts
					$vspan[$v2][$h2] =
						($v2 == $v ? $vs : 0);
					$hspan[$v2][$h2] =
						($h2 == $h ? $hs : 0);

					# mark cell as visited
					$celltype = $type[$v2][$h2];
					$type[$v2][$h2] = $col_visited;

					# type for all cells but the upper left
					# one must be a spanned type

					next if $v2 == $v && $h2 == $h;

					if ($celltype ne $col_vspan
					    && $celltype ne $col_hspan)
					{
						Die ("illegal span at row ",
								$v2+1,
								", column ",
								$h2+1)
					}
				}
			}
		}
	}

	#print "vspan:\n";
	#PrintArray (@vspan);

	#print "hspan:\n";
	#PrintArray (@hspan);

	# Now the table data can be written out.  Finally!
	# Take care to guard against the fact that data rows can be
	# troff commands, short, or even blank.

	# Write the initial table request line.

	print "$tbl_begin $nrows $ncols $header_rows $align $expand";
	print " $box $all_box $double_box";
	print "\n";

	# Write column globals

	foreach $h (0 .. $ncols-1)
	{
		$colglobal = $columnglobal[$h];
		print "$tbl_col_info";
		print " $colglobal->{WIDTH}";
		print " $colglobal->{SEP}";
		print " $colglobal->{EQWIDTH}";
		print "\n";
	}

	# Only non-troff command data rows have entries in the span arrays.
	# $spanidx keeps track of which span array rows to use for a given
	# data row.

	$spanidx = -1;

	# Write table data

	foreach $v (0 .. @rawdata-1)
	{
		$datidx = $dataidx[$v];
		$fmtidx = $formatidx[$v];	# format row to use

		if ($fmtidx == -2)		# troff command
		{
			print "$rawdata[$v]\n";
			next;
		}

		@datarow = @{$rawdata[$v]};
		++$spanidx;

		if ($fmtidx == -1)		# line-drawing row
		{
			WriteLineDrawingRow ($ncols, @datarow);
			next;
		}

		# line is not line-drawing, or is only partially so, or line
		# types in different columns are different

		print "$tbl_row_begin\n";

		foreach $h (0 .. $ncols-1)
		{
			$vs = $vspan[$spanidx][$h];
			$hs = $hspan[$spanidx][$h];

			$cellinfo = $format[$fmtidx][$h];
			Die ("DumpTable: logic error") if !$cellinfo;
			$celltype = $cellinfo->{TYPE};
			print "$tbl_cell_info";
			print " $celltype $vs $hs";
			print " $cellinfo->{VADJUST}";
			print " $cellinfo->{BORDER}";
			print "\n";
		}

		foreach $h (0 .. $ncols-1)
		{
			$vs = $vspan[$spanidx][$h];
			$hs = $hspan[$spanidx][$h];

			# Print a spanned-cell marker if the cell is spanned
			# from the left or from the top.  Otherwise shift off
			# the cell's data value and proceed to write cell info.
			# (A cell is spanned if either span value is zero.)

			if ($hs == 0 || $vs == 0)
			{
				print "$tbl_spanned_cell\n";
				next;
			}

			$celldata = shift (@datarow);
			$cellinfo = $format[$fmtidx][$h];
			Die ("DumpTable: logic error") if !$cellinfo;
			$celltype = $cellinfo->{TYPE};
			# does the format or data value require line drawing?
			if ($celltype eq $col_line || $celldata eq "_")
			{
				Warn ("non-empty data for _ format ignored")
					if $celldata && $celldata ne "_";
				Warn ("line spans vertically") if $vs > 1;
				print "$tbl_cell_line 1\n";
				next;
			}
			if ($celltype eq $col_dbl_line || $celldata eq "=")
			{
				Warn ("non-empty data for = format ignored")
					if $celldata && $celldata ne "=";
				Warn ("line spans vertically") if $vs > 1;
				print "$tbl_cell_line 2\n";
				next;
			}
			if ($celldata eq "\\_")
			{
				print "$tbl_cell_line 0\n";
				next;
			}
			# Not a line-drawing cell, just regular data.  If
			# the cell is empty, print the shorthand request,
			# otherwise print the begin and end requests, with
			# the data in between.
			if ($celldata eq "")
			{
				print "$tbl_empty_cell\n";
				next;
			}
			print "$tbl_cell_begin";
			print " $cellinfo->{FONT}";
			print " $cellinfo->{PTSIZE}";
			print " $cellinfo->{VSPACE}";
			print "\n";
			print "$celldata\n";
			print "$tbl_cell_end\n";
		}
		print "$tbl_row_end\n";

		# warn about any excess data
		ExcessData (@datarow) if @datarow;
	}

	# Finish table

	print "$tbl_end\n";
}


# Write a line-drawing row.  First argument is the number of columns in a
# row, second argument is the data array.

# If the line indicates a table-width single or double line, a .T*row*line
# request will be written.  A table-width line is indicated if:
# - The data row consists of exactly one "_" or "=" value ("\_" doesn't count)
# - The data row consists of enough values to fill an entire row and the
#   values are all "_" or all "="
# If neither condition holds, individual cell requests are written as
# .T*cell*line requests between .T*row*begin and .T*row*end requests.

sub WriteLineDrawingRow
{
my ($ncols, @datarow) = @_;
my (@excess);
my ($h, $celldata, $homogeneous);

	# look only at first $ncols values in row (there may be extraneous
	# values, but they don't enter into the calculations).

	@excess = ();
	if (@datarow > $ncols)
	{
		@excess = splice (@datarow, $ncols);
	}

	Die ("WriteLineDrawingRow: logic error") unless LineDrawingRow (@datarow);

	# determine whether the cell values are "_" or "=" and all the same,
	# and whether line consists of a single data value or exactly $ncols
	# values.  (A line consisting of exactly one "_" or "=" is homogeneous.
	# But a line consisting of, e.g., two "_" values is not homogeneous
	# if the table is three columns wide, because there is an implied empty
	# (i.e., not line-drawing) third cell.)

	$homogeneous = 0;
	if ($datarow[0] =~ /^[_=]$/ && (@datarow == 1 || @datarow == $ncols))
	{
		# First cell is "_" or "=", are all the rest the same?
		$homogeneous = 1;
		for $h (1 .. @datarow-1)
		{
			if ($datarow[$h] ne $datarow[0])
			{
				$homogeneous = 0;
				last;
			}
		}
	}

	if ($homogeneous)		# write a table-width line
	{
		print "$tbl_row_line ";
		if ($datarow[0] eq "_")
		{ print "1"; }
		elsif ($datarow[0] eq "=")
		{ print "2"; }
		else
		{ Die ("WriteLineDrawingRow: logic error"); }
		print "\n";
	}
	else				# write individual cell-line requests
	{
		print "$tbl_row_begin\n";
		for $h (0 .. $ncols-1)
		{
			$celldata = shift (@datarow);
			if ($celldata eq "")
			{
				print "$tbl_empty_cell\n";
			}
			else
			{
				print "$tbl_cell_line ";
				if ($celldata eq "\\_")
				{ print "0"; }
				elsif ($celldata eq "_")
				{ print "1"; }
				elsif ($celldata eq "=")
				{ print "2"; }
				else
				{ Die ("logic error"); }
				print "\n";
			}
		}
		print "$tbl_row_end\n";
	}

	# warn about any excess data
	ExcessData (@excess) if @excess;
}


# Print warning for any discarded cell data.  For multiline
# data, trim extra lines and replace with "..." before printing.

sub ExcessData
{
my ($celldata);

	while (@_)
	{
		($celldata = shift (@_)) =~ s/\n.*/.../s;
		Warn ("excess data discarded: $celldata");
	}
}


sub PrintArray
{
my ($nrows, $ncols);
my ($v, $h);

	# determine number of rows and columns; assume each row has same
	# number of columns;

	$nrows = @_;
	$ncols = @_ ? @{$_[0]} : 0;

	for $v (0 .. $nrows-1)
	{
		for $h (0 .. $ncols-1)
		{
			print " ", $_[$v][$h];
		}
		print "\n";
	}
}


# Sum up the elements of an array

sub SumArray
{
my ($nrows, $ncols);
my ($v, $h);
my ($sum);

	# determine number of rows and columns; assume each row has same
	# number of columns;

	$nrows = @_;
	$ncols = @_ ? @{$_[0]} : 0;

	$sum = 0;

	for $v (0 .. $nrows-1)
	{
		for $h (0 .. $ncols-1)
		{
			$sum += $_[$v][$h];
		}
	}
	return ($sum);
}


sub Warn
{
	warn "$prog:line $. (table $table_num, section $section_num): ", join ("", @_), "\n";
}


sub Die
{
	Warn (@_);
	die "$prog quits\n";
}
