#!/usr/pkg/bin/perl -- # -*- Perl -*-

$VERSION = "2.0beta6";

use strict;
use Getopt::Long;

my $homedir = $0;
$homedir =~ s/\\/\//g;
$homedir =~ s/^(.*)\/[^\/]+$/$1/;

unshift (@INC, $homedir);

require 'XML/DTDParse/DTD.pm';

my $usage = "Usage: $0 <<opts>> dtd\n";

my %option = ('debug' => 0,
	      'verbose' => 1,
	      'output' => '-',
	      'declaration' => '');

my %opt = ();
&GetOptions(\%opt,
	    'debug+',
	    'verbose+',
	    'output=s',
	    'catalog=s@',
	    'preserve=s@',
	    'declaration=s') || die $usage;

foreach my $key (keys %option) {
    $option{$key} = $opt{$key} if exists($opt{$key});
}

my @catalogs = exists($opt{'catalog'}) ? @{$opt{'catalog'}} : ();
my @preserve = exists($opt{'preserve'}) ? @{$opt{'preserve'}} : ();
my $output = $option{'output'};

if ($output ne '-') {
    open (OUT, ">$output");
} else {
    *OUT = *STDOUT;
}

my $file = shift @ARGV;
my $xmlfile = $option{'output'};

if (defined($file) && $xmlfile eq '') {
    $xmlfile = "$file.xml";
}

my $dtd = new XML::DTDParse::DTD ('Verbose' => $option{'verbose'},
				  'Debug' => $option{'debug'},
				  'SgmlCatalogFilesEnv' => $option{'use-sgml-catalog-files'},
				  'SourceDtd' => $file,
				  'Declaration' => $option{'declaration'});

foreach my $catalog (@catalogs) {
    $dtd->parseCatalog($catalog);
}

$dtd->parse($file);

my $declcount = $dtd->declaration_count();
my @decls = $dtd->declarations();

my %peindex = ();

for (my $count = 0; $count < $declcount; $count++) {
    $peindex{$decls[$count]->name()} = $count
	if $decls[$count]->type() eq 'param';
}

print STDERR "$declcount declarations.\n";
print STDERR "Calculating used entities...\n";

my %usedPE = ();

foreach my $decl (@decls) {
    if ($decl->type() eq 'element') {
	my $cm = $decl->content_model();
	while ($cm =~ /^(.*?)%(\S+?);/s) {
	    my $pe = $2;
	    $usedPE{$pe} = 0 if !exists($usedPE{$pe});
	    $usedPE{$pe}++;
	    $cm = $';
	}
    } elsif ($decl->type() eq 'attlist') {
	my $text = $decl->text();
	while ($text =~ /^(.*?)%(\S+?);/s) {
	    my $pe = $2;
	    $usedPE{$pe} = 0 if !exists($usedPE{$pe});
	    $usedPE{$pe}++;
	    $text = $';
	}
    }
}

# Now we know which elements use them, let's recurse...

my %checkedPE = ();
my $changed = 1;

while ($changed) {
    $changed = 0;

    foreach my $decl (@decls) {
	if ($decl->type() eq 'param') {
	    my $name = $decl->name();
	    my $text = $decl->text();

	    if ($usedPE{$name} && !$checkedPE{$name}) {
		$checkedPE{$name} = 1;
		$changed = 1;

		while ($text =~ /^(.*?)%(\S+?);/s) {
		    my $pe = $2;
		    $usedPE{$pe} = 0 if !exists($usedPE{$pe});
		    $usedPE{$pe}++;
		    $text = $';
		}
	    }
	}
    }
}

# now output the flattened DTD

print OUT "<!-- *********************************************************************
     *** THIS IS THE FLATTENED DTD. DO NOT EDIT THIS DTD BY HAND, EDIT ***
     *** THE CUSTOMIZATION LAYER AND REGNERATE THE FLATTENED DTD! ********
     ********************************************************************* -->\n\n";

print OUT "<!-- Flattened:\n";
print OUT "     Public: ", $dtd->{'PUBLIC_ID'}, "\n";
print OUT "     System: ", $dtd->{'SYSTEM_ID'}, "\n";
print OUT "-->\n\n";

foreach my $decl (@decls) {
    if ($decl->type() eq 'element') {
	my $name = $decl->name();
	my $cm = $decl->content_model();

	$cm = &expandPE($cm);

	print OUT "<!ELEMENT $name $cm>\n";
    } elsif ($decl->type() eq 'attlist') {
	my $name = $decl->name();
	my $text = $decl->text();

	$text = &expandPE($text);

	print OUT "<!ATTLIST $name";
	print OUT $text;
	print OUT ">\n";
    } elsif ($decl->type() eq 'param') {
	my $name = $decl->name();
	my $keep = 0;

	if ($usedPE{$name}) {
	    foreach my $re (@preserve) {
		$keep = 1 if $name =~ /$re/;
		last if $keep;
	    }

	    if ($keep) {
		my $text = $decl->text();
		$text = &expandPE($text);

		my $quote = '"';
		if ($text =~ /\"/s) {
		    $quote = "'";
		    $text =~ s/\'/\&apos;/sg;
		}

		print OUT "<!ENTITY % $name $quote$text$quote>\n";
	    }
	}
    } elsif ($decl->type() eq 'gen') {
	my $name = $decl->name();
	my $public = $decl->public();
	my $system = $decl->system();

	if ($public || $system) {
	    print OUT "<!ENTITY $name ";

	    if ($public) {
		my $quote = '"';
		if ($public =~ /\"/s) {
		    $quote = "'";
		    $public =~ s/\'/\&apos;/sg;
		}
		print OUT "PUBLIC $quote$public$quote";
	    }

	    if ($system) {
		my $quote = '"';
		if ($system =~ /\"/s) {
		    $quote = "'";
		    $system =~ s/\'/\&apos;/sg;
		}
		print OUT "SYSTEM" if !$public;
		print OUT " $quote$system$quote";
	    }

	    print OUT ">\n";
	} else {
	    my $text = $decl->text();
	    $text = &expandPE($text);

	    my $quote = '"';
	    if ($text =~ /\"/s) {
		$quote = "'";
		$text =~ s/\'/\&apos;/sg;
	    }
	    print OUT "<!ENTITY $name $quote$text$quote>\n";
	}
    } elsif ($decl->type() eq 'sdata'
	     || $decl->type() eq 'pi') {
	my $name = $decl->name();

	print OUT "<!ENTITY $name " . uc($decl->type()) . " ";

	my $text = $decl->text();
	$text = &expandPE($text);

	my $quote = '"';
	if ($text =~ /\"/s) {
	    $quote = "'";
	    $text =~ s/\'/\&apos;/sg;
	}

	print OUT "$quote$text$quote>\n";
    } elsif ($decl->type() eq 'ndata'
	     || $decl->type() eq 'cdata') {
	my $name = $decl->name();
	my $public = $decl->public();
	my $system = $decl->system();

	print OUT "<!ENTITY $name ";

	if ($public) {
	    my $quote = '"';
	    if ($public =~ /\"/s) {
		$quote = "'";
		$public =~ s/\'/\&apos;/sg;
	    }
	    print OUT "PUBLIC $quote$public$quote";
	}

	if ($system) {
	    my $quote = '"';
	    if ($system =~ /\"/s) {
		$quote = "'";
		$system =~ s/\'/\&apos;/sg;
	    }
	    print OUT "SYSTEM" if !$public;
	    print OUT " $quote$system$quote";
	}

	print OUT " ", uc($decl->type()), " ", $decl->notation();
	print OUT ">\n";
    } elsif ($decl->type() eq 'notation') {
	my $name = $decl->name();
	my $public = $decl->public();
	my $system = $decl->system();

	print OUT "<!NOTATION $name ";

	if ($public) {
	    my $quote = '"';
	    if ($public =~ /\"/s) {
		$quote = "'";
		$public =~ s/\'/\&apos;/sg;
	    }
	    print OUT "PUBLIC $quote$public$quote";
	}

	if ($system) {
	    my $quote = '"';
	    if ($system =~ /\"/s) {
		$quote = "'";
		$system =~ s/\'/\&apos;/sg;
	    }
	    print OUT "SYSTEM" if !$public;
	    print OUT " $quote$system$quote";
	}

	if (!$public && !$system) {
	    print OUT "SYSTEM";
	}

	print OUT ">\n";
    } else {
	die "Unexpected declaration type: " . $decl->type() . "\n";
    }
}

print STDERR "Done.\n";

exit;

# =================================================================

sub expandPE {
    my $text = shift;

    my $expanded = "";

    while ($text =~ /%(\S+?);/s) {
	$expanded .= $`;
	my $post = $';
	my $pe = $1;
	my $keep = 0;

	foreach my $re (@preserve) {
	    $keep = 1 if $pe =~ /$re/;
	    last if $keep;
	}

	if ($keep) {
	    $expanded .= "%$pe;";
	    $text = $post;
	} else {
	    my $index = $peindex{$pe};
	    die "Unexpected PE: $pe\n" if !defined($index);
	    $text = $decls[$index]->text() . $post;
	}
    }

    return $expanded . $text;
}
