#!/usr/bin/perl
#
# Revision = '$Id: MIMEStream.pm,v 1.84 2006/01/02 03:18:57 bre Exp $';
# Version = 'Anomy 0.0.0 : MIMEStream.pm';
#
##  Copyright (c) 2000-2004 Bjarni R. Einarsson. All rights reserved.
##  This program is free software; you can redistribute it
##  and/or modify it under the same terms as Perl itself.  
#
# This is essentially a state machine, which allows me to parse and 
# rewrite MIME messages a little bit at a time.  Messages may be as 
# complex as you like:  the parser knows how to decode (and encode) 
# the standard MIME encodings (base64, quoted-printable, 7bit and 
# 8bit) and parse (and create) both multipart/* and message/rfc822 
# parts properly.
#
# Not included are decoders for encrypted sub-parts.  No batteries 
# either.
#

# USAGE:
#
#	... set up parsers ...
#
#	$A = MIMEStream->New(*STDIN, *STDOUT, \%parsers );
#	$A->ParseHeader();
#	$A->ParseBody();
#
# BASIC IDEA:
#
#	The parse() function will scan the top-level message header, invoking 
#   the appropriate parser function.  The parser function is expected to
#   look about like this:
#
#   sub ParseSomething()
#   {
#		my ($reader) = shift;
#
#		# Read header from container part (or input file handle)
#		$reader->ReadHeader();
#
#		... manipulate header ...
#
#		$reader->WriteHeader();
#
#		# Parse part body
#		while (my $line = $reader->Read())
#		{
#			... manipulate line ...
#
#			# Print out Improved Line
#			$reader->Write()
#		}
#
#		... append stuff ...
#
#		# Flush buffers
#		$reader->Write(undef)
#   }
#
#	The Read() function will transparently decode the input using the 
#	supplied decoder functions, and will return "undef" when the end of 
#	the current section is reached.
#
#	The Write() function will transparently encode the output, using the
#	supplied encoder function.  Writing "undef" will flush the encoder's
#	buffers, which is necessary when Base64- and UU-encoding.
#
#	Useful parser functions are included near the bottom of this file.
#
#	By creating a new part object, using the Writer() constructor, a part's
#	encoding may be altered.  Search for "sub Writer" and read the comments.
#	See the "ParserForce8bit" function for an example.
#


##[ Package definition ]######################################################

package Anomy::MIMEStream;
use strict;
use MIME::Base64;
use MIME::QuotedPrint;
use Anomy::Log;
use bytes;

BEGIN {
	use Exporter	();
	use vars		qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
	
	$VERSION		= do { my @r = (q$Revision: 1.84 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
	@ISA	 		= qw(Exporter);
	@EXPORT			= qw(
							&CheckEmail
						);

	@EXPORT_OK		= qw( );
}
use vars @EXPORT_OK;


##[ Variables ]###############################################################

# Define CR, LF, CRLF and their quoted-printable equivalents
my $CR   = "\015";     my $qCR   = "=0D";
my $LF   = "\012";     my $qLF   = "=0A";
my $CRLF = "\015\012"; my $qCRLF = "=0D=0A";

# Default parser values
my $default_parserhash = 
{
#	"text/html"             => \&ParserDiscard,
	"text/*"                => \&ParserForce8bit,
	"message/rfc822"        => \&ParserRFC822,
	"multipart/*"           => \&ParserMultipart,
	"multipart/signed"      => \&ParserCat,
	"multipart/encrypted"   => \&ParserCat,
	"DEFAULT"               => \&ParserCat,
};

# Default decoder values
my $default_decoderhash = 
{
	"DEFAULT"               => \&Decode8bit,
	"8bit"                  => \&Decode8bit,
	"binary"				=> \&Decode8bit,
	"7bit"                  => \&Decode8bit,
	"quoted-printable"      => \&DecodeQP,
	"base64"                => \&DecodeBase64,
	"uue"                   => \&DecodeUU,
	"forwarded"             => \&DecodeFwd,
};

# Default encoder values
my $default_encoderhash = 
{
	"DEFAULT"               => \&Encode8bit,
	"8bit"                  => \&Encode8bit,
	"binary"				=> \&Encode8bit,
	"7bit"                  => \&Encode7bit,
	"quoted-printable"      => \&EncodeQP,
	"base64"                => \&EncodeBase64,
	"uue"                   => \&EncodeUU,
	"forwarded"             => \&EncodeFwd,
};


##[ Functions ]###############################################################

# Constructor:
#
# Usage:	$parser = Anomy::MailStream->New(*INPUT,
#                                            *OUTPUT,
#                                            \%parsers,
#                                            $parent);
#
# The only required arguments are the file handles, there exist default
# values for the rest.
#
# If defined, $parsers should be a reference to a hash of interpretors
# for different content-types.
#
sub New
{
	my $proto = shift;
	my $class = ref($proto) || $proto;
	my ($IN, $OUT, $parsers, $parent) = @_;
	my ($boundary, $boundpre);
	my $log;
	my $p = $parent;
	my $common = undef;

	$IN = ($IN || $p->{"IN"} || \*STDIN);
	$OUT = ($OUT || $p->{"OUT"} || undef);

	$boundary = $p->{"mime"}->{"boundary"};
	$boundpre = $p->{"mime"}->{"_boundpre"};
    $boundpre = "--" unless (defined $boundpre);
	$parsers  = ($parsers || $p->{"parsers"} || $default_parserhash);
	
	if ($parent)
	{
        $common = $parent->{"common"};
	    $log = $parent->{"log"}->sublog("Part", SLOG_TRACE, 
	                                    { pos => $parent->{"Read_Bytes"} });
	}
	else
	{
		$common = {
			"unset"     => 1,     # Common data is accumulated in a few 
			                      # stages, this is set to zero when we're
								  # all done.
			"root"      => undef,
		    "log"       => $log,
			"headers"   => undef,
			"errors-to" => undef,
			"reply-to"  => undef,
		};
	    $log = new Anomy::Log;
	}
	
	# Subparts normally don't mess with newlines...
	my $nl = 0;
	$nl = undef if ($parent);

    my $eol = $LF;
    $eol = $parent->{"EOL"} if ($parent);

	my $part = {
		"parent"   => $parent,
		"common"   => $common,
		"parsers"  => $parsers,
		"decoders" => $default_decoderhash,
		"encoders" => $default_encoderhash,
		"log"      => $log,

		# This buffer always contains decoded data, which is used by
        # Read() for line buffering within parts.
		"IOBuffer"    => "",
		"Read_Bytes"  => 0,
		"Wrote_Bytes" => 0,
		"Wrote_NL"    => 0,

		# This is our end-of-line character.
		#
		"EOL"         => $eol,
        "ENCODED_EOL" => $eol,
		"newline_in"  => $nl,
		"newline_out" => undef,

		# Files and "coparts"
		"INforce"  => !$parent,
		"IN"       => $IN,
		"OUTforce" => !$parent,
		"OUT"      => $OUT,
		"reader"   => undef,
		"writer"   => undef,

		# Misc. flags		
		"eop"	   => undef,
		"debug"    => 0,
		"uupart"   => 0,

		# This is the boundary that marks the end of this part.  Not
		# to be confused with the mime-boundary, which seperates the
		# sub-parts of multipart/* type part.
		"boundary" => $boundary,
        # Default boundary preamble
		"_boundpre" => $boundpre,

		# These contain this part's header, formatted in a few ways
		# for programming conveniance.
		"headers"      => { },
		"rawheader"    => "Content-Type: text/plain$LF"
                         ."Content-Transfer-Encoding: 8bit$LF$LF",
		"cookedheader" => undef,

		# Our assumptions about incoming non-MIME messages.
		"mime" => {
			"boundary"     => undef,
			"_boundpre"    => "--",
			"_type"        => "text/plain",
			"_encoding"    => "8bit",       # Exceeds MIME/RFC822 recommendations.
			"charset"      => "iso-8859-1", # Not quite MIME compliant...
			"_disposition" => "inline",
		},
		"mime-headers" => {
			"content-type" => "_type charset",
			"content-transfer-encoding" => "_encoding",
		},
		"mime-all" => [ ],
	};

	$common->{"root"} = $part if ($common->{"unset"});

	# Support multipart/digest semantics.
	$part->{"mime"}->{"_type"} = "message/rfc822"
		if (($parent) && 
			(lc($parent->{"mime"}->{"_type"}) eq "multipart/digest"));

	$part->{"decoder"} = $part->{"decoders"}->{"DEFAULT"};
	$part->{"encoder"} = $part->{"encoders"}->{"DEFAULT"};

	bless($part, $class);
	return $part;
}


##############################################################################
# Part reader methods

# Top level header parsing routine.
#
# Note:  This routine will stop processing the header after reading 
#        approximately 256k, and pass whatever is left to the body parser.
#        I've never seen a header that long, so I don't expect this to
#        be a serious limitation! :-)
#
sub ParseHeader
{
	my $reader = shift;
	$reader = ($reader->{"reader"} || $reader);

	my $header_log = $reader->{"log"}->sublog("ParseHeader", SLOG_TRACE);

	# Scan part header...
	my $line = $reader->Read();
	$line =~ s/$CR$//; # Map CRLF => LF only.

    # NOTE:  Munging CRLF into LF only here is a bit nasty, since we might
	#        be processing an attachment which is DOS formatted and should
	# strictly speaking be left that way.  This might break things. FIXME!!

	# Gobble up Unix from line if present.
	if ($line =~ /^>?From /)
	{
		$reader->{"UNIX-FROM"} = $line;
		$line = $reader->Read();
		$line =~ s/$CR$//; # Map CRLF => LF only.
	}

	my $header = undef;
	my $headers = { };
	my $rawheader = $line;
    my @misplaced_attributes = ( );
	while (($line !~ /^$CR?$/) && (length($rawheader) < 256000))
	{
	    if ($line =~ /^\s+[^\s]+/)
		{
            if (defined $header)
            {
			    $headers->{$header} .= $line;
            }
            elsif ($line =~ /(file|name)=/i)
            {
                # This is just junk, discard it.
                push @misplaced_attributes, $line;
                $rawheader =~ s/\Q$line\E$//;
            }
		}
		elsif (($header =~ /^content-.*/) && ($line =~ /^\S+=/))
		{
		    # This is a hack to handle broken headers created by some 
			# poorly written viruses.
			$headers->{$header} .= " ".$line;
		}
		elsif ($line =~ /^([^\s]+):\s(.*)$/)
		{
			$header = lc($1);
			$headers->{$header} .= $2.$LF;
            if (($header =~ /^content-disposition/i) && (@misplaced_attributes))
            {
                $headers->{$header} =~ s/;?(\s*)$/;$1/;
                $headers->{$header} .= ' '.join('; ', @misplaced_attributes);
                $rawheader .= ' '.join('; ', @misplaced_attributes);
                @misplaced_attributes = ( );
            }
		}

		$line = $reader->Read();
		$line =~ s/$CR$//; # Map CRLF => LF only.

		$rawheader .= $line;
	}

	# Export...
	$reader->{"headers"} = $headers;
	
	# This checks whether someone thinks they can hide headers by embedding
	# comments within the field names themselves.  This violates RFC822 and
	# few clients will actually understand the sent message, but we detect
	# it anyway...
	foreach my $key (grep(/\(/, keys(%{ $headers })))
	{
	    my $nc = $key;
		$nc =~ s/\(.*?[^\\]\)//g;
		$nc = lc($nc);
		unless ($headers->{$nc})
		{
			$header_log->entry("obfuscated-header", SLOG_WARNING, 
			                   { clean => $nc, ugly => $key },
							   "Detected obfuscated %clean% header: %ugly%");
		    $headers->{$nc} = $headers->{$key} 
		}
	}

    if ($reader->{"common"}->{"unset"})
	{
	    my $c = $reader->{"common"};
	    if (!defined $c->{"reply-to"})
		{
		    # Set reply-to information, if possible.
			foreach my $rt ($headers->{"reply-to"},
			                $headers->{"from"},
							$headers->{"resent-from"},
							$headers->{"return-path"},
							$headers->{"sender"})
		    {
			    if (CheckEmail($reader->DecodeHeader($rt)))
				{
				    $c->{"reply-to"} = $rt;
					chomp $c->{"reply-to"};
					last;
				}
			}
			$header_log->entry("reply-to", SLOG_DEBUG, 
			                   { value => $c->{"reply-to"} },
							   "Using %value% as reply-to address.");
	    }
	
	    if (!defined $reader->{"errors-to"})
		{
		    # Set errors-to information, if possible.
			foreach my $et ($headers->{"errors-to"}, 
			   	  	        $headers->{"return-path"},
							$headers->{"sender"},
							$headers->{"resent-from"},
							$headers->{"reply-to"},
							$headers->{"from"})
		    {
			    if (CheckEmail($reader->DecodeHeader($et)))
				{
				    $c->{"errors-to"} = $et;
					chomp $c->{"errors-to"};
				    last;
			    }
		    }
			$header_log->entry("errors-to", SLOG_DEBUG, 
		                       { value => $c->{"errors-to"} },
                               "Using %value% as errors address.");
	    }

		# Set back-references to root headers.
		$c->{"headers"} = $headers;

		# Got common headers, skip this stuff next time.
		$c->{"unset"} = 0;
	}

	# Get MIME info from a few headers.
	$reader->ParseContentHeader("; _type=", "content-type", $header_log);
	$reader->ParseContentHeader("; _encoding=", "content-transfer-encoding", $header_log);
	$reader->ParseContentHeader("; _disposition=", "content-disposition", $header_log);
	$reader->ParseContentHeader("; _description=", "content-description", $header_log);
	$reader->ParseContentHeader("; _id=", "content-id", $header_log);

	# Set content decoder and encoder
	$reader->{"mime"}->{"_encoding"} =~ s/^\s+//s;
	$reader->{"mime"}->{"_encoding"} =~ s/\s+.*$//s;
	my $enc = lc($reader->{"mime"}->{"_encoding"} || "8bit");
	$reader->{"mime"}->{"_encoding"} = $enc;
	if (my $e = $reader->{"decoders"}->{$enc})
	{
		$reader->{"decoder"} = $e;
        $reader->{"EOL"} = $CRLF if ($enc =~ /^quoted/i);
	}
	if (my $e = $reader->{"encoders"}->{$enc})
	{
		$reader->{"encoder"} = $e;
	}

	$header_log->entry("mime", SLOG_DEBUG, $reader->{"mime"}, 
	                   "Got MIME info: %ATTRIBUTES%");

	# Get rid of Content-Length: and Lines: headers, because we just /know/ 
	# we are going to FUBAR them.
	$rawheader =~ s/^((Lines|Content-Length):.*?)$/X-FUBAR-$1/mgi;
	undef $headers->{"content-length"};
	undef $headers->{"lines"};

	# Export...
	$reader->{"rawheader"} = $rawheader;

	# Decode IOBuffer contents using info from header.
	if (my $decoder = $reader->{"decoder"})
	{
		$reader->{"IOBuffer"} = &$decoder($reader, $reader->{"IOBuffer"});
	}
}

# This routine parses a MIME Content-* header line, grabbing common tags 
# for latter processing.  This routine assumes that tags are unique even 
# across different header boundaries - that is, charset=xyz means the same
# thing whether it occurs in a Content-Type or a Content-Encoding line.
#
# The order of the attributes within each header line is recorded, so we 
# can rebuild them (more or less) accurately later.
#
sub ParseContentHeader
{
	my $reader = shift;
	$reader = ($reader->{"reader"} || $reader);
	my $mime = $reader->{"mime"};
	my $prefix = shift;
	my $header = shift;
	my $header_log = shift;
	my $unobfuscated = shift;

	return unless ($reader->{"headers"}->{ $header });
	my $line = $unobfuscated || $reader->{"headers"}->{ $header };
	
	# Detect stupid "name=file with spaces.exe" attributes as sent by
	# many massmailer viruses.
	$line =~ s/(name=)((?:[^\"\=\;\s]+\s)+[^\s\=\;\s]+)(\s*)$/$1"$2"$3/gim;

	# Fix first attribute so we recognize illegal header values
	$line =~ s/^\s*([^\"\;][^\;]*?)\s*$/\"$1\";/;
	$line =~ s/^\s*([^\"\;][^\;]*);\s*/\"$1\"; /;

	# Add prefix to help parser
	$line =~ s/^\s*/$prefix/;
	my $tmp;
	
	$reader->{"mime-headers"}->{$header} = undef;
	my ($field, $seq, $eq, $data) = ( );
	while (PCHStep(\$line, \$field, \$seq, \$eq, \$data, $header_log))
	{
		my $charset = undef;
		my $comments = undef;

        # Kill trivial quotes...
		$data =~ s/^\"([^\"]*)\"$/$1/s; 

        # Save stuff...
		my $all = { 
		    name => $field,
			raw  => $data,
		};
		if ($field eq "boundary")
		{
		    # We also need the un-decoded boundary string, to handle
     		# boundaries which contain RFC822 comments.
		    $mime->{"undecoded-boundary"} = $data;
		}

        # Kill the non-trivial quotes...
		$data =~ s/\"((?:[^\"\\]|\\.)*)\"/$1/gs;

		# RFC2231 MIME Parameter Value and Encoded Word Extensions. 
		if ($eq eq "*=")
		{
		    # Record charset
		    $charset = $1 if ($data =~ s/^(.*?)\'\'//);

			# Decode data...
			$data =~ s/%([A-F0-9][A-F0-9])/ chr(hex($1)) /gie;

			$all->{"data"}    = $mime->{lc($field)}     = $data;
			$all->{"charset"} = $mime->{"*".lc($field)} = $charset;
		}
		else
		{
	        $all->{"data"}     = $mime->{lc($field)}     = $reader->DecodeHeader($data, 1, \$comments);
		    $all->{"comments"} = $mime->{"#".lc($field)} = $comments if ($comments);
		}

		$reader->{"mime-headers"}->{$header} .= $field . " ";
		push @{ $reader->{"mime-all"} }, $all;
	}
}
sub PCHStep
{
    my ($lref, $fref, $sref, $eref, $dref, $log) = @_;

	my $junk = '';
	if ($$lref =~ s/^([^\=]+)(;[^\=]+=)/$2/s)
	{
	    $junk .= $1;
	}
	while ($$lref =~ s/^(\s*;\s+|\s+;\s*)([^\s=\*]*?)(\(.*?[^\\]\))/$1$2/s)
	{
	    $junk .= $3;
	}
	if ($junk)
	{
	    $log->entry("mime-junk", SLOG_WARNING, 
		            { junk => $junk },
					"Ignored junk while parsing header: %junk%");
	}
	return undef unless ($$lref =~ /=/);
	
	my $found = $$lref =~ s/(?:\s*;\s+|\s+;\s*)              # Seperator...
	                        ([^\s=\*]+?)                     # Attribute name
			                (\*\d+|)                         # RFC2231 sequence ID
			                \s*(\*=|=)\s*                    # = or *= (RFC2231)
   			                ((?:\((?:[^\)\\]*|\\.)?\)        # Data: comments
							   |\"[^\"]*?\"                  # Data: "quoted"
							   |[^\;\s\"][^\s\;]*)*)         # Data: unquoted
			               //six;
	($$fref, $sref, $$eref, $$dref) = (lc($1), $2, $3, $4);

    return $found;
}

# Get all MIME attributes matching a certain pattern
sub GetMIMEAttributes
{
    my ($self, $re_attr) = @_;
    return grep { $_->{"name"} =~ $re_attr } @{ $self->{"mime-all"} };
}

# Get a parser for this part.
# Optionally takes a MIME type as the first argument.
#
sub GetBodyParser
{
	my $reader = shift;
	my $mimetype = shift || lc($reader->{"mime"}->{"_type"});
	my $parser;

	do
	{
		$parser = $reader->{"parsers"}->{$mimetype};
		if (!$parser)
		{
		    $mimetype =~ s/\/\*$//;		# blah/* -> blah
			$mimetype =~ s/[^\/]*$/*/;	# blah/blah -> blah/*
		}
	}
	while ((!$parser) && ($mimetype !~ /^\*$/));
	
	if ($parser)
	{
	    $reader->{"log"}->entry("body", SLOG_TRACE, 
		                        { pos => $reader->{"Read_Bytes"} }, 
								"Parsing body as $mimetype");
	}

	return $parser;
}

# Top level body parsing routine.
#
sub ParseBody
{
	my $reader = shift;
	$reader = ($reader->{"reader"} || $reader);

    my $parser = $reader->GetBodyParser();
	if (!$parser)
	{
		$parser = ($reader->{"parsers"}->{"DEFAULT"} || \&ParserCat);
		$reader->{"log"}->entry("body", SLOG_TRACE, 
		                        { pos => $reader->{"Read_Bytes"} }, 
                                "Parsing body as DEFAULT.");
	}

	return &$parser($reader); 
	die "Ugh!\n";
}

# Switch to reading from an alternate input source.
#
sub ReadFrom
{
	my $reader = shift;	
	$reader = ($reader->{"reader"} || $reader);
	my $fh = shift;

	$reader->{"IN"} = $fh;
	$reader->{"INforce"} = 1;
	$reader->{"newline_in"} = undef;
	$reader->{"eop"} = undef;
	$reader->{"decoder"} = $reader->{"decoders"}->{"8bit"};
}

# This function will return the next few bytes of data in this part, 
# usually about 8k or less.
#
# "undef" will be returned if an attempt is made to read beyond the end 
# of the part.
#
sub RawRead
{
	my $reader = shift;	
	$reader = ($reader->{"reader"} || $reader);

	return undef if ($reader->{"eop"});
	
	my $line;
	if (!$reader->{"INforce"})
	{
		# Slurp at most 8k of input at a time.
		return $reader->{"parent"}->Read(8192);
	}

    # Slurp 8k of input at a time.
    my $bytes = $reader->{"IN"}->read($line, 8192);		
    return undef unless ($bytes);

    if (defined $reader->{"newline_in"})
	{
	    # Figure out what our newline convention is...
	    my $nl = $reader->{"newline_in"};
		if ((!$nl) && ($line =~ /($CR?$LF)/s))
		{
		    my $writer = $reader->{"writer"} || $reader;
	        $reader->{"newline_in"} = $nl = $1;
			$writer->{"newline_out"} = $nl 
			  unless ($writer->{"newline_out"});
		}
		$line =~ s/($CR?$LF)/$LF/gs; # if ($nl);
	}

	return $line;
}


# This function will return the next available line of decoded text, using 
# (if necessary) multiple calls to $reader->RawRead().
#
# An optional parameter may specify the maximum allowable line length, the
# default is 8k.  A max length of 0 is "unlimited".
#
sub Read
{
	my $reader = shift;
	$reader = ($reader->{"reader"} || $reader);

	my $maxlen = shift;
	$maxlen = 8192 unless (defined $maxlen);

	my $eol = $reader->{"EOL"};
    my $buf = \$reader->{"IOBuffer"};
    while (((!$maxlen) || (length($$buf) < $maxlen)) 
        && (!$reader->{"eop"})
        && ($$buf !~ /\Q$eol\E/))
    { 
        my $raw = $reader->RawRead();
        unless (defined $raw)
        {
            $reader->{"eop"} = 1;
            last;
        };

        my $eop = $reader->{"boundary"};
        if (defined $eop)
        {
            my $bp = $reader->{"_boundpre"};
            my ($i, $b, $o) = split(/(^\Q$bp$eop\E(?:--)?\s*$)/m, $raw, 2);
            if ($b)
            {
                $reader->{"postponed"} .= $b.$o;
                $reader->{"eop"} = 1;
                $raw = $i;
            }
        }

	    my $decoder = ($reader->{"decoder"} || 
                       $default_decoderhash->{"DEFAULT"});
        $$buf .= &$decoder($reader, $raw);
    }
    
	my ($l, $e, $d) = split(/(\Q$eol\E)/, $$buf, 2);
    $reader->{"IOBuffer"} = $d;
    $reader->{"Read_Bytes"} += length($l)+length($e);
    return $l.$e if ($e);
    return $l;
}


# This function will put text back on the input buffer, allowing us to do 
# a bit of read-ahead without messing things up too much.
#
sub UnRead
{
	my $reader = shift;
	my $data = shift;
	
	$reader = ($reader->{"reader"} || $reader);
    $reader->{"Read_Bytes"} -= length($data);
	$reader->{"IOBuffer"} = $data . $reader->{"IOBuffer"};
}

# Read a few bytes of data and then unread it immediately.
sub Readahead
{
	my $part = shift;
    my $bytes = shift;

	my $eol = $part->{"EOL"};
	$part->{"EOL"} = "etta er smilega undarlegur newline stafur!"; # FIXME
	my $readahead = $part->Read(512);
	$part->UnRead($readahead);
	$part->{"EOL"} = $eol;

    return $readahead;
}



##############################################################################
# Part writer methods

# This will create a writer part usable for output, possibly "cloned" from
# a part which provides input.  This allows us to recode a part, by cloning
# a writer with different MIME attributes from the original.
#
# Usage: $writer = Anomy::MIMEStream->Writer($reader,
#                    {
#						"_type" => "mimetype/subtype", 
#                       "_encoding" => "encoding",
#                       ...
#                    },
#				   );
#
sub Writer
{
	my $proto = shift;
	my $reader = shift;
	my $attributes = shift;
	my $placement = shift;
	my $writer;
    my $cloning = 0;

	if (defined $reader)
	{
		if (!$reader->{"writer"})
		{
		    # New writer
		    $writer = $proto->New();
			my $l = $writer->{"log"};

			# Clone the part.
			if ($reader->{"writer"})
			{
			   # Part already has a writer.  Deal.
			}
			else
			{
			    %{ $writer } = %{ $reader };
                $cloning = 1;
			}
			
			# We'll be modifying the headers, so...
			$writer->{"headers"} = { };
			%{ $writer->{"headers"} } = %{ $reader->{"headers"} };
			
			undef $writer->{"headers"}->{"content-type"};
			undef $writer->{"headers"}->{"content-disposition"};
			undef $writer->{"headers"}->{"content-transfer-encoding"};
			undef $writer->{"headers"}->{"content-length"};			

			# Chain logs
			$writer->{"log"} = $l;
			$reader->{"log"}->sublog("Writer", SLOG_TRACE, 
			                         { pos => $reader->{"Read_Bytes"} },
									 $writer->{"log"});

	   	    # The part is now schitzo!
			$reader->{"writer"} = $writer;  # WARNING:  Circular reference, GC
			$writer->{"reader"} = $reader;  #           won't work. Use Amputate()
		}
		else
	    {
		    $writer = $reader->{"writer"};
		}
	}
	else
	{
	    $writer = $proto->New();
	}

    $writer->KillRawMimeHeaders();
	$writer->{"rawheader"} =~ s/\s*$//s;
	$writer->{"rawheader"} .= "$LF" if ($writer->{"rawheader"} ne "");
	if (!$writer->{"parent"})
	{
	    $writer->{"rawheader"} .= "MIME-Version: 1.0$LF";
		$writer->{"headers"}->{"mime-version"} = "1.0$LF";
	}

    my $log = $writer->{"log"};

	if (defined $attributes)
	{
		# Set MIME attributes
		$writer->{"mime"} = $attributes;
        $log->entry("mime", SLOG_TRACE, $attributes, 
		            "Set MIME info to: %ATTRIBUTES%");
	}
	if (defined $placement)
	{
	    $writer->{"mime-headers"} = $placement;
	}
	
	# Set up headers
	#
	my $tmp;

	# Content-Type: text/plain; charset="iso-8859-1"
	# Content-Type: multipart/mixed; boundary="abacabacab"
	my $ct;	
	$ct = ($writer->{"mime"}->{"_type"} || "text/plain");
	$tmp = $writer->{"mime"}->{"boundary"};
	if (defined $tmp)
	{
		$ct .= "; boundary=\"". $tmp .'"';
	}
	else
	{
		$ct .= '; charset="'. $tmp .'"' 
			if ($tmp = $writer->{"mime"}->{"charset"});
	}
	$ct .= '; protocol="'. $tmp .'"' if ($tmp = $writer->{"mime"}->{"protocol"});
	$ct .= $LF;

	$writer->{"headers"}->{"content-type"} = $ct;
	$writer->{"rawheader"} .= "Content-Type: ". $ct;

	# Content-Transfer-Encoding: 8bit
	if ($writer->{"mime"}->{"_encoding"})
	{
		my $cte;
		$cte = ($writer->{"mime"}->{"_encoding"} || "8bit");
		$cte .= $LF;

		$writer->{"headers"}->{"content-transfer-encoding"} = $cte;
		$writer->{"rawheader"} .= "Content-Transfer-Encoding: ". $cte;
	}

	# Content-Disposition: attachment; filename="test.txt"
	if (($writer->{"mime"}->{"_disposition"}) ||
	    ($writer->{"mime"}->{"filename"}))
	{
		my $cd;
		$cd = ($writer->{"mime"}->{"_disposition"} || "attachment");
		$cd .= "; filename=\"". $writer->EncodeHeader($tmp) .'"'
			if ($tmp = $writer->{"mime"}->{"filename"});
		$cd .= "$LF";

		$writer->{"headers"}->{"content-disposition"} = $cd;
		$writer->{"rawheader"} .= "Content-Disposition: ". $cd;
	}

	$writer->{"rawheader"} .= "$LF";
	
	# Set up encoder
    if (my $e = $writer->{"encoders"}->{ lc($writer->{"mime"}->{"_encoding"}) })
    {
	    $writer->{"encoder"} = $e;
    };

	return $writer;
}

# Detaches a part from it's clones, to let the garbage collector collect 
# either or both of the parts.
#
sub Amputate
{
	my $part = shift;
	
	if ($part->{"writer"})
	{
		$part->{"writer"}->{"reader"} = undef;
		$part->{"writer"}->{"writer"} = undef;
		$part->{"writer"} = undef;
	}
	
	if ($part->{"reader"})
	{
		$part->{"reader"}->{"reader"} = undef;
		$part->{"reader"}->{"writer"} = undef;
		$part->{"reader"} = undef;
	}
}


sub WriteHeader
{
	my $writer = shift;
	$writer = ($writer->{"writer"} || $writer);
	my $parent = $writer->{"parent"};
	$parent = ($parent->{"writer"} || $parent) if ($parent);
	
	if (defined $parent)
	{
		# Print leading boundary unless this is an inline UU attachment.
		if ($writer->{"uupart"})
		{
			# Fix rawheader, in case some parser messed it up.
			$writer->{"rawheader"} =~ s/$LF+/$LF/gs;
			if ($writer->{"rawheader"} !~ /^begin (\d\d\d\d?) (\S+.*\S+)\s*$/is)
			{
				$writer->{"rawheader"} = "begin " .
										 $writer->{"mime"}->{"permissions"} .
										 " " .
										 $writer->{"mime"}->{"filename"} .
										 "$LF";
			}
		}
		else
		{
			my ($boundary, $boundpre);
		
            if ($parent->{"writer"})
            {
			    $boundary = $parent->{"writer"}->{"mime"}->{"boundary"};
			    $boundpre = $parent->{"writer"}->{"mime"}->{"_boundpre"};
            }
            else
            {
			    $boundary = $parent->{"mime"}->{"boundary"};
                $boundpre = $parent->{"mime"}->{"_boundpre"};
            }

            if (defined $boundary)
			{
			    $parent->Write("$LF") unless ($parent->{"Wrote_NL"});
			    $parent->Write("$boundpre$boundary$LF")
			}
		}
		$parent->Write($writer->{"UNIX-FROM"});
		$parent->Write($writer->{"rawheader"});
	}
	else
	{
	    my $data = $writer->{"UNIX-FROM"}.$writer->{"rawheader"};
	    my $nl = $writer->{"newline_out"};
		$data =~ s/$LF/$nl/gs if ($nl);
		$writer->{"OUT"}->print($data);
	}
}

# This encodes and writes data to our output, using calls to the parent's
# Write() if necessary.
#
sub Write
{
	my $part = shift;
	my $writer = ($part->{"writer"} || $part);

	my $data = shift;
	my $encoder = ($writer->{"encoder"} || $default_encoderhash->{"DEFAULT"});

	$data = &$encoder($writer, $data);
	return unless ($data);

	if (!$writer->{"OUTforce"})
	{
        $writer->{"Wrote_Bytes"} += length($data);
		$writer->{"parent"}->Write($data);
	}
	elsif (my $OUT = $writer->{"OUT"})
	{
	    my $nl = $writer->{"newline_out"};
		$data =~ s/$LF/$nl/gs if ($nl);
        $writer->{"Wrote_Bytes"} += length($data);
	    $OUT->print($data);
	}
	$writer->{"Wrote_NL"} = $part->{"Wrote_NL"} = ($data =~ /$LF$/s);
}
sub print 
{ 
    Write(@_);
}

# The same as Write, only this is meant for text messages which have
# embedded Unix-style newlines.
sub WriteText
{
	my ($part, $data) = @_;
    $data =~ s/$LF/$part->{EOL}/gs;
    return $part->Write($data);
}


# Close a previously opened sequence of parts in the output stream.
# This should only be called at the end of a multipart/* part.
#
sub Close
{
	my $writer = shift;
	$writer = ($writer->{"writer"} || $writer);
	
	my $boundary = $writer->{"mime"}->{"boundary"};
	if (defined $boundary && ($writer->{"postamble"} !~ /\Q$boundary\E/))
	{
	    my $boundpre = $writer->{"mime"}->{"_boundpre"};
	    $writer->Write("$LF") unless $writer->{"Wrote_NL"};
		$writer->Write($boundpre.$boundary."--$LF");
	}
#	$writer->{"postamble"} =~ s/$LF$//s;
	$writer->Write($writer->{"postamble"});
	$writer->{"postamble"} = undef;
	$writer->Write(undef);
}


##############################################################################
# These are renderers - they can be either decoders or encoders.
#

sub RenderHtmlAsText
{
	my $part = shift;
	my $line = shift;
	my $state;
	
	# Get state.
	unless ($state = $part->{"RenderHtmlAsText"})
	{
		$state = $part->{"RenderHtmlAsText"} = {
			"pre" => 0,
			"indent" => "",
			"leftovers" => "",
		};
	}

	my $leftovers = \$state->{"leftovers"};
	my $indent = \$state->{"indent"};
	my $pre = \$state->{"pre"};
	my $out = "";

	# Flush leftovers if we're at the end of the data stream.
	if (!$line)
	{
		$line = $$leftovers;
		$$leftovers = undef;
		return $line;
	}
	
	$line = $$leftovers . $line;
	$$leftovers = undef;

	if ($line =~ /^(.*)<PRE.*?>(.*)$/)
	{
		my ($a,$b) = ($1,$2);
		$out .= StripHTML($a, $pre, $indent, $leftovers);
		$$pre = 1;
		$line = $$leftovers . $b;
	}
	if ($line =~ /^(.*)<\/PRE.*?>(.*)$/)
	{
		my ($a,$b) = ($1,$2);
		$out .= StripHTML($a, $pre, $indent, $leftovers) ."$LF". $$indent;
		$$pre = 0;
		$line = $$leftovers . $b;
	}

	return $out . StripHTML($line, $pre, $indent, $leftovers);
}

# Helper function for RenderHtmlAsText
#
sub StripHTML
{
	my $line = shift;
	my $pre = shift;
	my $indent = shift;
	my $leftovers = shift;

	$line =~ s/\s+/ /sg unless ($$pre);
	
	while ($line =~ s/\s*<[UO]L.*?>\s*/$LF/si)
	{
		$$indent .= "\t";
	}
	while ($line =~ s/\s*<\/[OU]L.*?>\s*/$LF/si)
	{
		$$indent =~ s/\t$//;
	}
	
	$line =~ s/&nbsp;/ /sgi;
	$line =~ s/\s*<HR.*?>\s*/$LF------------------------------------------------------------$LF$$indent/sgi;
	$line =~ s/\s*<(P|H\d).*?>\s*/$LF$LF$$indent/sgi;
	$line =~ s/<A\s+[^>]*HREF=\"([^\"#][^\"]+)\".*?>(.*?)<\/A>/$2 &lt;$1&gt;/sgi;
	$line =~ s/\s*<LI>\s*/$LF$$indent * /sgi;
	$line =~ s/\s*<\/?((B|T)R|HEAD).*?>\s*/$LF$$indent/sgi;
	$line =~ s/\s*<\/TD.*?>\s*/\t/sgi;
	$line =~ s/<.+?>//sg;
#	$line =~ s/([^\s][^$LF]{40,60}.*?)\s+/$1$LF$indent/sg unless ($pre);

	if ($line =~ s/(<.+)$//si)
	{
		$$leftovers = $1;
		if (length($$leftovers) > 2048) 
		{
			$$leftovers .= "><";
		}
		$line .= " ";
	}

	$line =~ s/&lt;/</sgi;
	$line =~ s/&gt;/>/sgi;
	return $line;
}


##############################################################################
# Useful decoders for common data encodings

sub Decode8bit
{
	my $reader = shift;
	my $line = shift;

	return $line;
}

sub DecodeQP
{
	my $reader = shift;
	my $line = shift;

    # This is based on MIME::QuotedPrint, but modified to guarantee that
    # decoded data contains CRLFs wherever Windows would have seen such
    # things...
    my $decoded = $line;
    $decoded =~ s/($CRLF|$CR|$LF)/$CRLF/gs;
    $decoded =~ s/[ \t]+?($CRLF|$)/$1/gs;  # rule #3 (delete trailing space)
    $decoded =~ s/=($CRLF)//gs;            # rule #5 (soft line breaks)
    $decoded =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/gse;
	
	# Initialize the QP recoding cache
	if (!defined $reader->{QPcache})
	{
        $reader->{QPcache} = { };
        $reader->{QPcacheKeys} = [ ];
	}
	
	# Cache the unencoded form of the data, so we can use it verbatim
	# when reencoding.
	$reader->{QPcache}->{$decoded} = $line;
	push @{ $reader->{QPcacheKeys} }, $decoded;

	# Limit the total cache size to somethign reasonable.
	if (65535 < ($reader->{QPcacheSize} += length($line)+length($decoded)))
	{
	    my $del = shift @{ $reader->{QPcacheKeys} };
	    $reader->{QPcacheSize} -= length($del).length($reader->{QPcache}->{$del});
	    delete $reader->{QPcache}->{$del};
	}

#   dbugprint($reader, "DecodeQP: <- ", $line);
#   dbugprint($reader, "DecodeQP: -> ", $decoded);
	return $decoded;
}

sub DecodeBase64
{
	my $reader = shift;
	my $line = shift;

	# This hacks the decoder to handle mangled Base64 text properly, by
	# properly ignoring white space etc.  Note that this will lose the 
	# last 1-3 bytes of data if it isn't properly padded.  We also record
	# the encoded line-length, so we can re-encode stuff using the same 
	# length.
	#
	if (!$reader->{"DecodeBase64llen"}) 
	{
		$line =~ s/[^A-Za-z0-9\/+$LF=]+//gs;
		
		my $nlpos = int((3*(index($line, "$LF") + 1)) / 4);
		$line =~ s/$LF//gs;

		my $llen = int((3*length($line)) / 4);
		my $t = $llen;
		$t = $nlpos if (($nlpos < $llen) && ($nlpos > 0));

		$reader->{"DecodeBase64llen"} = $t;
	}
	else
	{
		$line =~ s/[^A-Za-z0-9\/+=]+//gs;
	}
	$line = $reader->{"DecodeBase64"} . $line;
	$line =~ s/^((?:....)*)(.*?)$/$1/s;
	$reader->{"DecodeBase64"} = $2;

	return decode_base64($line);
}

sub DecodeUU
{
	my $reader = shift;
	my $line = shift;

	if ($line =~ /^end/i)
	{
		$reader->{"postponed"} .= $line;
		$reader->{"eop"} = 1;
		return undef;
	}

	# Sanitiy check - is this really a uuencoded part?
	if ($line =~ /^(.)(.*)\s*$/)
	{
		my $bytes = (ord($1) - ord(' ')) % 64;
		my $len = length($2);

		use integer;
		if ( (($bytes + 2) / 3) != ($len / 4) )
		{
			$reader->{"postponed"} .= $line;
			$reader->{"eop"} = 1;
			return undef;
		}
	}

	return unpack("u", $line);
}

# This will decode a forwarded message so it (probably) looks like the
# original.
#
sub DecodeFwd
{
	my $writer = shift;
	my $line = shift;

	if ($line =~ /^---+\s+End.*?forward/i)
	{
		$writer->{"postponed"} .= $line;
		$writer->{"eop"} = 1;
		return undef;
	}

	$line =~ s/^>(From\s+)/$1/gsm;
	$line =~ s/^- (--)/$1/gsm;

	return $line;
}

# This routine will decode any header-encoded substrings in a string.
# It's generally a good idea not to use this on a header until after 
# you have picked the header apart, since the encoded string might
# contain stuff that would confuse the parser.
#
sub DecodeHeader
{
	my $writer = shift;
	my $string = shift;
	my $comments = shift;
	my $t;
	my $commentref = shift || \$t;

    return "" unless $string;

	# Remove header-continuation characters.  EXPIRAMENTAL!
	$string =~ s/(\?=)\s*$LF\s+(=\?)/$1$2/gs;
	$string =~ s/\s*$LF(\s+)/$1/gs;

	if ($comments)
	{
	    # Remove RFC822 comments and return them seperately
	    $string = " ". $string;
	    $$commentref = "";
	    while ($string =~ s/([^\\])\((.*?[^\\])\)/$1/)
	    {
	        $$commentref .= $1;
	    }
	    $string =~ s/^ //;
	}

	# Decode MIME stuff
	$string =~ s/=\?([^\?]+)\?[Bb]\?([^\?]+)\?=/ $t=$2; $_=decode_base64($t) /ge;
	$string =~ s/=\?([^\?]+)\?[Qq]\?([^\?]+)\?=/ $t=$2; $t=~s|_|=20|g; $_=decode_qp($t) /ge;
 
	return $string;
}


##############################################################################
# Useful encoders for common data encodings
#
# These all either return the encoded data or <undef> if the supplied
# data wasn't sufficient to complete encoding.  If too much data is supplied,
# remainders will be saved for later.  If the data is <undef>, then the
# remainder in the buffer will be padded, encoded and returned.

sub Encode8bit
{
	my $writer = shift;
	my $line = shift;

	return $line;
}

sub Encode7bit
{
	my $writer = shift;
	my $line = shift;

	# This performs some "nice" iso-8859-1 -> US-ASCII munging.
	# Probably not be a good idea (shouldn't this be in the "charset" mapping
	# code I haven't written?) ... but hopefully it'll never even be used, so 
	# that's OK.
	#
	$line =~ tr//aAoOynNcCaeiouyodAEIOUY0D/;
	$line =~ s//th/g;
	$line =~ s//Th/g;
	$line =~ s//ae/g;
	$line =~ s//Ae/g;
	# Give up...
	$line =~ s/[\x80-\xFF]/\?/g;

	return $line;
}

sub EncodeQP
{
	my $writer = shift;
	my $reader = ($writer->{"reader"} || $writer);
	my $line = shift;
    return $line unless ($line);

#   dbugprint($reader,"EncodeQP: <- ", $line);

	# Use the cache if possible.
        my $cache = $reader->{QPcache} || { };
	return $cache->{$line} if ($cache->{$line});

    # This is based on MIME::QuotedPrint, but modified to assume only
    # Windows (CRLF) newline semantics.
    my $dnl = $reader->{"EOL"};
    $dnl  =~ s/([^ \t!-<>-~])/sprintf("=%02X", ord($1))/egs;  # rule #2,#3
    $line =~ s/([^ \t!-<>-~])/sprintf("=%02X", ord($1))/egs;  # rule #2,#3
    $line =~ s/$dnl/$LF/gs; # Temporarily represent newlines with $LF
    $line =~ s/([ \t])($LF|$)/
      join('', map( { sprintf("=%02X", ord($_)) }
                   split('', $1)), $2
      )/egs;                        # rule #3 (encode whitespace at eol)

    # rule #5 (lines must be shorter than 76 chars, but we are not allowed
    # to break =XX escapes).
    $line =~ s/([^$LF]{75})/$1=$CR/gs;            # Insert CRs...
    $line =~ s/(=[0-9A-Fa-f]{0,1})=$CR/=$CR$1/gs; # Fix breakage
    $line .= "=$CR" unless ($line =~ /[$CR$LF]$/s);

    # Convert to parent's newline convention.
    $line =~ s/[$CR$LF]/$reader->{"ENCODED_EOL"}/gs;

#   dbugprint($reader, "EncodeQP: -> ", $line);
	return $line;
}

sub EncodeBase64
{
	my $writer = shift;
	my $line = shift;

	# Flush buffer on undef
	if (!defined $line)
	{
		return undef unless ($writer->{"EncodeBase64"} ne undef);
		$line = $writer->{"EncodeBase64"};
		undef $writer->{"EncodeBase64"};
		return encode_base64($line)."$LF";
	}

	# Get old stuff from buffer.
	$line = $writer->{"EncodeBase64"} . $line;

	# Chop data up, as recommended by MIME::Base64 pod or using the
	# line-size of the original message if possible.
	my $out;
	my $llen = (int($writer->{"DecodeBase64llen"}/3)*3) || 57;
	while ($line =~ s/^(.{$llen,$llen})//s)
	{
		my $chunk = $1;
		$out .= encode_base64($chunk);
	}
	
	# Save remainder.
	$writer->{"EncodeBase64"} = $line;
	
	# Return encoded data or undef.
	return $out;
}

sub EncodeUU
{
	my $writer = shift;
	my $line = shift;
	my $data;
	
	if (!defined $line)
	{
		return "" unless (defined $writer->{"EncodeUU"});

		$data = pack('u', $writer->{"EncodeUU"});
		undef $writer->{"EncodeUU"};
		return $data . "`$LF";
	}
	else
	{
		$line = $writer->{"EncodeUU"} . $line;

		use integer;
		my $chunks = length($line) / 45;
		
		if ($chunks < 1)
		{
			# Need... more... data...
			$writer->{"EncodeUU"} = $line;
			return undef;
		}
		else
		{
			$data = substr($line, 0, $chunks * 45);
			$writer->{"EncodeUU"} = substr($line, $chunks * 45);
			return pack('u', $data);
		}
	}
	# Not reached
}

# This will encode a message for forwarding.
#
sub EncodeFwd
{
	my $writer = shift;
	my $line = shift;

	$line =~ s/^(From\s+)/>$1/gsm;
	$line =~ s/^(--)/- $1/gsm;
	
	return $line;
}

# This routine will encode a string so it is safe for 
# inclusion within a message header, that is, if it contains
# any non-7bit characters or other things which might get
# misunderstood, it will be Base64-encoded.
#
sub EncodeHeader
{
	my $writer = shift;
	my $string = shift;
	
	# FIXME: RFC compliance, check it!
	if ($string =~ /[^A-Za-z0-9,_!\?\.\\\/\@\$\+\=: -]/)
	{
	        my $prefix = '=?'.$writer->{"mime"}->{"charset"}.'?Q?';
		
		$string = encode_qp($string);
		$string =~ s/_/=5F/g;
		$string =~ s/ /_/g;

		$string =~ s/=$CR?$LF//gs;
		chomp $string;
		$string = $prefix.$string.'?=';
	}
	return $string;
}


##############################################################################
# Useful handlers for common MIME types.
#
# These all assume the library is being used by a stream editor, that is
# that our output will be a new MIME message.
#

# This parser deletes a part from the stream.
#
sub ParserDiscard
{
 	my $reader = shift;

    $reader->{"log"}->entry("parser", SLOG_TRACE, undef, "ParserDiscard");
	$reader->{"decoder"} = \&Decode8bit;
	while (my $l = $reader->Read())
	{
		# discard
	}
}

# This parser doesn't modify the part at all.
#
sub ParserCat
{
 	my $part = shift;
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "ParserCat");

	# Input -> Output !
	$part->WriteHeader();
	while (my $l = $part->Read())
	{
	   	$part->Write($l);
	}

	# Flush
	$part->Write(undef);
}

# This parser is for plain text you expect might contain inline uuencoded
# attachments (e.g. mail from Outlook).  This will also check for forwarded
# messages, and scan them as embedded message/rfc822 parts.
#
sub ParserUUText
{
 	my $part = shift;
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "ParserUUText");
	
	# Input -> Output !
	$part->WriteHeader();
	while (my $l = $part->Read())
	{
		if ($l =~ /^begin (\d\d\d\d?) \S+/)
		{
			$part->UnRead($l);
			$l = $part->ParserUUAttachment();
		}
		if ($l =~ /^---+.*?Forward.*?---+$/i)
		{
		   	$part->Write($l);
			$l = $part->ParserForwardedMessage();
		}
	   	$part->Write($l);
	}

	# Flush
	$part->Write(undef);
}

# This parser recodes the part to 8-bits, no matter what it contains.
#
sub ParserForce8bit
{
 	my $reader = shift;
	my $writer = $reader->{"writer"};
    $reader->{"log"}->entry("parser", SLOG_TRACE, undef, "ParserForce8bit");

	# Map current encoding to 8-bit encoding, cloning a new writer
	# if necessary.
	if ($writer)
	{
		$writer->{"mime"}->{"_encoding"} = "8bit";
	}
	if ($reader->{"mime"}->{"_encoding"} =~ /8bit/i)
	{
		$writer = $reader;
	}
	else
	{
		my $recode = { };
		%{ $recode } = %{ $reader->{"mime"} };
		$recode->{"_encoding"} = "8bit";
		$writer = Anomy::MIMEStream->Writer($reader, $recode);
	}

	# Input -> Output !
	$writer->WriteHeader();
	while (my $l = $reader->Read())
	{
	   	$writer->Write($l);
	}

	# Need this, since the part contents might not end in a newline,
	# thus messing up following boundaries.
   	$writer->Write("$LF");
	
	# Flush
	$writer->Write(undef);

	# Detach writer from reader (for garbage collection).
	$writer->Amputate();
}

# This part will recursively parse the attached message.
#
sub ParserRFC822
{
 	my $part = shift;
	$part->WriteHeader();
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "ParserRFC822");

	my $subpart = Anomy::MIMEStream->New(undef, undef, undef, $part);
	
	# Check for newline conventions within the embedded message.
    $subpart->{"newline_in"} = 0;

	# We have to juggle boundary strings a little bit here, since we
	# share boundary strings with the encapsulated message.
	#
	$subpart->{"boundary"} = $part->{"boundary"};
	$subpart->{"_boundper"} = $part->{"_boundpre"};
	$part->{"mime"}->{"boundary"} = undef;

	$subpart->ParseHeader();
	$subpart->ParseBody();
	
	# And more juggling...
	$part->{"postponed"} = $subpart->{"postponed"} . $part->{"postponed"};
}

# This part will recursively parse each of the sub-parts in a 
# multipart-part.
#
sub ParserUnclosedMultipart
{
 	my $part = shift;
	my $writer = $part->{"writer"} || $part;
	my $reader = $part->{"reader"} || $part;
	$part->{"log"}->entry("parser", SLOG_TRACE, undef, "ParserUnclosedMultipart");

    my $sep_re = undef;

    # FIXME:  the boundary guesser should be made more robust by always
	#         guessing and using a regular expression with more than two
	# possiblities (decoded and undecoded).
	#
	$part->{"mime"}->{"boundary"} = $part->GuessBoundary()
	  unless (defined ($part->{"mime"}->{"boundary"}));

    if (defined $part->{"mime"}->{"boundary"})
	{
	    my $b1 = $part->{"mime"}->{"boundary"} || "";
		my $b2 = $part->{"mime"}->{"undecoded-boundary"} || $b1;
        # This should handle the following odd cases from Testvirus.org:
        #  - "MIME Boundary Space Gap Vulnerability"
        #  - "Long MIME Boundary Vulnerability"
        my $b3 = $b1 || $b2;
        $b3 =~ s/^\s+//g; 
        $b3 =~ s/^(.{20,20}).*$/$1/;
        my $pre = "(\Q--\E|)";
        $pre = "(\Q--\E)" if ($b1 eq "" || $b2 eq "");
	    $sep_re = "^$pre(\Q$b1\E|\Q$b2\E|\Q$b3\E[^\\s-]*)(--)?\\s*\$";
#       print STDERR "sep_re: $sep_re\n";
	}

	$part->WriteHeader();

	my $postponed = undef;
	my $line = undef;
	my $postamble = 0;
	$part->{"postamble"} = "";
    while (($line = $postponed) || ($line = $part->Read()))
	{
		$postponed = undef;

		if ((defined $sep_re) && ($line =~ $sep_re)) 
		{
			# Update regular expressions & boundary info
            my ($p, $b) = ($1, $2);
			$reader->{"mime"}->{"_boundpre"} = $p;
			$reader->{"mime"}->{"boundary"} = $b;
			$sep_re = "^(\Q$p\E)(\Q$b\E)(--)?\\s*\$";
#           print STDERR "sep_re: $sep_re\n";
			
			my $nextline = '';
            NEXTLINE: while ($nextline =~ /^\s*$/s)
            {
                $nextline .= $part->Read() || last NEXTLINE;
            }
			if ($nextline =~ /^(([A-Z\d-]+):|\s+\S+=)/im)
			{
                $nextline =~ s/^(\s*\n)*//m;
			    $part->UnRead($nextline);
			    if ($postamble)
				{
				    $part->Write($part->{"postamble"});
				    $part->{"postamble"} = $postamble = undef;
				}
			    my $subpart = Anomy::MIMEStream->New(undef, undef, undef, $part);
				$subpart->ParseHeader();
				$subpart->ParseBody();
				$postponed = $subpart->{"postponed"};
#				$postponed =~ s/$LF$//s;
			}
			else
			{
			    $part->UnRead($nextline);
#			    $line =~ s/$LF?$/$LF/;
		        $part->{"postamble"} .= $line;
			    $postamble = 1;
			}
		}
		elsif ($postamble)
		{
		    $part->{"postamble"} .= $line;
		}
		else
		{
		    $part->Write($line);
		}
	}
}
sub ParserMultipart
{
 	my $part = shift;
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "ParserMultipart");
	$part->ParserUnclosedMultipart();
	$part->Close();
}

# UU encoded attachment parser.
#
# Essentially we treat a UU-encoded block of text as a seperate part
# with a rather unusual header.
#
sub ParserUUAttachment
{
	my $reader = shift;
	$reader = ($reader->{"reader"} || $reader);
	my $subpart;
    $reader->{"log"}->entry("parser", SLOG_TRACE, undef, "ParserUUAttachment");

	# Read the "begin 664 blah.ext" line, use it as our raw header.
	#
	my $begin = $reader->Read();
	if ($begin =~ /^begin (\d*) (\S+.*\S+)\s*$/i)
	{
		$subpart = Anomy::MIMEStream->New(undef, undef, undef, $reader);
		$subpart->{"mime"}->{"permissions"} = $1;
		$subpart->{"mime"}->{"filename"} = $2;
	}
	else
	{
		return;
	}

	# Set up the header fields used by other parsers.
	#
	$subpart->{"rawheader"}	= $begin;
	$subpart->{"uupart"}	= 1;
	$subpart->{"decoder"}	= \&DecodeUU;
	$subpart->{"encoder"}	= \&EncodeUU;

	$subpart->{"mime"}->{"boundary"} = undef;
	$subpart->{"mime"}->{"_encoding"} = "uue";
	$subpart->{"mime-headers"} = { };
	$subpart->{"mime"}->{"_type"} = "INLINE/". $subpart->GuessMimeType();

	$subpart->ParseBody();
	$subpart->Write(undef);

	return $subpart->{"postponed"};
}

# Forwarded message parser.
#
# Attempt to recognize a forwarded message as an embedded message/rfc822 
# part with a weird encoding.
#
sub ParserForwardedMessage
{
 	my $part = shift;
	my $preamble;
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "ParserForwardedMessage");

	# Ignore leading empty lines.
	while (($preamble = $part->Read()) && ($preamble =~ /^\s*$/))
	{
		$part->Write($preamble);
	}
	return unless (defined $preamble);
	$part->UnRead($preamble);

	my $subpart = Anomy::MIMEStream->New(undef, undef, undef, $part);
	
	# We have to juggle encoders and decoders here to make the info 
	# in the forwarded message legible.  We wrap the encoders/decoders
	# for the current part, because the subpart may need it's own 
	# routines to handle it's own MIME encoding.
	my $reader = ($part->{"reader"} || $part);
	my $writer = ($part->{"writer"} || $part);
	my ($ode, $oen) = ($reader->{"decoder"}, $writer->{"encoder"});
	$reader->{"decoder"} = $reader->NestRenderers(\&DecodeFwd, $ode);
	$writer->{"encoder"} = $writer->NestRenderers($oen, \&EncodeFwd);

	$subpart->ParseHeader();
	$subpart->ParseBody();
	$subpart->Write(undef);

    # Fix encoders/decoders.
	$reader->{"decoder"} = $ode;
	$writer->{"encoder"} = $oen;

    return $subpart->{"postponed"};
}


##############################################################################
# Miscellania

# This function allows us to do cool things.  The following example will 
# make ParserCat automagically convert a QP-encoded HTML part to a QP-encoded 
# text part.  This will probably come in handy if we decide to to character
# set conversions or some such later on.
#
#   # Some stuff may already have been decoded:
#	$part->{"IOBuffer"} = $part->RendarHtmlAsText($part->{"IOBuffer"});
#   $part->{"decoder"} = $part->NestRenderers(\&RenderHtmlAsText, \&DecodeQP);
#
# Or, if we prefer to work with HTML within the parser:
#
#   $part->{"encoder"} = $part->NestRenderers(\&EncodeQP, \&RenderHtmlAsText);
#
# This is the first time I've used closures for anything useful!  Yay!
#
sub NestRenderers
{
	my $part = shift;
	my $outer = shift;
	my $inner = shift;

	return sub {
		my $p = shift; 
		return &$outer( $p, &$inner( $p, shift ) ); 
	};
}

# This will return a guess as to what the mime type is for the given
# part, determined by the file name.  This is a primitive 'magic'-like
# function.
#
# FIXME:  This function should be more easily configurable/extendable.
#
sub GuessMimeType
{
	my $part = shift;
	my $fn = $part->{"mime"}->{"filename"};

	# Take a peek at our contents...
    my $readahead = $part->Readahead(512);

	## Magic ##

    # Check incrementally larger chunks of our readahead buffer...
	for (my $len = 32; $len < 512; $len += 64)
	{
	     my $buff = substr($readahead, 0, $len);

         # Microsoft/DOS executables
         return "application/x-ms-dos-executable" if ($buff =~ /^MZ/s);

	     # message/rfc822
	     return "message/rfc822" if ($buff =~ /^\s*(From [^\n]+\n)?([A-Za-z0-9_-]+:\s+[^\n]+\n([\t ]+[^\n]+\n)*)(Subject|From|To|Sender|Received):/is);
	     
	     # PGP/GPG messages
	     return "multipart/signed" if ($buff =~ /^-+BEGIN.*?SIGNED\s+MESSAGE-+\s*$/mi);
	     return "application/pgp" if ($buff =~ /^-+BEGIN.*?PGP\s+MESSAGE-+\s*$/mi);

	     # Normal text messages...
	     return "text/plain" if ($buff =~ /^-*\s*(Forward|Original)/im);

	     # multipart/mixed
	     return "multipart/mixed" if ($part->GuessBoundary($buff));
	     
	     # HTML
	     return "text/html" if ($buff =~ /<(HTML|BODY)/i);
	     
	     # Shell scripts, C, C++ code
	     return "text/plain" if ($buff =~ /^#(!|include|define|ifn?def)/im);

	     # Java code
	     return "text/plain" if ($buff =~ /^(import |class |public )/im);
	}

	$fn = $part->{"mime"}->{"name"} unless ($fn);
	return "text/plain" if ($fn =~ /\.(txt|[ch](\+\+|pp))$/i);
	return "text/html" if ($fn =~ /\.html?$/i);

	# Default.
	return "application/octet-stream";
}

# This will return a guess as to what the boundary is for a given message
# part.
#
sub GuessBoundary
{
	my ($part, $readahead) = @_;
	my $fn = $part->{"mime"}->{"filename"};

	unless (defined $readahead)
	{
	    # Take a peek at our contents...
		my $eol = $part->{"EOL"};
		$part->{"EOL"} = "etta er smilega undarlegur newline stafur!"; # FIXME
		$readahead = $part->Read(512);
		$part->UnRead($readahead);
		$part->{"EOL"} = $eol;
	}

    if ($readahead =~ /^--(\S+[^$CR$LF]*)\s*($CR|$LF)(\s+\S+=|Content-|X-|MIME-)[A-Za-z0-9_-]+:/im)
	{
	    my $bound = $1;
		$bound =~ s/--$//;
		return $bound if ($bound ne "");
	}

	return undef;
}

# This will remove all MIME-related headers (except for the MIME-Version)
# from the part's "rawheader" variable, making way for new ones.
#
sub KillRawMimeHeaders
{
	my $part = shift;
	my $killed = "";
	
	my $rh = $LF.$part->{"rawheader"};
	while ($rh =~ s/($LF(MIME-Version:|Content-)[^$LF]*($LF([ \t]+|\S+\=)[^$LF]*)*)//si)
	{
	    $killed .= $1;
	};
	while ($rh =~ s/(${LF}begin \d\d\d\d? \S+[^$LF]*)//si)
	{
	    $killed .= $1;
	};
	$rh =~ s/^$LF\s*//s;
	$part->{"rawheader"} = $rh;

	return $killed;
}

# This routine generates a nice randomish boundary string, which is 
# guaranteed never to repeat itself within the same process.
#
my $MakeBoundarySeq = 0;
sub MakeBoundary
{
	my $boundary = 'MIMEStream=_'. $MakeBoundarySeq++ .'+'. 
			(300476 * rand()) . (rand() * $$);  # hopefully, overkill. :)

	$boundary =~ tr/./_/;
	
	return $boundary;
}

# Returns the "important" part of an email address, or nothing if the
# address is invalid.
#
sub CheckEmail
{
	my ($address, $nameref) = @_;
	my $t;
	$nameref = \$t unless ($nameref);	

	# FIXME:  Not RFC822 compliant!!

	# Remove comments, interpret them as names.
	$address = " ". $address;
	$$nameref = "";
	while ($address =~ s/(\"(.*?)\"|[^\\]\((.*?[^\\])\))//)
	{
		$$nameref .= $2 . $3;
	}

	# Find <email> in address
	if ($address =~ s/([^\\])<(.*?[^\\])>/$1/)	
	{
		$$nameref .= $address;
		$address = $2;
	}
	
	# Remove excess whitespace
	$address  =~ s/^\s*(.*?)\s*$/$1/;
	$$nameref =~ s/^\s*(.*?)\s*$/$1/;

	# By now $address should only be one word...
	return 0 if ($address =~ /\s/);

# Use this to require a FQDN.
#
#	if ($address =~ /^[a-zA-Z0-9_\=\/\!\.\%-]+\@[a-zA-Z0-9\.-]+\.[a-zA-Z0-9-]{2,4}$/)
	if ($address =~ /^[a-zA-Z0-9_\=\/\!\.\%-]+\@[a-zA-Z0-9\.-]+$/)
	{
		# It's okay!
		return $address;
	}

	return 0;
}


sub dbugprint
{
    my $reader = shift;
    my $data = join('', @_);
    $data =~ s/([$CR$LF])/sprintf("=%2.2X", ord($1))/gse;
    my $i = 0;
    while ($reader->{"parent"}) { $reader = $reader->{"parent"}; $i++ }
    print STDERR "$i: $data\n";
}

##############################################################################
									   
END { };
1;
#EOF#
# vi:ts=4 expandtab
