#!/usr/pkg/bin/perl

use strict;
use warnings;
require 5.010;

use utf8;
use Carp;
use Encode;
use Encode::Guess;

use Getopt::Long;
use Pod::Usage;
use Text::ParseWords qw(shellwords);
use Data::Dumper;
$Data::Dumper::Terse = 1;

##
## cdif: word context diff
##
## Copyright (c) 1992-2014 Kazumasa Utashiro
##
## Original: 1992/03/11
##
## Use and redistribution for ANY PURPOSE are granted as long as all
## copyright notices are retained.  Redistribution with modification
## is allowed provided that you make your modified version obviously
## distinguishable from the original one.  THIS SOFTWARE IS PROVIDED
## BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES ARE
## DISCLAIMED.
##

##
## options
##
my $opt_B;
my $opt_d;
my $opt_q;
my $opt_n;
my $opt_mecab;

my $opt_env = 1;
my $opt_stat;
my $opt_color = 1;
my $opt_256 = 1;
my @opt_colormap;
my $opt_commandcolor = 1;
my $opt_markcolor = 1;
my $opt_textcolor = 1;
my $opt_old = 1;
my $opt_new = 1;
my $opt_unknown = 1;
my $opt_command = 1;
my $opt_mark = 1;

my $opt_c;
my $opt_u;
my $opt_i;
my $opt_b;
my $opt_w;
my $opt_t;
my $opt_T;
my $opt_W;

my $rcs;
my @rcsopt;
my $diff;
my @diffopts;
my @sub_diffopts;

binmode STDOUT, ":encoding(utf8)";
binmode STDERR, ":encoding(utf8)";

##
## Special treatment --noenv option.
##
if (grep { $_ eq '--noenv' } @ARGV) {
    $opt_env = 0;
}
if ($opt_env and my $env = $ENV{'CDIFOPTS'}) {
    unshift(@ARGV, shellwords($env));
}

my @optargs = (
    "d=s" => \$opt_d,
    "B|char" => \$opt_B,
    "diff=s" => \$diff,
    "color!" => \$opt_color,
    "256!" => \$opt_256,
    "commandcolor|cc!" => \$opt_commandcolor,
    "markcolor|mc!" => \$opt_markcolor,
    "textcolor|tc!" => \$opt_textcolor,
    "colormap|cm=s" => \@opt_colormap,
    "h|help" => sub { pod2usage() },
    "man" => sub { pod2usage({-verbose => 2}) },
    "env!" => \$opt_env,
    "stat" => \$opt_stat,
    "mecab" => \$opt_mecab,
    "old!" => \$opt_old,
    "new!" => \$opt_new,
    "command!" => \$opt_command,
    "unknown!" => \$opt_unknown,
    "mark!" => \$opt_mark,
    "width|W=i" => \$opt_W,

    "i|ignore-case" => \$opt_i,
    "b|ignore-space-change" => \$opt_b,
    "w|ignore-all-space" => \$opt_w,
    "t|expand-tabs" => \$opt_t,
    "T|initial-tab" => \$opt_T,
    "c|context" => sub { push(@diffopts, "-c") },
    "u|unified" => sub { push(@diffopts, "-u") },
    "C=i" => sub { push(@diffopts, "-C" . ($_[1] > 0 ? $_[1] : '')) },
    "U=i" => sub { push(@diffopts, "-U" . ($_[1] > 0 ? $_[1] : '')) },

    "rcs" => \$rcs,
    "r=s" => sub { push @rcsopt, "-r$_[1]" },
    "q" => sub { push @rcsopt, "-q" },
);

Getopt::Long::Configure("bundling");
GetOptions(@optargs) || pod2usage();

my %opt_d;
if ($opt_d) {
    map { $opt_d{$_}++ } split //, $opt_d;
}

$rcs++ if @rcsopt;
$diff = $rcs ? 'rcsdiff' : 'diff' unless $diff;

push(@diffopts,
     $opt_i ? "-i" : (),
     $opt_b ? "-b" : (),
     $opt_w ? "-w" : (),
     $opt_t ? "-t" : (),
     $opt_T ? "-T" : (),
    );

push(@sub_diffopts,
     $opt_i ? "-i" : (),
     $opt_w ? "-w" : (),
    );

my %colormap = (
    COMMAND => $opt_256 ? "222SE" : "SE" ,
    OMARK   => "CS" ,
    NMARK   => "MS" ,
    OTEXT   => "C"  ,
    NTEXT   => "M"  ,
    OCHANGE => $opt_256 ? "/445": "BDw",
    NCHANGE => $opt_256 ? "/445": "BDw",
    DELETE  => $opt_256 ? "/544": "RDw",
    APPEND  => $opt_256 ? "/544": "RDw",
    );

if (@opt_colormap) {
    map {
	my $c = pop @$_;
	map { $colormap{$_} = $c }
	map { match_glob($_, keys %colormap) }
	@$_;
    }
    map { [ split /=/, $_, -1 ] }
    map { split /,/ }
    @opt_colormap;
}

$opt_commandcolor or $colormap{COMMAND} = "";
$opt_markcolor    or $colormap{OMARK}   = "";
$opt_markcolor    or $colormap{NMARK}   = "";
$opt_textcolor    or $colormap{OTEXT}   = "";
$opt_textcolor    or $colormap{NTEXT}   = "";

warn 'colormap = ', Dumper \%colormap if $opt_d{c};

my $opt_expand = $colormap{COMMAND} =~ /E/;
my $expand_width;
if ($opt_expand) {
    $expand_width = $opt_W || terminal_width();
}

my $DIFF;
my $OLD;
my $NEW;

if ($rcs) {
    my $rcsfile = shift || usage("No RCS filename\n\n");
    $DIFF = "$diff @diffopts @rcsopt $rcsfile|";
} elsif (@ARGV == 2) {
    ($OLD, $NEW) = splice(@ARGV, 0, 2);
    $DIFF = "$diff @diffopts $OLD $NEW |";
} elsif (@ARGV < 2) {
    $DIFF = shift || '-';
} else {
    usage("Arguments error.\n\n") if @ARGV;
}
warn "DIFF = \"$DIFF\"\n" if $opt_d{f};

my %func = $opt_color ?
    (
     DELETE => sub { color("DELETE",  @_) },
     APPEND => sub { color("APPEND",  @_) },
     OLD    => sub { color("OCHANGE", @_) },
     NEW    => sub { color("NCHANGE", @_) },
    ) :
    (
     DELETE => \&bd,
     APPEND => \&bd,
     OLD    => \&ul,
     NEW    => \&ul,
    );

my $wchar_re = qr{
    [\p{East_Asian_Width=Wide}\p{East_Asian_Width=FullWidth}]
}x;

my $w_pattern;
if ($opt_B) {
    $w_pattern = qr/./s;
} else {
    $w_pattern = qr{
	\p{Han} | \p{InHiragana}+ | \p{InKatakana}+ |
	\p{Hang}+     |
	\p{Cyrillic}+ |
	\p{Arabic}+   |
	\p{Thai}+     |
	[_\d\p{Latin}]+ |
	[\ \t\r\f]*\n | \s+ | (.)\g{-1}*
    }x;
}
my @ul = ("", "_\010", "__\010\010");
my @bs = ("", "\010", "\010\010");

##
## Temporary file names
##
my $T1 = new File::Tmpfile;
my $T2 = new File::Tmpfile;
binmode $T1->fh, ":encoding(utf8)";
binmode $T2->fh, ":encoding(utf8)";

##
## Total statistic info
##
my %stat;
@stat{'a', 'd', 'c', 'anl', 'dnl', 'cnl'} = (0, 0, 0, 0, 0, 0);
@stat{'anlb', 'dnlb', 'cnlb'} = (0, 0, 0);

open(DIFF, $DIFF) || die "$DIFF: $!\n";
binmode DIFF, ":encoding(utf8)";

while (<DIFF>) {
    my($so, $se) = colorseq('S');
    my($left, $cmd, $right);
    #
    # normal diff
    #
    if (($left, $cmd, $right) = /^([\d,]+)([adc])([\d,]+)\r?$/) {
	print_command($_);
	my $old = read_diff(*DIFF, scalar(range($left)))  if $cmd =~ /[cd]/;
	my $del = read_diff(*DIFF, 1)                     if $cmd =~ /[c]/;
	my $new = read_diff(*DIFF, scalar(range($right))) if $cmd =~ /[ca]/;

	if ($cmd eq 'c') {
	    my($omark, $nmark) = ("< ", "> ");
	    $old =~ s/^(<[ \t])//mg and $omark = $1;
	    $new =~ s/^(>[ \t])//mg and $nmark = $1;

	    ($old, $new) = context($old, $new);

	    $old =~ s/^/$omark/mg;
	    $new =~ s/^/$nmark/mg;
	}

	if ($opt_color) {
	    $old =~ s{^(<[ \t])(.*)}{
		color("OMARK", $1) . color("OTEXT", $2)
	    }mge if $old;
	    $new =~ s{^(>[ \t])(.*)}{
		color("NMARK", $1) . color("NTEXT", $2)
	    }mge if $new;
	}

	print $old if $old and $opt_old;
	print $del if $del;
	print $new if $new and $opt_new;
    }
    #
    # diff -c
    #
    elsif (($left) = /^\*\*\* ([\d,]+) \*\*\*\*\r?$/) {
	print_command($_);
	my(@old, @new);
	my $oline = range($left);
	@old = read_diffc(*DIFF, $oline);
	my $new;
	if (@old and $old[0] =~ /^--- /) {
	    $new = shift @old;
	    @old = ("");
	} else {
	    $new = <DIFF>;
	}
	my $dline = 0;
	map { ++$dline while /^-/mg } @old;
	if (($right) = $new =~ /^--- ([\d,]+) ----$/) {
	    my $nline = range($right);
	    if (@old == 1 and $old[0] ne "" and $oline - $dline == $nline) {
		@new = ("");
	    } else {
		@new = read_diffc(*DIFF, $nline);
	    }
	    for my $i (0 .. $#old) {
		my $cmark = "! ";
		if ($i % 2) {
		    $old[$i] =~ s/^(![ \t])//mg and $cmark = $1;
		    $new[$i] =~ s/^(![ \t])//mg and $cmark = $1;

		    ($old[$i], $new[$i]) = context($old[$i], $new[$i]);

		    $old[$i] =~ s/^/$cmark/mg;
		    $new[$i] =~ s/^/$cmark/mg;
		}
		if ($opt_color) {
		    $old[$i] =~ s{^([\-\!][ \t])(.*)}{
			color("OMARK", $1) . color("OTEXT", $2)
		    }mge;
		    $new[$i] =~ s{^([\+\!][ \t])(.*)}{
			color("NMARK", $1) . color("NTEXT", $2)
		    }mge;
		}
	    }
	}
	print @old if $opt_old;
	print $new;
	print @new if $opt_new;
    }
    #
    # diff -u
    #
    elsif (/^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@/)  {
	print_command($_);
	my @buf = read_diffu(*DIFF, $1 // 1, $2 // 1);
	my $tab = $opt_T ? "\t" : "";
	while (my($same, $old, $new) = splice(@buf, 0, 3)) {
	    $same =~ s/^.$tab//mgo if not $opt_mark;
	    print $same;
	    if ($old and $new) {
		$old =~ s/^\-$tab//mgo;
		$new =~ s/^\+$tab//mgo;

		($old, $new) = context($old, $new);

		$old =~ s/^/-$tab/mg;
		$new =~ s/^/+$tab/mg;
	    }
	    if ($opt_color) {
		$old =~ s{^(\-)($tab.*)}{
		    ($opt_mark ? color("OMARK", $1) : "") . color("OTEXT", $2)
		}mgoe if $old;
		$new =~ s{^(\+)($tab.*)}{
		    ($opt_mark ? color("NMARK", $1) : "") . color("NTEXT", $2)
		}mgoe if $new;
	    } else {
		if (not $opt_mark) {
		    $old =~ s/^\-$tab//mgo;
		    $new =~ s/^\+$tab//mgo;
		}
	    }
	    print $old if $old and $opt_old;
	    print $new if $new and $opt_new;
	}
    }
    else {
	print if $opt_unknown;
    }
}
close DIFF;

if ($opt_stat) {
    select STDERR;

    print("TOTAL: ");
    printf("append=%d delete=%d change=%d\n",
	   $stat{'a'}, $stat{'d'}, $stat{'c'});
    print("INGORE WHITESPACE: ");
    printf("append=%d delete=%d change=%d\n",
	   $stat{'anl'},
	   $stat{'dnl'},
	   $stat{'cnl'});
    print("INGORE WHITESPACE (bytes): ");
    printf("append=%d delete=%d change=%d\n",
	   $stat{'anlb'},
	   $stat{'dnlb'},
	   $stat{'cnlb'});
}

exit $? >> 8;

######################################################################

sub print_command {
    return unless $opt_command;
    my $line = shift;
    if ($opt_color) {
	if ($opt_expand and length($line) < $expand_width) {
	    $line =~ s/$/' ' x ($expand_width - mbwidth($_))/e;
	}
	$line = color($colormap{COMMAND}, $line);
    }
    print $line;
}

sub context {
    my($old, $new) = @_;
    local $_;

    if ($opt_d{s}) {
	print STDERR "****************************** Comparing ...\n";
	print STDERR $old;
	print STDERR "****************************** and\n";
	print STDERR $new;
	print STDERR "****************************** .\n";
    }

    my %c = (a => 0, d => 0, c => 0);
    my @owlist = maketmp($T1, $old);
    my @nwlist = maketmp($T2, $new);
    my @command = ();

    if ($opt_d{w}) {
	print STDERR "****************************** Comparing ...\n";
	print STDERR join ',', @owlist;
	print STDERR "****************************** and\n";
	print STDERR join ',', @nwlist;
	print STDERR "****************************** .\n";
    }

    my $diff = sprintf "diff @sub_diffopts %s %s", $T1->path, $T2->path;
    open(CDIF, "$diff |") || die "diff: $!\n";
    binmode CDIF, ":encoding(utf8)";
    while (<CDIF>) {
	warn $_ if $opt_d{d};
	if (/^[\d,]+([adc])[\d,]+$/) {
	    push @command, $_;
	    $c{$1}++;
	}
    }
    close CDIF;

    if ($opt_d{d}) {
	printf(STDERR "old=%d new=%d command=%d\n",
	       @owlist+0, @nwlist+0, @command+0);
	printf(STDERR "append=$c{a} delete=$c{d} change=$c{c}\n");
    }

    my($obuf, $nbuf) = makebuf(\@owlist, \@nwlist, \@command);
    die "illegal status of subprocess\n" if ($?>>8) > 1;

    ($obuf, $nbuf);
}

##
## Divide given text into word list.  Then write them into temporary
## file `$file' which include the word list one on each line.  The list
## itself is returned as a result of subroutine.
##
sub maketmp {
    my $tmpfile = shift;
    my $text = shift;
    my @words;
    my @notspace = (0);

    $tmpfile->reset;

  MECAB:

    goto NORMAL unless $opt_mecab;

    my $pid = open(MECAB, '|-');
    goto NORMAL if not defined $pid;

    my $eos = "EOS" . "000";
    $eos++ while $text =~ /$eos/;
    if ($pid) {
	binmode MECAB, ":encoding(utf8)";
	print MECAB $text;
    } else {
	open(STDOUT, ">&", $tmpfile->fh) or die;
	for ("disabling unreach warning") {
	    exec('mecab',
		 '--node-format', '%M\\n', '--eos-format', "$eos\\n");
	}
	while ($text =~ /($w_pattern)/g) {
	    my $w = $1;
	    $w =~ s/^[ \t]*\n/$eos/;
	    $tmpfile->write($w, "\n");
	}
	exit;
    }
    close MECAB;
    $tmpfile->rewind;
    while (my $s = $tmpfile->fh->getline) {
	chomp $s;
	$s =~ s/$eos/\n/;
	push(@words, $s);
    }
    goto DONE;

  NORMAL:

    while ($text =~ /($w_pattern)/g) {
	local $_ = $1;
	if ($opt_w) {
	    push(@notspace, !/\s/);
	    if (shift(@notspace) && $notspace[0]) {
		push(@words, '');
		$tmpfile->write("\n");
	    }
	}
	if (s/^(\s*)\n/\n/ && (length($1) || $opt_b || $opt_w)) {
	    #     ^ This have to be *.  Don't change to +.
	    $tmpfile->write(($opt_b || $opt_w) ? "\n" : "$1\n");
	    push(@words, $1);
	}
	push @words, $_;
	$tmpfile->write($_);
	$tmpfile->write("\n") unless /\n$/;
    }

  DONE:

    $tmpfile->flush->rewind;

    if ($opt_d{a} && @words != wc_l($tmpfile)) {
	die "Error! (\@words != `wc -l $tmpfile`)\n";
    }

    @words;
}

##
##  @owlist: old word list
##  @nwlist: new word list
##  @command: how different these lists (`diff' result command lines)
##
sub makebuf {
    my($ol, $nl, $c) = @_;
    my @owlist = @$ol;
    my @nwlist = @$nl;
    my @command = @$c;

    my($o, $n) = (0, 0);	# pointers
    my(@obuf, @nbuf);

    for (@command) {
	my($old, $cmd, $new) = /([\d,]+)([adc])([\d,]+)/ or do {
	    die "Panic! Unexpected diff output";
	};
	my($o1, $o2) = range($old);
	my($n1, $n2) = range($new);
	map { $_-- } $o1, $o2, $n1, $n2; # make them zero origined

	push(@obuf, @owlist[$o .. $o1 - 1]), $o = $o1 if $o < $o1;
	push(@nbuf, @nwlist[$n .. $n1 - 1]), $n = $n1 if $n < $n1;

	$stat{$cmd}++;

	if ($cmd eq 'd') {
	    my $os = join('', @owlist[$o1 .. $o2]);
	    if ($owlist[$o2] =~ /\S/) {
		$stat{'dnl'}++;
		$stat{'dnlb'} += length($os);
	    }
	    push(@obuf, $func{DELETE}->($os));
	    $o = $o2 + 1;
	}
	elsif ($cmd eq 'c') {
	    my $os = join('', @owlist[$o1 .. $o2]);
	    my $ns = join('', @nwlist[$n1 .. $n2]);
	    if (($owlist[$o2] =~ /\S/) || ($nwlist[$n2] =~ /\S/)) {
		$stat{'cnl'}++;
		$stat{'cnlb'} += length($os);
		$stat{'cnlb'} += length($ns);
	    }
	    push(@obuf, $func{OLD}->($os));
	    push(@nbuf, $func{NEW}->($ns));
	    $o = $o2 + 1;
	    $n = $n2 + 1;
	}
	elsif ($cmd eq 'a') {
	    my $ns = join('', @nwlist[$n1 .. $n2]);
	    if ($nwlist[$n2] =~ /\S/) {
		$stat{'anl'}++;
		$stat{'anlb'} += length($ns);
	    }
	    push(@nbuf, $func{APPEND}->($ns));
	    $n = $n2 + 1;
	}
    }
    push(@obuf, @owlist[$o .. $#owlist]);
    push(@nbuf, @nwlist[$n .. $#nwlist]);

    (join('', @obuf), join('', @nbuf));
}

sub read_diff {
    my($FH, $c) = @_;
    my @buf = ();
    while ($c-- > 0) {
	push @buf, scalar <$FH>;
    }
    wantarray ? @buf : join '', @buf;
}

sub read_diffc {
    my($FH, $n) = @_;
    my @buf;
    local $_;
    my $i = 0;
    while ($n-- && ($_ = <$FH>)) {
	$i++ if ($i % 2) != /^!/;
	$buf[$i] .= $_;
	last if /^--- /;
    }
    map { $_ // "" } @buf;
}

my %slot;
BEGIN {
    %slot = (" " => 0, "\t" => 0, "-" => 1, "+" => 2);
}
sub read_diffu {
    my $FH = shift;
    my @l = (0, @_);

    my $i = 0;
    my @buf;
    my $slot;
    while (2 * $l[0] + $l[1] + $l[2] > 0 and $_ = <$FH>) {
	# `git diff' produces message like this:
	# "\ No newline at end of file"
	/^[ \t\-\+]/ or next;
	$i++ while ($i % 3) != ($slot = $slot{substr($_, 0, 1)});
	$l[$slot]--;
	$buf[$i] .= $_;
    }
    map { $_ // "" } @buf;
}

sub mbwidth {
    my $arg = shift;
    my $len = 0;
    while ($arg =~ m{
	( (?: (?! $wchar_re) \P{Mn} ) + )
	|
	( (?: (?= $wchar_re) \P{Mn} ) + )
    }xgo) {
	$len += defined $1 ? length($1) : length($2) * 2;
    }
    $len;
}

sub ul {
    local $_ = join '', @_;
    s/(.)/$ul[mbwidth($1)].$1/ge;
    $_;
}
sub bd {
    local $_ = join '', @_;
    s/(\S)/$1.$bs[mbwidth($1)].$1/ge;
    $_;
}

sub range {
    local $_ = shift;
    my($from, $to) = /,/ ? split(/,/) : ($_, $_);
    wantarray ? ($from, $to) : $to == 0 ? 0 : $to - $from + 1;
}

use Term::ANSIColor qw(:constants);

use constant { FG => 'fg', BG => 'bg' } ;

sub colorseq {
    my $colormode = shift;

    my($start, $end) = ('', '');
    if ($colormode =~ /:/) {
	($start, $end) = split(/:/, $colormode, 2);
    } else {
	map {
	    if (s/\/((?:[\da-f]{6})|[0-5][0-5][0-5])//i and $opt_256) {
		$start .= ansi256($1, BG);
	    }
	    if (s/((?:[\da-f]{6})|[0-5][0-5][0-5])//i and $opt_256) {
		$start .= ansi256($1, FG);
	    }
	    $start .= UNDERLINE if /U/;
	    $start .= REVERSE   if /S/;
	    $start .= BOLD      if /D/;
	    $start .= BLINK     if /F/;
	    $start .= RED       if /R/; $start .= ON_RED       if /r/;
	    $start .= GREEN     if /G/; $start .= ON_GREEN     if /g/;
	    $start .= BLUE      if /B/; $start .= ON_BLUE      if /b/;
	    $start .= CYAN      if /C/; $start .= ON_CYAN      if /c/;
	    $start .= MAGENTA   if /M/; $start .= ON_MAGENTA   if /m/;
	    $start .= YELLOW    if /Y/; $start .= ON_YELLOW    if /y/;
	    $start .= BLACK     if /K/; $start .= ON_BLACK     if /k/;
	    $start .= WHITE     if /W/; $start .= ON_WHITE     if /w/;
	} $colormode if $colormode;
	$end = RESET if $start;
	$start =~ s/m\e\[/;/g;
    }
    ($start, $end);
}

sub ansi256 {
    my($code, $fg_or_bg) = @_;
    my($r, $g, $b);
    if ($code =~ /^([0-5])([0-5])([0-5])$/) {
	($r, $g, $b) = ($1, $2, $3);
    }
    elsif ($code =~ /^([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})$/i) {
	$r = int(5 * hex($1) / 255);
	$g = int(5 * hex($2) / 255);
	$b = int(5 * hex($3) / 255);
    }
    else {
	croak "Color format error.\n";
    }
    sprintf("\e[%d;%d;%dm",
	    $fg_or_bg eq BG ? 48 : 38,
	    5,
	    16 + ($r * 36) + ($g * 6) + $b
	);
}

my %colorcache;
my $reset_re;
BEGIN {
    my $reset = RESET;
    $reset_re = qr/\Q$reset/;
}

sub color {
    my($color, $text) = @_;
    return $text unless $color;
    unless ($colorcache{$color}) {
	my $mode = exists $colormap{$color} ? $colormap{$color} : $color;
	$colorcache{$color} = [ colorseq($mode) ];
    }
    my($s, $e) = @{$colorcache{$color}};
    if ($s ne "") {
	$text =~ s/(^|$reset_re)([^\e\r\n]+)/$1$s$2$e/mg;
    }
    $text;
}

##
## Implement minimum function because Text::Glob is not in standard library
##
sub match_glob {
    local $_ = shift;
    s/\?/./g;
    s/\*/.*/g;
    my $regex = qr/^$_$/;
    grep { $_ =~ $regex } @_;
}

sub wc_l {
    my $file = shift;
    my $line;
    $file->rewind;
    $line++ while $file->fh->getline;
    $file->rewind;
    $line;
}

sub eval {
    print STDERR &unctrl($_[0]), "\n" x ($_[0] !~ /\n$/) if $_[1] || $opt_d{e};
    eval shift;
    die sprintf("eval failed in file %s on line %s\n$@", (caller)[1,2]) if $@;
}

sub unctrl {
    local $_ = shift;
    s/([\000-\010\013-\037])/'^' . pack('c', ord($1)|0100)/ge;
    $_;
}

sub terminal_width {
    my $default = 80;
    my $cols = `tput cols`;
    chomp $cols;
    $cols > 0 ? int($cols) : $default;
}

######################################################################

=head1 NAME

cdif - word context diff

=head1 SYNOPSIS

cdif [cdif option] file1 file2

cdif [rcs options] [cdif options] file

cdif [cdif options] [diff-data]

Options:

	-c, -Cn		context diff
	-u, -Un		unified diff
	-i		ignore case
	-b		ignore trailing blank
	-w		ignore whitespace
	-t		expand tabs
	-T		initial tabs
	--rcs		use rcsdiff
	-r<rev>, -q	rcs options

	-B                  char-by-char comparison
	-W                  specify terminal width
	--diff=command      specify diff command
	--stat              show statistical information
	--colormap=s        specify color map
	--[no]color         color or not            (default true)
	--[no]256           ANSI 256 color mode     (default true)
	--[no]commandcolor  color for command line  (default true)
	--[no]markcolor     color for diff mark     (default true)
	--[no]textcolor     color for normal text   (default true)
	--[no]old	    print old text          (default true)
	--[no]new	    print new text          (default true)
	--[no]command	    print diff command line (default true)
	--[no]unknown	    print unknown line      (default true)


=head1 DESCRIPTION

B<cdif> is a post-processor of the Unix diff command.  It highlights
deleted, changed and added words based on word context.

You may want to compare character-by-character rather than
word-by-word.  Option B<-B> option can be used for that purpose.

If only one file is specified, cdif reads that file (stdin if no file)
as a output from diff command.

Lines those don't look like diff output are simply ignored and
printed.

=head1 OPTIONS

=over 7

=item B<->[B<cCuUibwtT>]

Almost same as B<diff> command.

=item B<--rcs>, B<-r>I<rev>, B<-q>

Use rcsdiff instead of normal diff.  Option B<--rcs> is not required
when B<-r>I<rev> is supplied.

=item B<-B>, B<--char>

Compare the data character-by-character context.

=item B<-W> I<width>, B<--width>=I<width>

Explicitly specify terminal width.

=item B<--diff>=I<command>

Specify the diff command to use.

=item B<-->[B<no>]B<color>

Use ANSI color escape sequence for output.

=item B<--colormap>=I<colormap>, B<--cm>=I<colormap>

Basic I<colormap> format is :

    FIELD=COLOR

where the FIELD is one from these :

    COMMAND  Command line
    OMARK    Old mark
    NMARK    New mark
    OTEXT    Old text
    NTEXT    New text
    OCHANGE  Old change part
    NCHANGE  New change part
    APPEND   Appended part
    DELETE   Deleted part

You can make multiple filelds same color joining them by = :

    FIELD1=FIELD2=...=COLOR

Also wildcard can be used for field name :

    *CHANGE=BDw

Multiple fields can be specified by repeating options

    --cm FILED1=COLOR1 --cm FIELD2=COLOR2 ...

or combined with comma (,) :

    --cm FILED1=COLOR1,FIELD2=COLOR2, ...

COLOR is combination of single character representing uppercase
foreground color :

    R  Red
    G  Green
    B  Blue
    C  Cyan
    M  Magenta
    Y  Yellow
    K  Black
    W  White

and corresponding lowercase background color :

    r, g, b, c, m, y, k, w

or RGB value if using ANSI 256 color terminal :

    FORMAT:
        foreground[/background]

    COLOR:
        000 .. 555       : 6 x 6 x 6 216 colors
        000000 .. FFFFFF : 24bit RGB mapped to 216 colors

    Sample:
        005     0000FF        : blue foreground
           /505       /FF00FF : magenta background
        000/555 000000/FFFFFF : black on white
        500/050 FF0000/00FF00 : red on green

and other effects :

    S  Standout (reverse video)
    U  Underline
    D  Double-struck (boldface)
    F  Flash (blink)
    E  Expand (only for command line)

When B<E> is specified for command line, the line is expanded to
window width filling up by space characters.

Defaults are :

    COMMAND => "SE"
    OMARK   => "CS"
    NMARK   => "MS"
    OTEXT   => "C"
    NTEXT   => "M"
    OCHANGE => "BD/445"
    NCHANGE => "BD/445"
    DELETE  => "RD/544"
    APPEND  => "RD/544"

This is equivalent to :

    cdif --cm 'COMMAND=SE,OMARK=CS,NMARK=MS' \
         --cm 'OTEXT=C,NTEXT=M,*CHANGE=BD/445,DELETE=APPEND=RD/544'

=item B<-->[B<no>]B<commandcolor>, B<--cc>

=item B<-->[B<no>]B<markcolor>, B<--mc>

=item B<-->[B<no>]B<textcolor>, B<--tc>

Enable/Disable using color for the corresponding field.

=item B<-->[B<no>]B<old>, B<-->[B<no>]B<new>

Print or not old/new text in diff output.

=item B<-->[B<no>]B<command>

Print or not command lines preceding diff output.

=item B<-->[B<no>]B<unknown>

Print or not lines not look like diff output.

=item B<-->[B<no>]B<mark>

Print or not marks at the top of diff output lines.  At this point,
this option is effective only for unified diff.

Next example produces the output exactly same as I<new> except visual
effects.

    cdif -U100 --nomark --noold --nocommand --nounknown old new

These options are prepared for watchdiff(1) command.

=item B<--stat>

Print statistical information at the end of output.  It shows number
of total appended/deleted/changed words in the context of cdif.  It's
common to have many insertions and deletions of newlines becuase of
text filling process.  So normal informaiton is followed by modified
number which ignores insert/delete newlines.

=item B<--mecab>

Experimental option for using B<mecab> as a tokenizer.  To use this
option, external command B<mecab> has to be installed.

=back

=head1 AUTHOR

Kazumasa Utashiro

https://github.com/kaz-utashiro/cdif

=head1 SEE ALSO

perl(1), diff(1), sdif(1), watchdiff(1)

=head1 BUGS

B<cdif> is naturally not very fast because it uses normal diff command
as a backend processor to compare words.

=head1 COPYRIGHT

Use and redistribution for ANY PURPOSE are granted as long as all
copyright notices are retained.  Redistribution with modification is
allowed provided that you make your modified version obviously
distinguishable from the original one.  THIS SOFTWARE IS PROVIDED BY
THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES ARE
DISCLAIMED.

=cut

package File::Tmpfile;

use strict;
use warnings;
use Carp;
use Fcntl;
use IO::File;
use IO::Handle;

sub new {
    my $class = shift;
    my $fh = new_tmpfile IO::File or die "new_tmpefile: $!\n";
    $fh->fcntl(F_SETFD, 0) or die "fcntl F_SETFD: $!\n";
    bless { FH => $fh }, $class;
}

sub write {
    my $obj = shift;
    my $fh = $obj->fh;
    if (@_) {
	my $data = join '', @_;
	$fh->print($data);
    }
    $obj;
}

sub flush {
    my $obj = shift;
    $obj->fh->flush;
    $obj;
}

sub rewind {
    my $obj = shift;
    $obj->fh->seek(0, 0) or die;
    $obj;
}

sub reset {
    my $obj = shift;
    $obj->rewind;
    $obj->fh->truncate(0);
    $obj;
}

sub fh {
    my $obj = shift;
    $obj->{FH};
}

sub fd {
    my $obj = shift;
    $obj->fh->fileno;
}

sub path {
    my $obj = shift;
    sprintf "/dev/fd/%d", $obj->fd;
}

1;
