#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell

# $Id: tv_grab_dk_tvtid,v 1.2 2008/08/07 14:23:33 pingel Exp $

=pod

=head1 NAME

tv_grab_dk_tvtid - Grab TV listings for Denmark.

=head1 SYNOPSIS

tv_grab_dk_tvtid --help

tv_grab_dk_tvtid [--config-file FILE] --configure [--gui OPTION]

tv_grab_dk_tvtid [--config-file FILE] [--output FILE] [--days N]
[--offset N] [--quiet]

tv_grab_dk_tvtid --capabilities

tv_grab_dk_tvtid --version

=head1 DESCRIPTION

Output TV listings for several channels available in Denmark.  The
data comes from tvtid.tv2.dk. The grabber relies on parsing HTML so it
might stop working at any time.

First run B<tv_grab_dk_tvtid --configure> to choose, which channels you want
to download. Then running B<tv_grab_dk_tvtid> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_dk_tvtid.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days.  The default is one week.

B<--offset N> Start N days in the future.  The default is to start
from today.

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://membled.com/twiki/bin/view/Main/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHOR

Sren Pingel Dalsgaard (soren@dalsgaards.dk). Based on the dr grabber by
Sren Pingel Dalsgaard (soren@dalsgaards.dk). Based on the tv2 grabber by
Jesper Skov (jskov@zoftcorp.dk). Originally based on tv_grab_nl by
Guido Diepen and Ed Avis (ed@membled.com) and tv_grab_fi by Matti
Airas.
Additions by Jesper Toft (jesper@bzimage.dk)

=head1 BUGS

Things in the programme descriptions to handle:

* Better categories from descriptions.

* Customization of subtitles "Episode #" - perhaps even make it optional

=cut

######################################################################
# initializations

use strict;
use XMLTV::Version '$Id: tv_grab_dk_tvtid,v 1.2 2008/08/07 14:23:33 pingel Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache/;
use XMLTV::Description 'Denmark';
use Getopt::Long;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;
use URI;
use JSON;

use Date::Manip;

use XMLTV;
use XMLTV::Memoize;
use XMLTV::ProgressBar;
use XMLTV::Ask;
use XMLTV::Mode;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Date;
# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get Danish television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
[--offset N] [--quiet]
To show capabilities: $0 --capabilities
To show version: $0 --version
END
;

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
        *t = sub {};
        *d = sub { '' };
    }
    else {
        *t = \&Log::TraceMessages::t;
        *d = \&Log::TraceMessages::d;
        Log::TraceMessages::check_argv();
    }
}

use LWP::UserAgent;
my $ua = LWP::UserAgent->new;
$ua->agent("xmltv/$XMLTV::VERSION");

# Initialize cookie_jar
use HTTP::Cookies;
my $cookies = HTTP::Cookies->new;
$ua->cookie_jar($cookies);

# Whether zero-length programmes should be included in the output.
my $WRITE_ZERO_LENGTH = 0;

# default language
my $LANG = 'da';

# Winter time in Denmark - summer time is one hour ahead of this.
my $TZ = '+0100';

sub process_summary_page( $$$ );
sub process_listings_page( $$$$$ );

######################################################################
# get options

# Known categories.
my %tvtid_categories = ( 11854683 => 'Brn og Unge',
			 11848684 => 'Dokumentar',
			 11825897 => 'Film',
			 11830626 => 'Livsstil',
			 11847662 => 'Musik',
			 11838192 => 'Natur og Milj',
			 11840363 => 'Nyheder',
			 11870463 => 'Regional',
			 11831900 => 'Serier',
			 11792069 => 'Sport',
			 11837090 => 'Sundhed og Mad',
			 11844770 => 'Underholdning',
                         # The following are educated guesses
			 11839678 => 'Kultur',
			 11860240 => 'Shopping',
			 11860606 => 'Lotto',
			 11870299 => 'Religion',
			 11839487 => 'Videnskab',
			 11837327 => 'Alment',
			 11840285 => 'Undervisning',
    );

my %categories = ( 11854683 => 'kids',
		   11848684 => 'documentary',
		   11825897 => 'movie',
		   11830626 => 'lifestyle',
		   11847662 => 'music',
		   11838192 => 'nature',
		   11840363 => 'news',
		   11870463 => 'local',
		   11831900 => 'series',
		   11792069 => 'sport',
		   11837090 => 'health',
		   11844770 => 'entertainment',
		   # The following are educated guesses
		   11839678 => 'culture',
		   11860240 => 'shopping',
		   11860606 => 'lotto',
		   11870299 => 'religion',
		   11839487 => 'science',
		   11837327 => 'misc',
		   11840285 => 'education'
    );

my %movietypes = (      'action'        => 'action',
                        'drama'         => 'drama',
                        'erotisk'       => 'erotic',
                        'eventyr'       => 'adventure',
                        'gyser'         => 'horror',
                        'komedie'       => 'comedy',
                        'krimi'         => 'crime',
                        'thriller'      => 'thriller',
                        'romantisk'     => 'romance',
                        'western'       => 'western'
			);
# Get options
XMLTV::Memoize::check_argv('get_url');
my ($opt_days, $opt_offset, $opt_help, $opt_output,
    $opt_configure, $opt_config_file, $opt_gui,
    $opt_quiet, $opt_list_channels);
$opt_offset = 0; # default
GetOptions('days=i'        => \$opt_days,
           'offset=i'      => \$opt_offset,
           'help'          => \$opt_help,
           'configure'     => \$opt_configure,
           'config-file=s' => \$opt_config_file,
           'gui:s'         => \$opt_gui,
           'output=s'      => \$opt_output,
           'quiet'         => \$opt_quiet,
           'list-channels' => \$opt_list_channels,
	   )
    or usage(0);

usage(1) if $opt_help;

die 'number of days must not be negative'
    if ((defined $opt_days && $opt_days < 0) || ($opt_offset < 0));

my $maxdays=7;

die "tvtid.tv2.dk only provide information for today and the next $maxdays days."
    if ($opt_offset > $maxdays);

if (! defined $opt_days) {
    # If there is no --days given. Set it to as many as possible.
    $opt_days = $maxdays - $opt_offset;
} else {
    # --days option was given. Warn if its too high.
    if (($opt_days + $opt_offset) > $maxdays) {
	$opt_days = $maxdays - $opt_offset;
	warn "tvtid.tv2.dk only provide information for today and the next $maxdays days (and not for all channels).";
    }
}

XMLTV::Ask::init($opt_gui);

my $mode = XMLTV::Mode::mode('grab', # default
                             $opt_configure => 'configure',
                             $opt_list_channels => 'list-channels',
			     );

# File that stores which channels to download.
my $config_file
    = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_dk_tvtid', $opt_quiet);

if ($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);
      open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
      # find list of available channels
      my $bar = new XMLTV::ProgressBar('getting list of channels', 1)
	  if not $opt_quiet;
      my %channels = get_channels();
      die 'no channels could be found' if (scalar(keys(%channels)) == 0);
      update $bar if not $opt_quiet;
      $bar->finish() if not $opt_quiet;
      
      # Ask about each channel.
      my @chs = sort keys %channels;
      my @names = map { $channels{$_} } @chs;
      my @qs = map { "add channel $_?" } @names;
      my @want = ask_many_boolean(1, @qs);
      foreach (@chs) {
	  my $w = shift @want;
	  warn("cannot read input, stopping channel questions"), last
	      if not defined $w;
	  # No need to print to user - XMLTV::Ask is verbose enough.

	  # Print a config line, but comment it out if channel not wanted.
	  print CONF '#' if not $w;
	  my $name = shift @names;
	  print CONF "channel $_ $name\n";
	  # TODO don't store display-name in config file.
      }
      
      close CONF or warn "cannot close $config_file: $!";
      say("Finished configuration.");

      exit();
  }

# Not configuring, we will need to write some output.
die if $mode ne 'grab' and $mode ne 'list-channels';

# If we are grabbing, check we can read the config file before doing
# anything else.
#
my @config_lines;
if ($mode eq 'grab') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}

my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';
my $writer = new XMLTV::Writer(%w_args);
# TODO: standardize these things between grabbers.
$writer->start
    ({ 'source-info-url'     => 'http://tvtid.tv2.dk/',
       'source-data-url'     => 'http://tvtid.tv2.dk/',
       'generator-info-name' => 'XMLTV',
       'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
   });

if ($opt_list_channels) {
    my $bar = new XMLTV::ProgressBar('getting list of channels', 1)
	if not $opt_quiet;
    my %channels = get_channels();
    die 'no channels could be found' if (scalar(keys(%channels)) == 0);
    update $bar if not $opt_quiet;

    foreach my $ch_did (sort(keys %channels)) {
	my $ch_name = $channels{$ch_did};
	$writer->write_channel({ id => $ch_did,
                                 'display-name' => [ [ $ch_name ] ],
                                 'icon' => [{'src' => get_icon($ch_did)}]
				 });
    }
    $bar->finish() if not $opt_quiet;
    $writer->end();
    exit();
}

# Not configuring or writing channels, must be grabbing listings.
die if $mode ne 'grab';
my (%channels, @channels, $ch_did, $ch_name);
my (%convert, $orig_ch, $new_ch);
my $line_num = 1;
foreach (@config_lines) {
    ++ $line_num;
    next if not defined;

    # FIXME channel data should be read from the site, and then the
    # config file only gives the XMLTV ids that are interesting.
    #
    if (/^channel:?\s+(\S+)\s+([^\#]+)/) {
	($ch_did, $ch_name) = ($1, $2);
	$ch_name =~ s/\s*$//;
	push @channels, $ch_did;
	$channels{$ch_did} = $ch_name;
    } elsif (/^convert:?\s+(\S+)\s+([^\#]+)/) {
	($orig_ch, $new_ch) = ($1, $2);
	$new_ch =~ s/\s*$//;
	$convert{$orig_ch} = $new_ch;
    } else {
	warn "$config_file:$.: bad line\n";
    }
}


######################################################################
# subroutine definitions

# get channel listing
sub get_channels {
    my %channels;

    my $sec_per_day = 24*60*60;
    my $eight_hours = 8*60*60; # Off by two for some reason
    my $now8 = int(time()/$sec_per_day)*$sec_per_day+$eight_hours;

    use JSON;
    my $url = 'http://tvtid.tv2.dk/allekanaler/get.php/day-'.$now8.'.html';
    my $json_t = get_url($url);
    #print $url."\n\n".$json_t."\n\n";
    my $json = new JSON(autoconv => 0, pretty => 1, indent => 2, utf8 => 1);
    my $js_ref = $json->jsonToObj($json_t);
    my @js = @$js_ref;

    foreach my $elm (@js)
    {
	my $id = $elm->{logo};
	$id =~ s,/img/logos/logo-,,;
	$id =~ s,\.gif,,;
	$channels{$id} = $elm->{name};
    }
    return %channels;
}

# Clean up bad characters in the site's HTML.
my $warned_bad_chars;
sub tidy( $ ) {
    for (my $tmp = shift) {
	tr/\222/''/;
	tr/\011/ /; # tabs are allowed - turn them into spaces
	if (tr/\012\015\040-\176\240-\377//dc) {
	    warn 'removing bad characters' unless ($warned_bad_chars++ || $opt_quiet);
	}
	return $_;
    }
}

my $fetched;
sub get_url( $ ) {
    sleep rand(5) if defined $fetched;
    $fetched = 1;
    my $c = tidy($ua->get(shift)->content);
    return $c;
}

# Bump a YYYYMMDD date by one.
sub correct_day {
    my $d = shift;
    my $h = shift;
    
    $d = UnixDate(DateCalc($d, "+ 1 day"), '%Q') if UnixDate($h, '%H') < 6;

    my ($base, $tz) = @{date_to_local(parse_local_date("$d " . $h, $TZ), $TZ)};

    return UnixDate($base, '%q') . " $tz";
}

# Icon URL for a given channel.
sub get_icon {
    my ($url) = @_;
    return "http://tvtid.tv2.dk/img/logos/logo-" . $url . ".gif";
}

# Split list of people into array
sub get_people {
    my $p = shift;
    my $people;
    $p =~ s/ m\.fl.+//;
    @$people = split(/, | og |\//, $p);
    s/.+:// foreach @$people;
    s/^\s+// foreach @$people;
    s/\s+$// foreach @$people;
    s/[.]$// foreach @$people;
    return $people;
}

######################################################################
# begin main program

my $sec_per_day = 24*60*60;
my $eight_hours = 8*60*60; # Off by two for some reason
my $now8 = int(time()/$sec_per_day)*$sec_per_day+$eight_hours;

Date_Init('TZ=utc');

foreach $ch_did (@channels) {
    $ch_name = $channels{$ch_did};
    $writer->write_channel({ id => $ch_did,
                             'display-name' => [ [ $ch_name ] ],
			     'icon' => [{'src' => get_icon($ch_did)}]
			     }) unless $convert{$ch_did};
}

for (my $i = $opt_offset;$i<($opt_offset + $opt_days);$i++) {

    my $sec_per_day = 24*60*60;
    my $eight_hours = 8*60*60; # Off by two for some reason
    my $day8 = int(time()/$sec_per_day+$i)*$sec_per_day+$eight_hours;

    
    my $day = UnixDate(DateCalc(parse_date('now'), "+ $i days"), '%Q');

    use JSON;
    my $url = 'http://tvtid.tv2.dk/allekanaler/get.php/day-'.$day8.'.html';
    my $json_t = get_url($url);

    my $json = new JSON(autoconv => 0, pretty => 1, indent => 2, utf8 => 1);
    my $js_ref = $json->jsonToObj($json_t);
    my @js = @$js_ref;

    foreach my $elm (@js)
    {
	my $id = $elm->{logo};
	$id =~ s,/img/logos/logo-,,;
	$id =~ s,\.gif,,;
	if (defined $channels{$id}) {

	    my $programs_ref = $elm->{programs};
	    my @programs = @$programs_ref;
	    for my $program (@programs) {
		# If 'overlap=1' the program is present on the
		# previous day as well, so skip it
		next if defined $program->{overlap};

		#print $json->objToJson($program)."\n";

		my %prog = ();
		if ($convert{$id}) {
		    $prog{channel} = $convert{$id};
		} else {
		    $prog{channel} = $id;
		}
		$prog{start} = correct_day($day, $program->{start});
		$prog{stop} = correct_day($day, $program->{end});
		$prog{category} = [ [ $categories{$program->{cat}} ] ];

		my $program_url="http://tvtid.tv2.dk/program/index.php/id-".$program->{'id'}.".html";
		my $contents = get_url($program_url);

		my $aspect;
		$aspect = '4:3' if ($contents =~ /pictureFormat43 enabled/);
		$aspect = '16:9' if ($contents =~ /pictureFormat169 enabled/);
		my $rerun;
		$rerun = {} if ($contents =~ /rerun enabled/);
		my $sound = 'stereo';
		$sound = 'surround' if ($contents =~ /surround enabled/);
		my $teletext;
		$teletext = {} if ($contents =~ /teletext enabled/);
		my $subtitles;
		$subtitles = {} if ($contents =~ /subtitles enabled/);
		my $colour = 1;
		$colour = 0 if ($contents =~ /blackwhite enabled/);
		#if ($contents =~ /subtitlesHearingImpaired enabled/) { print "TTH "; }

		my $descr;
		my $with;
		my $actors;
		my $writers;
		my $adapters;
		my $presenters;
		# Get program information. Keep "<" at the end intentionally.
		if ($contents =~ /<div class="longinfo">(.+?<)\/div>/) {
		    $descr = $1;

		    $descr =~ s/\<p\>Sendt frste gang .*?\</</; # Remove
		    $descr =~ s/\<p\>Sendes ogs .*?\</</; # Remove

		    $descr =~ s/\<p\>/ /g;	  # Remove <p>
		    $descr =~ s/\<\/p\>/ /g;	  # Remove </p>

		    if ($descr =~ /(.*)\<h2 class="programListHeader"\>Medvirkende:\<\/h2\>(.*?)(\<.*)/) {
			($descr, $actors) = ($1 . $3, get_people($2));
		    }

		    if ($descr =~ /(.*)\<h2 class="programListHeader"\>Instruktr:\<\/h2\>(.*)(\<.*)/) {
			($descr, $writers) = ($1 . $3, get_people($2));
		    }

		    if ($descr =~ /(.*)\<BR\>Tilrettelggelse: (.+?)(\<.*)/) {
			($descr, $adapters) = ($1 . $3, get_people($2));
		    }

		    if ($descr =~ /(.*)Vrt: (.+?)\.(.*)/) {
			($descr, $presenters) = ($1 . $3, get_people($2));
		    }

		    #if ($descr =~ /<BR>(.+)/) {
			#print "***** $1 *****\n";
		    #}

		    # Clean up $descr:
		    $descr =~ s/\<BR\>/ /g;	  # Remove <BR>
		    $descr =~ s/<$//g;	   # Remove < at end
		    $descr =~ s/ \s+/ /g; # Remove double spaces
		    $descr =~ s/ +$//g;	 # rtrim
		    $descr =~ s/^ +//g;	# ltrim

		}

		my $original;
		if ($contents =~ /\<h2 class="originalTitle"\>Originaltitel: (.+?)\<\/h2\>/) {
		    $original = $1;
		}

		my $episode;
		if ($contents =~ /\<div class="episode"\>Episode: \((.+?)\)\<\/div\>/) {
		    $episode = $1;
		    $episode =~ s/:/\//;
		}

		my @titles = ([ $program->{title}, $LANG ]);
		push @titles, [ $original ] if defined $original;
		$prog{title} = \@titles;
		$prog{desc} = ([ [ $descr, $LANG ] ]) if defined $descr && $descr;
		my %v = ( present => 1,
			  colour => $colour );
		$v{aspect} = $aspect if defined $aspect;
		$prog{video} = \%v;
		$prog{audio} = { present => 1,
				 stereo => $sound };
		my $subtitle;
		$subtitle = $program->{description} if $program->{description};
		if (defined $episode) {
		    if (defined $subtitle) {
			$subtitle .= "." unless $subtitle =~ /\.$/;
			$subtitle .= " Episode " . $episode . ".";
		    } else {
			$subtitle = "Episode " . $episode . ".";
		    }
		}
		$prog{'sub-title'} = [ [ $subtitle, $LANG ] ] if defined $subtitle;
		if (defined $subtitle && $subtitle =~ / fra (\d{4})/) {
		    $prog{date} = $1;
		}
		$prog{subtitles} = [ { type => 'teletext' } ] if defined $teletext;
		$prog{subtitles} = [ { type => 'onscreen' } ] if defined $subtitles;
		$prog{'episode-num'} = [ [ $episode, 'onscreen' ] ] if defined $episode;
		$prog{'previously-shown'} = $rerun if defined $rerun;
		my %c;
		$c{actor} = $actors if defined $actors;
		$c{writer} = $writers if defined $writers;
		$c{adapter} = $adapters if defined $adapters;
		$c{presenter} = $presenters if defined $presenters;
		$prog{credits} = \%c if %c;

		#print $json->objToJson(\%prog) . "\n";

		$writer->write_programme(\%prog);
	    }
	}
    }
}
$writer->end();
exit(0);
