#!/usr/bin/perl -w
=pod 

=head1 NAME

tv_grab_re - Grab TV listings for Nouvelle Caledonie Island (France).

=head1 SYNOPSIS

To configure: tv_grab_nc --configure [--config-file FILE]
To grab channels listing: tv_grab_nc --list-channels [--output FILE]
To grab programmes listings: tv_grab_nc [--output FILE] [--offset N] [--days N] [--quiet]
Slower, detailed grab: tv_grab_nc --slow [--output FILE] [--offset N] [--days N] [--quiet]
Help: tv_grab_nc --help

=head1 DESCRIPTION

Output TV listings for Canal Satellite Caledonie channels available in 
Nouvelle Caledonie Island. The data comes from www.canalsatellite-caledonie.com.
The default is to grab listing only for the current day. By default program descriptions are not downloaded, so if you want description and credits, you should 
activate the --slow option.

B<--configure> Grab channels informations and ask for channel type and names.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days, rather than only for the current day.

B<--offset N> Start grabbing for N days in the future, eg offset 1
means start with tomorrow.

B<--slow> Get additional information from the website, like program
description and credits.

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>

=head1 AUTHOR

Eric Castelnau, eric.castelnau@free.fr
Inspired by hacks from Marcus Westbury <marcus.westbury@gmail.com>

=cut

use XMLTV::Usage <<END
$0: get Nouvelle Caledonie Island television listings in XMLTV format
To configure: tv_grab_nc --configure [--config-file FILE]
To grab channels listing: tv_grab_nc --list-channels [--output FILE]
To grab programmes listings: tv_grab_nc [--output FILE] [--days N] [-offset N] [--quiet]
Slower, detailed grab: tv_grab_nc --slow [--output FILE] [--days N] [--offset N] [--quiet]
END
  ;

use warnings;
use strict;
use XMLTV::Version '$Id: tv_grab_nc,v 1.2 2007/04/15 13:56:50 ecastelnau Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache/;
use XMLTV::Description 'Nouvelle Caledonie Island';
use Getopt::Long;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;
use URI;
use Date::Manip;
use XMLTV;
use XMLTV::Memoize;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Mode;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Get_nice;
use XMLTV::Memoize; XMLTV::Memoize::check_argv 'get_nice';

###
### Main declarations
###
my %BROADCASTERS = (
	'CANALSAT' => "Canal Satellite Nouvelle Caledonie",
);
my $CANALSAT_BASE_URL = "http://srv3.media-overseas.com/";
my $CANALSAT_ICON_URL = "http://www.canalsatellite-caledonie.com/lebouquet/leschaines/pageschaines/images_chaines";

###
### Options processing
###
my ($opt_offset, $opt_days);
my $opt_help;
my $opt_output;
my $opt_quiet;
my $opt_config_file;
my $opt_configure;
my $opt_list_channels;
my $opt_slow;

GetOptions(	'days=i'	=> \$opt_days,
		'offset=i'	=> \$opt_offset,
		'help'          => \$opt_help,
		'output=s'      => \$opt_output,
		'quiet'         => \$opt_quiet,
		'configure'     => \$opt_configure,
		'config-file=s' => \$opt_config_file,
		'list-channels' => \$opt_list_channels,
		'slow'		=> \$opt_slow,
) or usage(0);

# need help
usage(1) if $opt_help;

# verbose by default
$opt_quiet = 0;

# number of day to process
die 'Number of days must not be negative' if (defined $opt_days && $opt_days < 0);
die 'Number of days must not be more than 5' if (defined $opt_days && $opt_days > 5);
$opt_days = 1 if not defined $opt_days;

# offset - zero (default) means start from today
die 'Offset must not be negative' if (defined $opt_offset && $opt_offset < 0);
$opt_offset = 0 if not defined $opt_offset;

# output file
$opt_output = '-' if not defined $opt_output;

# slow mode off by default
$opt_slow = 0 if not defined $opt_slow;

# Now detects if we are in configure mode
my $mode = XMLTV::Mode::mode('grab', $opt_configure => 'configure',
                        	$opt_list_channels => 'list-channels');

# File that stores which channels to download.
my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_nc',
						$opt_quiet);

# Content of $config_file
my @config_lines;

###
### Global variables
###

# channels list
my @channels;

###
### Sub sections
###
sub dprint($) {
	my $msg = shift;
	print STDERR "debug: " . $msg;
}

sub dump_channel($) {
	my $c = shift;
	print "type: $c->{'type'}\n";
	print "id  : $c->{'id'}\n";
	print "name: $c->{'name'}\n";
	print "icon: $c->{'icon'}\n";
}

sub dump_programme($) {
	my $c = shift;
	print "channel  : $c->{'channel'}\n";
	print "title    : $c->{'title'}[0][0]\n";
	print "start    : $c->{'start'}\n";
	print "stop     : $c->{'stop'}\n";
	#print "length   : $c->{'length'}sec.\n";
	print "category : $c->{'category'}[0][0]\n" if defined $c->{'category'};
}

sub new_xmltv_writer() {
	my %writer_args;
	my $file = new IO::File(">$opt_output");
	die "Cannot write to $opt_output: $!" if not defined $file;
	$writer_args{OUTPUT} = $file;
	$writer_args{'encoding'}  = 'ISO-8859-1';
	return new XMLTV::Writer(%writer_args);
}

sub get_channels_list($) {
	my $arg = shift;
	my @channels;

	if ($arg eq 'CANALSAT') {
		my $url = "http://srv3.media-overseas.com/FMPro?-db=caledonie.fp5&-lay=M1&-format=csat_caledonie/recherchecaledo.htm&-view";
		my $html = get_nice_tree $url;
	
		my $chaines = $html->look_down('_tag', 'select', 'name', 'idchaine');
		foreach my $chaine ($chaines->look_down('_tag', 'option')) {
			my %channel;

			my $id = $chaine->attr_get_i('value');
			next if ($id eq "0...999");
			my $title = $chaine->as_text();
	
			$channel{'type'} = "CANALSAT";
			$channel{'id'} = $id;
			$channel{'name'} = $title;
			$channel{'icon'} = "$CANALSAT_ICON_URL/${id}_grand.gif";

			push @channels,\%channel;
		}
	
		$html->delete();
		undef $html;
	}
	
	return @channels;
}

sub get_canalsat_programmes_list_slow($%) {
	my $url = shift(@_);
	my $p = shift(@_);

	# get request and parse
	my $html = get_nice_tree $url;

	# look for the résumé
	my $table = $html->look_down('_tag', 'table', 'width', '480', 'vspace', '0', 'cellspacing', '0', 'cellpadding', '0', 'border', '0', 'align', 'center');
	$table->objectify_text();
    #$table->dump();dprint("\n\n");

    my @text = $table->look_down('_tag', '~text');

	foreach (@text) {
		my $t = $_->attr_get_i('text');

		next if ($t =~ /^ /);

		next if (length $t < 7);

		$p->{'desc'} = [ [ $t, "fr" ] ];
	}

	# look for director/actors
	$table = $html->look_down('_tag', 'table', 'width', '621', 'height', '318', 'cellspacing', '4', 'cellpadding', '0', 'border', '0', 'bgcolor', '#CCCCCC', 'align', 'center');
    $table->objectify_text();
    my $td = $table->look_down('_tag', 'td', 'width', '475', 'height', '107');
    my $i = $td->look_down('_tag', 'i');
    #$i->dump();dprint("\n\n");
    @text = $i->look_down('_tag', '~text');
	
    my (@directors, @actors);
	foreach (@text) {
		my $t = $_->attr_get_i('text');

		if ($t =~ /^.*\(.*\) r.alis. en \d{4} de (.*) Avec (.*)/) {
			push @directors, $1;

			my @a = split(',', $2);
			foreach (@a) {
				if ($_ =~ /(.*) \(.*\)/) {
					push @actors, $1;
				}
			}
		}
	}

	$p->{credits}{director} = \@directors if @directors;
	$p->{credits}{actor}    = \@actors if @actors;

	$html->delete();
	undef $html;
}

sub get_canalsat_programmes_list($$$) {
	my ($idchaine, $offset, $days) = @_;
	die if $offset < 0;
	die if $days < 1;

	# the progs list to return
	my @progs = ();

	my $today = ParseDate 'today';

	for ($offset + 1 .. $offset + $days) {
		my $n = $_ - 1;

		# the start tag of programs for this day
		my $start = DateCalc($today, "+ $n days");
		my $url_day = UnixDate($start, "%d%%2F%m%%2F%Y");

		# build the url
		my $url = "http://srv3.media-overseas.com/FMPro?-db=caledonie.fp5&-format=csat%5fcaledonie%2frechercheresultatscaledo.htm&-error=rechercheerreurreunion.htm&-SortField=presseheuretri&-SortORder=Ascending&-max=99&";
		$url .= "-op=eq&jourdate=".$url_day."&";
		$url .= "-op=cn&periodejour=a...z&";
		$url .= "-op=cn&idchaine=".$idchaine."&";
	
		# simulate a click on the submit button
		my $random = int(rand(42)) + 1;
		$url .= "-Find.x=".$random."&";
		$random = int(rand(41)) + 1;
		$url .= "-Find.y=".$random;

		# get request and parse
		my $html = get_nice_tree $url;

		# look for the table of programmes
		my $table = $html->look_down('_tag', 'table', 'width', '815', 'cellspacing', '2', 'cellpadding', '0', 'border', '0', 'align', 'center');

		return @progs if not defined $table;

		$table->objectify_text();

		# look for the list of rows of the table
		my @rows = $table->look_down('_tag', 'table', 'width', '797', 'height', '53', 'cellspacing', '1', 'cellpadding', '0', 'border', '0', 'align', 'center');

		# scan each row
		foreach my $r (@rows) {
			# the current prog being processed
			my %prog;
			my ($tt, $stop);

			$prog{'channel'} = $idchaine.".canalsatellite-caledonie.com";

			# look for every column
			my @td = $r->look_down('_tag', 'td');

			# scan each cellule of the row
			foreach my $cell (@td) {
				my @b = $cell->look_down('_tag', '~text');
				foreach my $tag (@b) {
					$tt = $tag->attr_get_i('text');
					#$tag->dump();
	
					# here is the start hour
					if ( $tt =~ /(\d\d):(\d\d)/ ) {
						$start = Date_SetTime($start, $1, $2, 0);
						my $start_str = UnixDate($start, "%Y%m%d%H%M%S");
						$prog{'start'} = $start_str." +0400";
					}
	
					# here is the title with the duration in minutes
					if ( $tt =~ /(.*)\s\((\d+)\'\).*/ ) {
						# sometimes title doesn't exist for the first programme
						next if ($1 eq "");

						# "Fin des programmes" is not a real tv show
						next if ($1 eq "Fin des programmes");

						my $title = $1;

						$prog{'title'} = [ [ $title ] ];
	
						$stop = DateCalc($start, "+$2 min");
						my $stop_str = UnixDate($stop, "%Y%m%d%H%M%S");
						$prog{'stop'} = $stop_str." +0400";

						# Change the start date because last programme begins
						# this day (at 23:00 PM) and ends the day after 
						# (at 01:00 AM)
						my $y = UnixDate($stop, "%Y");
						my $m = UnixDate($stop, "%m");
						my $d = UnixDate($stop, "%d");

						$start = Date_SetDateField($stop, "y", $y);
						$start = Date_SetDateField($start, "m", $m);
						$start = Date_SetDateField($start, "d", $d);

						# length tag not necessary if start and stop 
						# are presents
						#$prog{'length'} = $2 * 60;
	
						# sometime there is also the category
						my $i = $cell->look_down('_tag', 'span', 'class', 'rouge');
						my $ii = $i->look_down('_tag', '~text');
						my $category = $ii->attr_get_i('text');
						utf8::encode($category) if (utf8::is_utf8($category));
						$category =~ s/^\s+//;
						$category =~ s/\s+$//;
						$prog{'category'} = [ [ $category, "fr" ] ] if not $category eq "";
					}
	
					# Year of the prog (if present)
					if ( $tt =~ / - (\d\d\d\d)/ ) {
						$prog{'date'} = $1;
					}
				}

				# get director/actors if --slow was asked
				if ($opt_slow) {
					@b = $cell->look_down('_tag', 'a', 'class', 'rouge bold');
					foreach (@b) {
						my $href = "http://srv3.media-overseas.com/".$_->attr_get_i('href');
						get_canalsat_programmes_list_slow($href, \%prog);
					}
				}
			}
	
			# add the current prog to the list if it is valid
			if (defined $prog{'title'}) {
				push @progs,\%prog;
			}
		}

		$html->delete();
		undef $html;
	}

	return @progs;
}

###
### Configure mode
###
if ($mode eq 'configure') {
	XMLTV::Config_file::check_no_overwrite($config_file);
	
	# ask user to select his broadcasters
	my @id = sort keys %BROADCASTERS;
	my @questions = map { "Would you like to download data for '$BROADCASTERS{$_}' ?" } @id;
	my @responses = ask_many_boolean(1, @questions);

	# retrieve the channels list for each broadcasters
	foreach (0..$#id) {
		if ($responses[$_]) {
			my @ch = get_channels_list($id[$_]);
			@channels = (@channels, @ch) if @ch;
		}
	}

	# ask user to add or not each channel
	@questions = map { "Add channel $_->{'name'} ?" } @channels;
	@responses = ask_many_boolean(1, @questions);

	# create configuration file
	open(CONF, ">$config_file") or die "Cannot write to $config_file: $!";

	foreach (@channels) {
		my $r = shift @responses;
	
		if ($r) {
			print CONF "channel:";
		}
		else {
			print CONF "#channel:";
		}

#		if ( $_->{'type'} eq "CANALSAT" )
#		{
		print CONF "$_->{'id'}.canalsatellite-caledonie.com;$_->{'name'}\n";
#		}
#		else
#		{
#			print CONF "$_->{'id'}.parabolereunion.com;$_->{'name'}\n";
#		}
	}

	close CONF or warn "Cannot close $config_file: $!";
	say("Finished configuration.");
	exit();
}

###
### List channels
###
if ($mode eq 'list-channels') {
	# init the XMLTV writer
	my $writer = new_xmltv_writer();

	# ask user to select his broadcasters
	my @id = sort keys %BROADCASTERS;
	my @questions = map { "Select '$BROADCASTERS{$_}' ?" } @id;
	my @responses = ask_many_boolean(1, @questions);

	# retrieve the channels list for each broadcasters
	foreach (0..$#id) {
		if ($responses[$_]) {
			my @ch = get_channels_list($id[$_]);
			@channels = (@channels, @ch) if @ch;
		}
	}

	# write the XML header
	$writer->start({
		'generator-info-name' => 'XMLTV',
		'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
	});

	foreach (@channels) {
		my $id = "id";
		$id = $_->{'id'}.".canalsatellite-caledonie.com";
#		$id = $_->{'id'}.".parabolereunion.com" if ($_->{'type'} eq "PARABOLE");

		$writer->write_channel({
			'id'           => $id,
			'display-name' => [[ $_->{'name'} ]],
#			'icon'         => [{ 'src' => $_->{'icon'} }]
		});
	}

	$writer->end();
	exit();
}

###
### Grab programmes listing
###
die if $mode ne 'grab';

# Now let's do it
Date_Init("TZ=UTC");

# Change HTTP Headers to make canalsatellite-caledonie.com happy
$XMLTV::Get_nice::ua->default_headers->push_header('Keep-Alive'=>'300');
$XMLTV::Get_nice::ua->default_headers->push_header('Connection'=>'keep-alive');
$XMLTV::Get_nice::ua->default_headers->push_header('Referer'=>'http://srv3.media-overseas.com/FMPro?-db=caledonie.fp5&-lay=M1&-format=csat_caledonie/recherchecaledo.htm&-view');

# read tv_grab_nc conf file...
@config_lines = XMLTV::Config_file::read_lines($config_file);

# ...and parse its content
my $n = 0;
foreach (@config_lines) {
	++$n;
	next if not defined;

	if ( /^channel:(\d+)\.(.*);(.*)/ ) {
		my %channel;

		$channel{'id'} = $1;
		$channel{'name'} = $3;

		if ($2 eq 'canalsatellite-caledonie.com') {
			$channel{'type'} = "CANALSAT";
			$channel{'icon'} = "$CANALSAT_ICON_URL/".$channel{'id'}."_grand.gif";
		}
		
#		if ($2 eq 'parabolereunion.com') {
#			$channel{'type'} = "PARABOLE";
#			$channel{'icon'} = "$PARABOLE_ICON_URL/channel_logo_small".$channel{'id'}.".gif";
#		}

		push @channels,\%channel;
	}
	else {
		die "$config_file:$n - Bad line channel";
	}
}

die "No working channels configured, so no grabing" if not @channels;

# init the XMLTV writer
my $writer = new_xmltv_writer();

# write the XML header
$writer->start({
	'generator-info-name' => 'XMLTV',
	'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
});

# first, write channels
foreach (@channels) {
	my $id = "id";
	$id = $_->{'id'}.".canalsatellite-caledonie.com";
#	$id = $_->{'id'}.".parabolereunion.com" if ($_->{'type'} eq "PARABOLE");

	$writer->write_channel({ 
		'id'           => $id,
		'display-name' => [ [ $_->{'name'} ] ],
#		'icon'         => [ { 'src' => $_->{'icon'} } ]
	});
}

# then, programmes
foreach (@channels) {
	my @progs;

	if ($_->{'type'} eq 'CANALSAT') {
 	   @progs = get_canalsat_programmes_list($_->{'id'}, $opt_offset, $opt_days);
	}

#	if ($_->{'type'} eq 'PARABOLE') {
#		@progs = get_parabole_programmes_list($_->{'id'}, $opt_offset, $opt_days);
#	}

	foreach my $prog (@progs) {
		$writer->write_programme(\%$prog);
	}
}

$writer->end();

