#!/usr/pkg/bin/perl

use lib qw(); # PERL5LIB
use lib qw(/usr/pkg/lib); # LIBDIR

use Getopt::Long qw(:config posix_default no_ignore_case);
use Pod::Usage;

use ZnapZend;
# Program version printed by --version in main().
# NOTE(review): the trailing VERSION marker comment looks like a hook that is
# substituted at build/release time -- keep it on this line; confirm with the
# build scripts.
my $VERSION = '0.dev'; #VERSION

# Lookup table (feature name => 1) of the names accepted in --features=...;
# main() dies on any requested feature that is not a key of this hash.
my %featureMap = map { $_ => 1 } qw(pfexec sudo oracleMode resume recvu compressed sendRaw lowmemRecurse zfsGetType skipIntermediates sendIntermediates forbidDestRollback);

# Entry point of the znapzend launcher.
#
# Parses the command line into one options hash, expands the comma separated
# --features=... list into individual options, normalizes the options that
# depend on each other (skip/sendIntermediates, runonce/recursive,
# since/sinceForced/forbidDestRollback, forcedSnapshotSuffix), and finally
# passes the resulting hash to ZnapZend->new and calls start() on the object.
#
# Exits with status 1 if GetOptions rejects the command line, exits after
# printing the version for --version, and lets pod2usage exit for --man.
# For --help the usage text is printed and 1 is returned without starting
# anything.
# Dies with an "ERROR: ..." message for an unsupported feature name or an
# invalid use of --forcedSnapshotSuffix.
# Returns 1 once $znapzend->start has returned.
sub main {
    my $opts = {};
    GetOptions($opts, qw(help|h man debug|d noaction|n nodestroy features=s),
        qw(sudo pfexec rootExec=s daemonize pidfile=s logto=s loglevel=s),
        qw(runonce:s connectTimeout=s timeWarp=i nodelay autoCreation version),
        qw(forcedSnapshotSuffix=s forbidDestRollback),
        qw(skipIntermediates|i sendIntermediates|I),
            # Note: the intended usage is either via feature as done
            # originally, or via shorthand -i/-I flags as a mnemonic
            # for "zfs send", so the longer variants --skipIntermediates
            # vs. --sendIntermediates are not documented in the manpage.
        qw(skipOnPreSnapCmdFail skipOnPreSendCmdFail recursive|r inherited since=s sinceForced=s),
        qw(cleanOffline mailErrorSummaryTo=s)
        ) or exit 1;

    $opts->{version} && do {
        print "znapzend version $VERSION\n";
        exit;
    };

    # Split all features into individual options
    if ($opts->{features}){
        for my $feature (split /,/, $opts->{features}){
            $featureMap{$feature} or die "ERROR: feature '$feature' not supported\n";
            if ($feature eq "pfexec" or $feature eq "sudo") {
                # Legacy spelling: map it onto --rootExec
                warn "--features=" . $feature . " is deprecated. Use --rootExec=" . $feature . " instead\n";
                $opts->{rootExec} = $feature;
            } else {
                $opts->{$feature} = 1;
            }
        }
        delete $opts->{features};
    }

    # From here on only skipIntermediates is kept; sendIntermediates is
    # translated to "skipIntermediates = 0" and removed from the hash.
    if (defined($opts->{sendIntermediates})) {
        if ( (defined($opts->{skipIntermediates})) && ($opts->{skipIntermediates} != 0) ) {
            warn "Options skipIntermediates and sendIntermediates are exclusive; and sendIntermediates wins!";
        }
        # Note that the legacy default is to sendIntermediates.
        # Beware or benefit that this can deliver manually named
        # snapshots to your destination that would have to be
        # cleaned manually when no longer wanted there, but that
        # can be used as common snapshots to repair broken syncs.
        $opts->{skipIntermediates} = 0;
        delete $opts->{sendIntermediates};
    }

    # --runonce takes an optional value (the dataset); after this block
    # runonce is a plain flag and the dataset, if any, is in {dataset}.
    if (defined($opts->{runonce})) {
        # Note: empty arg is ok, means to pass all backup plans once
        chomp $opts->{runonce};
        if ($opts->{runonce} ne '') {
            $opts->{dataset} = $opts->{runonce};
        }
        $opts->{runonce} = 1;
    }

    if (defined($opts->{recursive})) {
        $opts->{recursive} = 1;
    } else {
        if (defined($opts->{runonce}) && 1 == $opts->{runonce}) {
            # Run-once without -r only looks at the requested dataset
            $opts->{recursive} = 0;
        } else {
            # Effectively, recurse our whole ZFS namespace from its null root
            $opts->{recursive} = 1;
        }
    }

    # Normalize to an explicit 0/1 flag
    $opts->{forbidDestRollback} = defined($opts->{forbidDestRollback}) ? 1 : 0;

    # Normalize the snapshot names given via --since / --sinceForced BEFORE
    # deciding whether they were passed at all: drop a trailing newline and
    # any leading '@'. A value with nothing left (e.g. "--since=@") is
    # treated like an empty argument, i.e. as if the option was not given,
    # instead of ending up as an empty snapshot name that still toggles
    # forbidDestRollback below.
    for my $sinceKey (qw(since sinceForced)) {
        next if !defined($opts->{$sinceKey});
        chomp $opts->{$sinceKey};
        $opts->{$sinceKey} =~ s/^[@]*// ;
        delete $opts->{$sinceKey} if $opts->{$sinceKey} eq '';
    }

    if (defined($opts->{sinceForced})) {
        # The forced variant wins over --since and re-enables rollback of
        # the destination, even if --forbidDestRollback was requested.
        $opts->{since} = delete $opts->{sinceForced};
        $opts->{forbidDestRollback} = 0;
        warn "The --sinceForced option is used and will try to ensure that the snapshot exists in history of destinations (can delete and rewrite subsequent snapshots)";
    } elsif (defined($opts->{since})) {
        # Plain --since must not remove anything on the destination
        $opts->{forbidDestRollback} = 1;
        warn "The --since option is used and will try to ensure that the snapshot exists in history of destinations only if an older snapshot is the latest common (would not delete and rewrite subsequent snapshots to make way)";
    }

    if (defined($opts->{forcedSnapshotSuffix}) && ($opts->{forcedSnapshotSuffix} ne '')) {
        chomp $opts->{forcedSnapshotSuffix};
        if ($opts->{forcedSnapshotSuffix} eq '') {
            die "ERROR: Nothing left of --forcedSnapshotSuffix=... after chomp!\n";
        }
        if ( !( (defined($opts->{runonce})) && (1 == $opts->{runonce}) ) ) {
            die "ERROR: --forcedSnapshotSuffix=... can only be used with --runonce=...\n";
        }
        # Chop off leading @ if any
        $opts->{forcedSnapshotSuffix} =~ s/^[@]*// ;
        if ($opts->{forcedSnapshotSuffix} eq '') {
            # e.g. --forcedSnapshotSuffix=@ : refuse rather than announce
            # and pass on an empty forced suffix
            die "ERROR: Nothing left of --forcedSnapshotSuffix=... after stripping leading '\@' characters!\n";
        }
        warn "This round of 'run-once' mode would use forced snapshot suffix '" .
            $opts->{forcedSnapshotSuffix} .
            "' instead of one generated by pattern configured in the backup plan(s)\n";
        # Note: There are further sanity checks right in createSnapshot()
        # to avoid using fatally bad strings as snapshot names
    }

    # Features that were not requested get an explicit 0
    if (!defined($opts->{lowmemRecurse})) {
        $opts->{lowmemRecurse} = 0;
    }

    if (!defined($opts->{zfsGetType})) {
        $opts->{zfsGetType} = 0;
    }

    $opts->{help} && do {
        # NOEXIT: print the usage text but return to the caller instead
        # of exiting inside pod2usage
        pod2usage(-exitval => 'NOEXIT');

### RM_COMM_4_TEST ###  # remove ### RM_COMM_4_TEST ### comments for testing purpose.
### RM_COMM_4_TEST ###  $opts = {};

        return 1;
    };

    $opts->{man} && pod2usage(-exitstatus => 0, -verbose => 2);

    my $znapzend = ZnapZend->new($opts);

### RM_COMM_4_TEST_main ###  # remove ### RM_COMM_4_TEST_main ### comments for testing purpose.
### RM_COMM_4_TEST_main ###  return $znapzend->start;

    $znapzend->start;

    return 1;
}

# Run the program; the return value of main() is not used here.
main();

# NOTE(review): trailing true value, presumably so the file can also be
# loaded via do/require (e.g. by tests) -- confirm.
1;

__END__

=head1 NAME

znapzend - znapzend daemon

=head1 SYNOPSIS

B<znapzend> [I<options>...]

 --man                  show man-page and exit
 --version              print version and exit
 -h,--help              display this help and exit
 -d,--debug             print debug messages to STDERR
 -n,--noaction          run in simulation mode: does no changes to filesystems
 --nodelay              if a backup plan has a zend_delay defined, ignore it
 --nodestroy            does all changes to the filesystem except destroy
 --forbidDestRollback   forbids forcing destination dataset rollback and
                        removal of datasets/snapshots not present on source
 --logto=x              select where to log (syslog::<facility> or <filepath>)
 --loglevel=x           define the log level
 --pidfile=x            write a pid file when running in daemon mode
 --daemonize            fork into the background
 --runonce=[x]          run one round on the optionally provided dataset
 -r,--recursive         recurse from the given "run-once" dataset
 --inherited            allow "run-once" on dataset which only inherits a plan
 --since=x              allow to consider a non-automatic common snapshot "x" as a starting point
 --sinceForced=x        same as --since=x but also make sure "x" exists on
                        destination (can cause removal of newer snapshots)
 --forcedSnapshotSuffix=x  use non-generated snapshot suffix for this "run-once"
 --features=x           comma separated list of features to be enabled
                        (detailed in man page):
    oracleMode recvu resume compressed sendRaw lowmemRecurse zfsGetType
    skipIntermediates sendIntermediates forbidDestRollback
 -i/-I                  a "zfs send" compatible on/off for skipIntermediates
 --rootExec=x           exec zfs with this command to obtain root privileges
                        (sudo or pfexec)
 --connectTimeout=x     sets the ConnectTimeout for ssh commands
 --autoCreation         automatically create dataset on destination if it does
                        not exist
 --timeWarp=x           shift znapzend's sense of NOW into the future
                        by x seconds
 --skipOnPreSnapCmdFail skip snapshots if the pre-snap-command fails
 --skipOnPreSendCmdFail skip replication if the pre-send-command fails
 --cleanOffline         clean up source snapshots even if a destination was offline
 --mailErrorSummaryTo=rcpt  if "send task(s) failed", mail a summary to rcpt

=head1 DESCRIPTION

ZnapZend is a snapshot based zfs backup daemon creating snapshots on a
scheduled basis on the source filesystem and on destination filesystems.

ZnapZend reads its configuration from custom properties in the dataset.
Use L<znapzendzetup> to set and list these properties.

=over

=item B<-d>, B<--debug>

talk a lot while running. Sends debug messages to stderr.

=item B<-n>, B<--noaction>

don't do any actions which have lasting effect. Ideal to try out new
configurations together with B<--debug>

=item B<--nodelay>

if a backup plan has a zend_delay defined, ignore it (should help when
debugging the backup plan configurations, without waiting for hours to
run them during experiments)

=item B<--nodestroy>

do all changes to the filesystem except destroying old snapshots after
a successful run to snapshot and replicate to backup destinations

=item B<--forbidDestRollback>

by default znapzend uses B<zfs recv -F> which rolls back destination to
the most recent snapshot, and in case of incremental replication streams
can also destroy snapshots and other child datasets not present in the
source.

The B<--forbidDestRollback> option allows to not enforce this mode, so
znapzend would instead report errors if destination is not ready to
receive data without applying such operations. This can be useful for
maintenance of backup destinations, but can be counter-productive for
regularly scheduled operations.

=item B<--logto>={B<syslog::>I<facility>|I<filepath>}

send logs out to either syslog or a logfile. Default is to send logs to
B<syslog::daemon> when running daemonized. When running in debug mode, the
logs will go to STDERR by default.

Examples:

 --logto=/var/log/znapzend.log
 --logto=syslog::daemon

=item B<--loglevel>={B<debug>|B<info>|B<warning>|B<err>|B<alert>}

Define the log level when logging to file. Default is debug.

=item B<--pidfile>=I<path>

write a pid file when running in daemon mode.
B<pidfile> defaults to I</var/run/znapzend.pid> if no pidfile is given.

=item B<--daemonize>

Fork into the background.

=item B<--runonce>=[I<dataset>]

run one round on source I<dataset> or on all datasets if empty.
This is very useful for testing. Use it in connection with B<--noaction> and
B<--debug> while testing your new configuration, or with B<--recursive> for
quick back up of whatever has backup plans in a single pool or dataset tree.

=item B<--forcedSnapshotSuffix>=I<snapsuffix>

for this round of B<--runonce> create the specified snapshot suffix, rather
than one generated by a pattern managed by 'znapzend'. Note it would not get
automatically cleaned per retention policy, either. This is primarily intended
for manual operations where a sysadmin wants to mark a certain data state
of local significance while making an unscheduled backup.

This option is not valid if the run-once mode is not requested.

While basic sanity checks are done, it is up to the caller to specify a
correct string value according to the rules of their ZFS version, and to
take care that it is unique (no snapshots with such suffix already exist).

See also B<--since>=I<snapsuffix>

=item B<--since>=I<snapsuffix>, B<--sinceForced>=I<snapsuffix>

Enables to see a snapshot named I<snapsuffix> in the history of the dataset
under source and target pools, which should be a common snapshot present on
both (or only on the source pool), and may be their newest common snapshot
(that would otherwise be blocking the sync as something that needs to be
removed for a full auto-creation replication and/or a usual replication).

If the target dataset does not have this named snapshot, but the otherwise
discovered history of automatic snapshots allows to include it into the usual
replication, it will be sent (and a newer automatically-named snapshot would
be made and sent). In other cases, it is up to the systems administrator to
make the original systems data consistent with that on their remote ZFS pool.

One could then use B<--sinceForced>=I<snapsuffix> variant that can remove
from destination any snapshots that are newer than I<snapsuffix> to make way
if needed, and ensure that this snapshot appears in destination's history;
by default with B<--since>=I<snapsuffix> such removals should not happen.

Unlike the B<--forcedSnapshotSuffix>=I<snapsuffix>, this option does not
define how the new snapshot made during this run would be called (it would
be named by common timestamp-based pattern to not get in the way of subsequent
regular replications).

The snapshot named in this argument will never be removed during this
run-once (even if it matches the configured znapzend timestamp-based snapshot
naming pattern and is old enough to rotate away in other circumstances).

Example:

  znapzend --runonce=rpool/ROOT --since=20200101-01-finallyStableSetup

=item B<-r>, B<--recursive>

when backing up a particular dataset with B<--runonce>=[I<dataset>], do not
just look at this dataset's backup plan, but iterate into its children that
might have any. Useful for quick backups of a pool whose root dataset has no
I<znapzendzetup> configurations defined, but some trees under it do.

See below for interaction with B<--inherited> option and examples.

=item B<--inherited>

when backing up a particular dataset with B<--runonce>=[I<dataset>], do not
require that the provided dataset directly has a locally defined backup plan
configuration (sanity check that it is not "received" via ZFS replication).
With this option in place, allow also to quickly snapshot and/or replicate
datasets of subtrees which inherit a backup plan from a dataset which has it
in its attributes whose source is "local".

=item B<--inherited> and B<--recursive> together

At this time, the B<--inherited> is "sort of exclusive" vs. B<--recursive>
mode, and if both are specified the behavior depends on whether the requested
dataset has some (local or inherited-from-local) backup plan. If a dataset
without a backup plan is passed, the script should effectively ignore the
B<--inherited> option and just recurse until it finds some dataset(s) with
proper locally defined backup plan(s) (note that finding inherited plans
first is highly unlikely due to ZFS attribute inheritance from parents to
children).

Examples below assume the following dataset layout:

+ I<tank> (no znapzend config)
| + I<export> (local znapzend config)
| | + I<home> (inherited znapzend config from local tank/export)
| |   + I<user> (inherited znapzend config from local tank/export)
| |     + I<sources> (inherited znapzend config from local tank/export)
| |     + I<documents> (inherited znapzend config from local tank/export)
| |       + I<pictures> (inherited znapzend config from local tank/export)
| + I<var> (local znapzend config)
+ I<usbbackup> (no znapzend config)
  + I<snapshots> (no znapzend config)
    + I<export> (received znapzend config)
    | + I<home> (inherited from received usbbackup/snapshots/export)
    |   + I<user> (inherited from received usbbackup/snapshots/export)
    |     + I<sources> (inherited from received usbbackup/snapshots/export)
    |     + I<documents> (inherited from received usbbackup/snapshots/export)
    |       + I<pictures> (inherited from received usbbackup/snapshots/export)
    + I<var> (received znapzend config)

Examples:

  znapzend --recursive --runonce=tank

This should walk all filesystem or volume datasets defined anywhere under
I<tank> (root dataset of same-named pool) and snapshot/replicate the found
datasets that have a backup plan configured "locally", such as I<tank/export>
and I<tank/var>, according to configuration (e.g. with children and beyond
if the corresponding backup plan's I<recursive=on> option is specified).
It should not waste time looking for datasets under the I<usbbackup> pool.

  znapzend --runonce=tank/export/home/user/documents

Given that only I<tank/export> "locally" defines a znapzend backup plan, the
default B<znapzend> behavior with a descendant dataset would be to find no
configuration (sourced as "local" right in it) and so would "run-once" nothing.

  znapzend --recursive --runonce=tank/export/home

Same (no config found) for recursion starting from a dataset with inherited
backup plan configuration (assuming none of its descendants have a "local"
config of their own).

  znapzend --inherited --runonce=tank/export/home/user/documents

With the B<--inherited> option however it would recognize this descendant
dataset as having a backup plan configuration inherited from I<tank/export>,
would then look at I<tank/export> and confirm that it has this configuration
from a "local" source, and should B<znapzend runonce> just this dataset and
its descendants (so including I<tank/export/home/user/documents/pictures>,
but not including siblings like I<tank/export/home/user/sources>).

  znapzend --inherited --recursive --runonce=tank

Since I<tank> has no backup plan, B<znapzend> should recurse and find the
nearest datasets with configured plans, I<tank/export> and I<tank/var>, and
process them according to configuration.

  znapzend --inherited --recursive --runonce=tank/export

Since I<tank/export> has a locally defined backup plan, B<znapzend> should
process it according to configuration.

  znapzend --inherited --recursive --runonce=tank/export/home

Since I<tank/export/home> has a backup plan inherited from a locally defined
one in I<tank/export>, B<znapzend> should process it according to configuration.

  znapzend --recursive --runonce=usbbackup
  znapzend --inherited --runonce=usbbackup/snapshots
  znapzend --inherited --recursive --runonce=usbbackup/snapshots/export/home

None of these runs should do anything, because all datasets involved
(including those found by a recursive walk) under I<usbbackup> have neither
a local definition of a backup plan, nor one inherited from a local definition.


=item B<--features>=I<feature1>,I<feature2>,...

enables enhanced zfs features not supported by all zfs implementations.
Do not enable features unless you are sure your zfs supports (or requires) them.

Available features:

=over

=item oracleMode

working around the following zfs issues we have seen on oracle:

=over

=item *

The multi snapshot destroy syntax is not available. So stick to destroying
them individually.

=item *

Sometimes a snapshot can not be destroyed because of some oracle zfs bug.
Only a reboot seems to be able to fix this. So we just destroy the ones we
can destroy, logging an error about the problem.

=back

=item recvu

use the -u option on the receive end, to keep the destination zfs
filesystems unmounted.

=item resume

use the -s option on the receive end, to support resumable transfer
with receive_resume_token.

If you have aborted a resumable-send, you should either do a resume-send using -t <token>
to continue it or clear the half-received snapshot using zfs receive -A <filesystem>

Example:

 zfs get receive_resume_token tank/test
 zfs send -t <token> | zfs receive -s tank/test

=item compressed

use 'compressed' to add options -Lce to the zfs send command

Even if a source and destination datasets are both using compression,
zfs send will, by default, decompress the data before sending, and
zfs recv will then compress it again before writing it to disk.
Using -c will skip the unnecessary decompress-compress stages.
This decreases CPU load on both source and destination as well
as reduces network bandwidth usage.

The -L option is for large block support and -e is for embedded data
support. These may require certain (Open)ZFS features to be enabled.

=item sendRaw

use 'sendRaw' to add option -w to zfs send commands

For encrypted source datasets this instructs zfs not to decrypt
before sending which results in a remote backup that can't be read
without the encryption key/passphrase, useful when the remote isn't
fully trusted or not physically secure. This option must be used
consistently, raw incrementals cannot be based on non-raw snapshots
and vice versa.

=item sendIntermediates

The opposite of I<skipIntermediates>, detailed in the next section.
This is the default mode of operation, and only included here to
allow for completeness and explicitness of your configurations.

=item skipIntermediates

Enable the 'skipIntermediates' feature to send a single increment
between latest common snapshot and the newly made one. It may skip
several source snaps if the destination was offline for some time,
and it should skip snapshots not managed by znapzend. Normally for
online destinations, the new snapshot is sent as soon as it is
created on the source, so there are no automatic increments to skip.

By default 'znapzend' uses the 'zfs send -I' option on the sending
end (so to include all intermediate snapshots), rather than '-i'
(which would send a single big increment), to keep the destination
zfs dataset history similar to the source's one. This corresponds to the
I<sendIntermediates> mode of operation.

Note: it was the default from beginning of 'znapzend' to make sure
that in case a send operation takes too long, we still get all the
intermediate snapshots sent to the destination.
Beware or benefit that this can deliver not only automatic, but also
manually named snapshots to your destination which would have to be
cleaned manually when no longer wanted there, but that they can be
used as common snapshots to repair broken syncs e.g. with I<--since=X>
options.

With the 'skipIntermediates' feature disabled, all snapshots between
the latest common one and the newly created one on the source would
be sent to each destination, and then the "extra" ones of those managed
by znapzend may get discarded according to destination's retention
policy. Note that snapshots which are NOT managed by znapzend, e.g.
ones you created manually, would appear on the destination and stay
there until removed manually. Also note that this may consume more
disk space and transfer time than otherwise needed for the data
increment.

This original mode is now primarily recommended for irregular backups
(on removable media or over unstable links) and cases where the
sysadmin marks certain data states via snapshots as special (e.g.
"before_upgrade", etc.)

The feature is recommended to be part of your 'znapzend' service
setup for regular runs in stable storage/networking conditions.

=item forbidDestRollback

Avoid use of destructive 'zfs recv -F' in favor of failing to receive;
more details in description of B<--forbidDestRollback> CLI option above.

=item lowmemRecurse

use 'lowmemRecurse' on systems where you have too many datasets,
so a recursive listing of attributes to find backup plans exhausts
the memory available to `znapzend(zetup)`: instead, go the slower
way to first list all impacted dataset names, and then query their
configs one by one.

=item zfsGetType

use 'zfsGetType' if your 'zfs get' supports a '-t' argument for
filtering by dataset type at all (e.g. one in Solaris 10 does not),
AND lists properties for snapshots by default when recursing (e.g.
the one in Solaris 10u8 already does), so that there is too much
data to process while searching for backup plans.

If these two conditions apply to your system, the time needed for
a '--recursive' search for backup plans can literally differ by
hundreds of times (depending on the amount of snapshots in that
dataset tree... and a decent backup plan will ensure you have a
lot of those), so you would benefit from requesting this feature.

This feature should not impact the default (non- '--recursive')
listings however.

=back

=item B<--rootExec>={sudo|pfexec}

Execute zfs with this command, 'sudo' or 'pfexec', to
obtain root privileges. This is often necessary when running znapzend as a
non-privileged user with a zfs install that doesn't support finer permission
controls. This also applies to the zfs commands run on remote servers over ssh.

For sudo, the /etc/sudoers file will need to be modified to allow for
passwordless access to zfs commands if znapzend is to be run as a daemon or
the system will be used as a remote. Many ZFS installations include an
/etc/sudoers.d/zfs file as an example.

=item B<--connectTimeout>=I<timeout>

sets the ssh connection timeout (in seconds)

=item B<--autoCreation>

Automatically create a dataset on a destination host if it's not there yet.

=item B<--timeWarp>=x

Shift ZnapZend's sense of time into the future by x seconds.

The practical application of this function is to determine what will happen
at some future point in time. This can be useful for testing but also when
running in B<noaction> and B<debug> mode to determine which snapshots would
be created and removed at some future point in time.

=item B<--skipOnPreSnapCmdFail>

Prevent snapshots of a dataset from being taken when it has a B<pre-snap-command>
defined and the command returns a non-zero exit code or is killed by a signal.

=item B<--skipOnPreSendCmdFail>

Prevent snapshots of a dataset from being replicated to a destination when
it has a B<pre-send-command> defined and the command returns a non-zero exit
code or is killed by a signal.

=item B<--cleanOffline>

Clean snapshots of a source dataset even if one or more destination datasets
failed during replication for whatever reason (destination offline, destination
full, destination pool became read-only due to storage issues, source too full
to make a snapshot, etc.).

The most recent common snapshot for each destination (as tracked on source for
resilience) will not be deleted from source, but this is still a potentially
dangerous option: if the preserved snapshot somehow gets deleted from the
destination, it may require a full re-replication the next time it is online.

=item B<--mailErrorSummaryTo=rcpt(@domain),...>

If this argument is passed, a copy of error summary would be sent there by
your system's command line mailer program. It is then up to this program and
system setup to validate the recipient names (local or domain-suffixed) and
deliver the message to some mailbox.

This feature relies on a program which supports standard sendmail-like CLI.

=back

=head1 EXAMPLE

To test a new config:

 znapzend --debug --noaction --runonce=tank/test

To see what is going to happen in one hour:

 znapzend --debug --noaction --timeWarp=3600 --runonce=tank/test

To run as a daemon:

 znapzend --daemonize --pidfile=/var/run/znapzend.pid --logto=syslog::daemon

=head1 COPYRIGHT

Copyright (c) 2014 by OETIKER+PARTNER AG. All rights reserved.

=head1 LICENSE

This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along with
this program. If not, see L<http://www.gnu.org/licenses/>.

=head1 AUTHOR

S<Tobias Oetiker E<lt>tobi@oetiker.chE<gt>>,
S<Dominik Hassler E<lt>hadfl@cpan.orgE<gt>>

=head1 HISTORY

 2014-06-01 had Multi destination backup
 2014-05-30 had Initial Version

=cut
