# Git support (work in progress)

# Copyright (c) 2020-2023 Andreas Gustafsson.  All rights reserved.
# Please refer to the file COPYRIGHT for detailed copyright information.

# XXXTODO This hardcodes the src module, so it does not handle a
# code base that is split into multiple modules (such as src+xsrc)
# displayed on a shared timeline.  It should be changed to iterate
# over the modules and merge the data from all of them into the
# "dates" array.  We also need a new data structure to keep track
# of which commit belongs to which module.
module = 'src'

import os
import subprocess
from git import Repo
import html

import bracket
from bracket import branch_name, cno2ts, config, last_commit_cno, run, runv, ts2rcs, remove_email
from bracket import Commit, RepositoryUpdateFailed
from htmlgen import div
from report import link_if

def default_branch():
    """Return the name of the default branch."""
    name = 'trunk'
    return name

def setup():
    """Do nothing; no setup work is performed here."""
    return None

def _git_dir(module):
    """Return the pathname of the git directory of module "module".

    The directory lives directly under config['repo_root'] and is
    named after the module with ".git" appended.
    """
    return os.path.join(config['repo_root'], module) + '.git'

def _git_cmdv(module):
    """Return the beginning of an argv for invoking git on "module".

    The returned list selects the module's git directory via
    --git-dir; the git subcommand and its arguments are expected to
    be appended by the caller.
    """
    argv = ['git', '--git-dir']
    argv.append(_git_dir(module))
    return argv

def update_repo_module(module):
    """Bring the local copy of git module "module" up to date.

    If the module's git directory does not exist yet, it is created
    by a "git clone --mirror" from GitHub, run with config['repo_root']
    as the working directory.  Otherwise the existing repository is
    refreshed with "git remote update".

    Raises RepositoryUpdateFailed if runv() reports a nonzero status.
    """
    clone_parent = config['repo_root']
    if os.path.exists(_git_dir(module)):
        # Update existing tree
        status = runv(_git_cmdv(module) + ['remote', 'update'])
    else:
        # Initial clone.  --mirror implies --bare, so there is no
        # need for '--no-checkout'.
        clone_url = 'https://github.com/NetBSD/%s.git' % module
        status = runv(['git', 'clone', '--mirror', clone_url],
                      cwd = clone_parent)
    # A single check covers both the clone and the update case.
    if status != 0:
        raise RepositoryUpdateFailed()

def index_repo():
    """Do nothing; git needs no separate indexing step.

    Git repositories are well indexed in themselves, so no indexing
    is done ahead of time; querying the repository directly in
    read_dates() is fast enough.
    """
    return None

def read_dates():
    """Read the commit timestamps of the configured branch from git.

    Runs "git rev-list" over the first-parent history of the branch
    returned by branch_name(config), oldest commit first, and stores
    the result in two attributes of the bracket module:

      bracket.dates   list of commit timestamps, strictly increasing
      bracket.ts2sha  dict mapping each of those timestamps to the
                      SHA of its commit, as a str

    Timestamps that do not increase from one commit to the next are
    adjusted as described below, so the stored values can differ from
    the timestamps recorded in the repository.
    """
    print("begin read dates")
    cmd = _git_cmdv(module) + \
        ['rev-list', '--first-parent', '--reverse', '--topo-order',
         '--timestamp', branch_name(config)]
    prev_commit_ts = 0
    dates = []
    ts2sha = {}
    # The context manager closes the pipe and waits for the child even
    # if parsing a line raises, so no file descriptor or zombie process
    # is left behind.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as child:
        for line in child.stdout:
            # Each line is b"<timestamp> <sha>\n"; split() also discards
            # the trailing newline.
            commit_ts_str, sha = line.split()
            commit_ts = int(commit_ts_str)
            # Deal with non-monotonically-increasing commit timestamps by
            # adjusting each timestamp to be at least one second after the
            # previous one.  This should work well enough with a repo
            # converted from CVS, but if or when we start accepting direct
            # git commits with unchecked commit timestamps, we may have
            # to add fancier heuristics that basically amount to solving
            # what is known as the "Longest Increasing Subsequence" problem.
            if commit_ts <= prev_commit_ts:
                print("warning: time did not increase between commits "
                      "at %d (diff %d), fudging to %d" %
                      (commit_ts, commit_ts - prev_commit_ts,
                       prev_commit_ts + 1))
                commit_ts = prev_commit_ts + 1
            prev_commit_ts = commit_ts
            dates.append(commit_ts)
            ts2sha[commit_ts] = sha.decode('ascii')
    # Don't let a failed git invocation pass unnoticed; the data read
    # so far is still stored, as before.
    if child.returncode != 0:
        print("warning: git rev-list exited with status %d" %
              child.returncode)
    bracket.dates = dates
    bracket.ts2sha = ts2sha
    print("end read dates")

def checkout(branch, module, ts, builddir, logfd):
    """Extract the sources of "module" as of commit timestamp "ts".

    The commit that bracket.ts2sha maps "ts" to is exported with
    "git archive" and unpacked with tar inside "builddir", under a
    subdirectory named after the module.  The "branch" and "logfd"
    arguments are not used here.

    Returns whatever run() returns for the shell pipeline.
    """
    import shlex

    # Use git archive rather than clone to avoid the needless expense
    # of creating git metadata files in the source tree.
    #
    # Every interpolated value is shell-quoted so that pathnames
    # containing spaces or shell metacharacters cannot break or alter
    # the command.  Values consisting only of ordinary pathname
    # characters are left unchanged by shlex.quote().
    cmd = 'git --git-dir=%s archive --prefix %s %s | (cd %s && tar xfp -)' % (
        shlex.quote(_git_dir(module)),
        shlex.quote(module + '/'),
        shlex.quote(bracket.ts2sha[ts]),
        shlex.quote(str(builddir)))
    return run(cmd)

def last_safe_commit_ts():
    """Return the timestamp of the last safe commit.

    With git, there are no unsafe commits, so this is the timestamp
    of the last commit.
    """
    newest_cno = last_commit_cno()
    return cno2ts(newest_cno)

def get_commits(ts0, ts1):
    """Return the commits whose timestamps are in the range [ts0, ts1).

    Calls bracket.read_dates() and then looks up each commit in
    bracket.dates that falls in the range, in the order in which the
    timestamps appear there.

    Returns a list of Commit objects with the following attributes set:

      timestamp  the timestamp from bracket.dates
      committer  the committer name recorded in git
      files      paths of the files that differ from the first parent
      revision   the commit SHA
    """
    from git import NULL_TREE

    print("get_commits", ts2rcs(ts0), ts2rcs(ts1))
    bracket.read_dates()
    our_dates = [ts for ts in bracket.dates if ts0 <= ts < ts1]

    module = 'src'
    repo = Repo(_git_dir(module))

    commits = []
    for ts in our_dates:
        sha = bracket.ts2sha[ts]
        gitc = repo.commit(sha)
        # A root commit has no parent to diff against; compare it with
        # the empty tree instead so that all its files are listed,
        # rather than failing with an IndexError.
        base = gitc.parents[0] if gitc.parents else NULL_TREE
        # Use either path to handle added or deleted files
        files = [d.a_path or d.b_path for d in gitc.diff(base)]
        commit = Commit()
        commit.timestamp = ts
        commit.committer = gitc.committer.name
        commit.files = files
        commit.revision = sha
        commits.append(commit)

    return commits

def format_commit_html(c):
    """Return an HTML fragment describing commit "c".

    The fragment consists of one div holding the word "commit", the
    revision passed through link_if() together with its GitHub URL,
    and the HTML-escaped committer with the email address removed,
    followed by one div of class "file" per entry in c.files.  The
    divs are separated by newlines.
    """
    commit_url = 'https://github.com/NetBSD/src/commit/' + c.revision
    header_fields = [
        'commit',
        link_if(commit_url, commit_url, c.revision),
        html.escape(remove_email(c.committer)),
    ]
    header = div(' '.join(header_fields))
    file_divs = [div({'class': 'file'}, path) for path in c.files]
    return header + "\n" + "\n".join(file_divs)

def format_commit_email(c):
    """Return a plain-text description of commit "c" for use in email.

    The first line names the revision and the committer (with the
    email address removed), indented by four spaces.  It is followed
    by one line per entry in c.files, indented by eight spaces.  Every
    line ends in a newline.
    """
    lines = [f"    commit {c.revision} by {remove_email(c.committer)}\n"]
    lines.extend(f"        {path}\n" for path in c.files)
    return ''.join(lines)
