#
# Convert RCS dates to hex timestamps and group into commit clusters
#
# Copyright (c) 2009-2013, 2019 Andreas Gustafsson.  All rights reserved.
# Please refer to the file COPYRIGHT for detailed copyright information.
#

from __future__ import print_function

import itertools
import sys

from sys import stdin
from utils import rcs2ts

# Cluster commits separated by less than this many seconds
# (passed to cluster_by() when the input is grouped below)
limit = 15

# Command-line arguments:
#   argv[1] - name of the output file that receives one hex timestamp
#             per cluster
#   argv[2] - name of the output file that receives one hex input
#             offset per cluster
ts_fn = sys.argv[1]
offset_fn = sys.argv[2]

# Return a key function for use with itertools.groupby() that
# groups items into clusters where the difference between
# adjacent items is less than "limit" and the committer is the same.

def cluster_by(limit):
    """Return a stateful key function for itertools.groupby().

    The returned function takes a (timestamp, committer, offset) tuple
    and returns an integer cluster id.  The id is incremented, which
    starts a new cluster, when either
      - the timestamp is at least "limit" later than the timestamp of
        the previous item, or
      - the committer differs from that of the previous item and the
        timestamp differs from that of the previous item.
    Otherwise the item gets the same id as the previous one.

    Each call to cluster_by() returns a key function with its own,
    independent state.
    """
    # The state is kept in a dict rather than in plain captured
    # variables so that the inner function can update it without
    # "nonlocal", which Python 2 does not have.
    #   'prevts'        - the previous timestamp (seconds since epoch)
    #   'prevcommitter' - the committer of the previous item
    #   'unique'        - a unique integer identifying the current cluster
    state = { 'prevts': 0, 'prevcommitter': '', 'unique': 0 }
    def key(v):
        # The offset is not used for clustering; unpacking it still
        # checks that the item has exactly three elements.
        ts, committer, _offset = v
        # "and" binds tighter than "or"; the parentheses only make
        # that grouping explicit.
        if ts - state['prevts'] >= limit or \
           (committer != state['prevcommitter'] and ts != state['prevts']):
            state['unique'] += 1
        state['prevts'] = ts
        state['prevcommitter'] = committer
        return state['unique']
    return key

# Keep track of the offset within the input file.  line2tso() advances
# this by len(line) for every input line it processes.
# XXX this may not work correctly on systems using CRLF
# NOTE(review): len(line) is the length of the line as read from stdin;
# presumably this is meant to be a byte offset - confirm for input
# containing non-ASCII characters.
offset = 0

# Given a line of rcsdates output, return a tuple of
#   - the time stamp as an integer number of seconds since the epoch
#   - the committer name
#   - the offset within the input.
#
# Also update the current offset (global).

def line2tso(line):
    """Parse one line of rcsdates output.

    The line is stripped of trailing whitespace and split on single
    spaces; the first field is the RCS date and the second is the
    committer name.

    Returns a tuple of (rcs2ts(rcsdate), committer, start), where
    "start" is the value of the global "offset" before this line was
    counted.  As a side effect, the global "offset" is advanced by
    len(line).
    """
    global offset
    start = offset
    offset = start + len(line)
    tokens = line.rstrip().split(" ")
    rcsdate, committer = tokens[0], tokens[1]
    return rcs2ts(rcsdate), committer, start

# Write one line per commit cluster to each of the two output files.
# The files are opened using "with" so that they are flushed and closed
# even if parsing the input or writing the output fails part way through.
with open(ts_fn, "w") as ts_f, \
     open(offset_fn, "w") as offset_f:
    for k, g in itertools.groupby(map(line2tso, stdin),
                                  cluster_by(limit)):
        # Materialize the cluster; both its first and last items are needed
        t = list(g)
        # Take the timestamp from the last commit in the cluster,
        # and the offset from the first commit in the cluster
        print("%08x" % t[-1][0], file=ts_f)
        print("%08x" % t[0][2], file=offset_f)
