diff --git a/README.md b/README.md index 0cccb77..4458025 100644 --- a/README.md +++ b/README.md @@ -1 +1,10 @@ ## This is a collection of tools to make working with [repo](https://gerrit.googlesource.com/git-repo) easier + +## repo-bisect + +Similar to **git bisect**, but works with an entire repo. + +Note that due to the way certain code review tools like [gerrit](https://www.gerritcodereview.com/) +work, the git *committer date* may reflect when commits are uploaded +for review rather than when they are actually introduced into the +project. diff --git a/repo-bisect b/repo-bisect new file mode 100644 index 0000000..6e4abf1 --- /dev/null +++ b/repo-bisect @@ -0,0 +1,258 @@ +#!/usr/bin/python + +import os +import sys +import getopt +import subprocess +import urllib2 +import json +import re +import datetime +import xml.etree.ElementTree as ElementTree + +cfg = dict() +cfg['verbose'] = 0 +cfg['nosync'] = False + +cwd = os.getcwd() +if not os.path.exists('.repo'): + sys.stderr.write("Not a repo\n") + sys.exit(1) + +def verbose(s): + if cfg['verbose'] > 0: + sys.stderr.write(s) + +# Valid formats: +# YYYY-MM-DD-HH-MM-SS +# YYYY-MM-DD-HH +# YYYY-MM-DD +def string_to_datetime(s): + fields = s.split('-') + if len(fields) == 6: + return datetime.datetime(int(fields[0]), int(fields[1]), int(fields[2]), + int(fields[3]), int(fields[4]), int(fields[5])) + if len(fields) == 4: + return datetime.datetime(int(fields[0]), int(fields[1]), int(fields[2]), + int(fields[3])) + if len(fields) == 3: + return datetime.datetime(int(fields[0]), int(fields[1]), int(fields[2])) + return None + +def datetime_to_string(dt): + if dt.hour == 0 and dt.minute == 0 and dt.second == 0: + return dt.strftime("%Y-%m-%d") + return dt.strftime("%Y-%m-%d-%H-%M-%S") + +# Find mid point between two datetime objects. +# +# Granularity is 1 hour. +# +# If the objects differ by less than 2 hours, exit. +# +# If the objects differ by at least 2 days and both hour components are +# zero, the resulting hour component will be zero. +def datetime_mid(d1, d2): + delta = (d2 - d1) / 2 + if delta.total_seconds() < 3600: + print "No more dates left to test" + sys.exit(0) + mid = d1 + delta + if mid.minute != 0 or mid.second != 0: + mid = mid.replace(minute=0, second=0) + if delta.total_seconds() > 86400 and d1.hour == 0 and d2.hour == 0: + if mid.hour != 0: + mid = mid.replace(hour=0) + return mid + +def git_config(key): + args = ['git', 'config', key] + child = subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE, stderr=None) + out, err = child.communicate() + if child.returncode != 0: + sys.stderr.write('Failed to read git config\n') + sys.exit(1) + return out.strip() + +def git_reset_hard(rev): + args = ['git', 'reset', '--hard', rev] + child = subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE, stderr=None) + out, err = child.communicate() + if child.returncode != 0: + sys.stderr.write('Failed to reset git tree\n') + sys.exit(1) + +def git_checkout(rev): + args = ['git', 'checkout', rev] + child = subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE, stderr=None) + out, err = child.communicate() + if child.returncode != 0: + sys.stderr.write('Failed to checkout git tree\n') + sys.exit(1) + +def git_rev_by_date(date, branch): + args = ['git', 'rev-list', '--max-count=1'] + args.append('--until=%s' % (date)) + args.append(branch) + child = subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE, stderr=None) + out, err = child.communicate() + if child.returncode != 0: + sys.stderr.write('Failed to read git log\n') + sys.exit(1) + if not out: + sys.stderr.write('Failed to find rev') + sys.exit(1) + return out.strip() + +def repo_manifest(): + args = [] + args.append('repo') + args.append('manifest') + child = subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE, stderr=None) + out, err = child.communicate() + if child.returncode != 0: + sys.stderr.write('Failed to read manifest\n') + sys.exit(1) + return ElementTree.fromstring(out) + +def repo_sync_to_date(date): + print "bisect: sync to %s" % (date) + + # Set manifest revision + os.chdir('.repo/manifests') + remote = git_config('branch.default.remote') + branch = git_config('branch.default.merge') + manifest_rev = git_rev_by_date(date, '%s/%s' % (remote, branch)) + verbose("manifest revision %s\n" % (manifest_rev)) + git_reset_hard(manifest_rev) + os.chdir(cwd) + + # Fetch the manifest + manifest = repo_manifest() + + # Find default remote and revision + for elem in manifest.findall('default'): + def_remote = elem.get('remote') + def_revision = elem.get('revision') + if def_revision.startswith('refs/heads/'): + # refs/heads/branch => branch + def_revision = def_revision.split('/')[2] + + # Update manifest revisions + for elem in manifest.findall('project'): + project_name = elem.get('name') + project_path = elem.get('path', project_name) + project_remote = elem.get('remote', def_remote) + project_revision = elem.get('revision', def_revision) + if project_revision.startswith('refs/tags'): + manifest_revision = project_revision.split('/')[2] + else: + manifest_revision = "%s/%s" % (project_remote, project_revision) + os.chdir(".repo/projects/%s.git" % (project_path)) + rev = git_rev_by_date(date, manifest_revision) + os.chdir(cwd) + elem.set('revision', rev) + + # Write new manifest + pathname = "%s/.repo/manifests/bisect-%s.xml" % (cwd, date) + + ElementTree.ElementTree(manifest).write(pathname) + + # Sync the working tree. Note: + # + # - Both "repo manifest" and "repo sync -m" include local manifests, + # so we must move the local manifests out of the way temporarily + # to avoid duplicate project errors. + # + # - "repo sync" always syncs up the main manifest (even with -m), so + # we must reset the main manifest after we sync. + + have_local = os.path.exists('.repo/local_manifests') + if have_local: + os.rename('.repo/local_manifests', '.repo/local_manifests.hide') + + args = ['repo', 'sync', '-l', '-m', pathname] + child = subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE, stderr=None) + out, err = child.communicate() + if child.returncode != 0: + sys.stderr.write('Failed to sync\n') + + if have_local: + os.rename('.repo/local_manifests.hide', '.repo/local_manifests') + + os.chdir('.repo/manifests') + git_reset_hard(manifest_rev) + os.chdir(cwd) + +def state_read(): + s = dict() + f = open('.repo/bisect', 'r') + for line in f: + k,v = line.strip().split('=') + s[k] = v + f.close() + return s + +def state_write(s): + f = open('.repo/bisect', 'w') + f.write("start=%s\n" % (s['start'])) + f.write("end=%s\n" % (s['end'])) + f.close() + +def state_delete(): + os.unlink('.repo/bisect') + +def usage(): + print "Usage:" + print " repo-bisect [args] start yyyy-mm-dd yyyy-mm-dd" + print " repo-bisect [args] " + print " --verbose Increase verbose level" + sys.exit(1) + +### Main code ### + +optargs, argv = getopt.getopt(sys.argv[1:], 'v', + ['verbose', 'nosync']) +for k, v in optargs: + if k in ('-n', '--nosync'): + cfg['nosync'] = True + if k in ('-v', '--verbose'): + cfg['verbose'] += 1 + +if len(argv) < 1: + usage() + +action = argv[0] +if action == 'start': + if len(argv) < 3: + usage() + state = dict() + state['start'] = argv[1] + state['end'] = argv[2] + state_write(state) + if not cfg['nosync']: + repo_sync_to_date(state['start']) +if action == 'good': + state = state_read() + d1 = string_to_datetime(state['start']) + d2 = string_to_datetime(state['end']) + mid = datetime_mid(d1, d2) + state['start'] = datetime_to_string(mid) + state_write(state) + print "bisect: start=%s, end=%s" % (state['start'], state['end']) + if not cfg['nosync']: + repo_sync_to_date(state['start']) +if action == 'bad': + state = state_read() + d1 = string_to_datetime(state['start']) + d2 = string_to_datetime(state['end']) + mid = datetime_mid(d1, d2) + state['end'] = datetime_to_string(mid) + state_write(state) + print "bisect: start=%s, end=%s" % (state['start'], state['end']) + if not cfg['nosync']: + repo_sync_to_date(state['start']) +if action == 'reset': + state_delete() + +sys.exit(0)