#!/usr/bin/python
"""Back up and restore the local state of a `repo` source tree.

The backup is written through an Archiver (a zip file when the location
ends in ``.zip``, otherwise a plain directory) using this layout:

    config                                   url=/ref= of the manifest repo
    .local_manifests/<name>                  copies of .repo/local_manifests
    <project>/remotes                        extra git remotes ("name url")
    <project>/remote_branches                remote refs needed as patch bases
    <project>/head                           what to check out after restore
    <project>/diff                           uncommitted changes
    <project>/local_branches/<branch>/.base  "base_name base_hash"
    <project>/local_branches/<branch>/*.patch  output of git format-patch

NOTE(review): output of child processes is handled as ``str`` throughout
(``.strip()``, ``.split('\\n')``); under Python 3 Popen returns bytes, so
the git-facing code paths presumably target Python 2 -- confirm.
"""

import os
import sys
import time
import errno
import shutil
import string
import argparse
import tempfile
from subprocess import Popen, PIPE
from xml.etree import ElementTree
from zipfile import ZipFile, ZIP_DEFLATED

# ANSI "erase to end of line", used to redraw the progress line on a tty.
EEOL = "\x1b[0K"

# Prefix/suffix for progress lines: rewrite in place on a terminal,
# one line per message otherwise.
if sys.stdout.isatty():
    spfx = '\r'
    ssfx = EEOL
else:
    spfx = ''
    ssfx = '\n'

# Parsed command line; filled in by main().
args = None


def logi(msg):
    """Write msg to stdout with exactly one trailing newline and flush."""
    sys.stdout.write("%s\n" % (msg.rstrip('\n')))
    sys.stdout.flush()


def loge(msg):
    """Write msg to stderr with exactly one trailing newline."""
    sys.stderr.write("%s\n" % (msg.rstrip('\n')))


def logv(level, msg):
    """Log msg if the requested verbosity is at least level.

    Before the command line has been parsed (args is None) the verbosity
    is treated as 0 instead of raising AttributeError.
    """
    if args is not None and args.verbose >= level:
        logi(msg)


def readfile(filename):
    """Return the whole content of filename."""
    with open(filename, 'r') as f:
        return f.read()


def writefile(filename, buf):
    """Replace the content of filename with buf."""
    with open(filename, 'w') as f:
        f.write(buf)


def mkdir_p(path):
    """Create path and any missing parents; an existing path is not an error."""
    parent = os.path.dirname(path)
    if parent and not os.path.isdir(parent):
        mkdir_p(parent)
    try:
        os.mkdir(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


def rmtree(path):
    """Recursively delete the directory path.

    Delegates to shutil.rmtree, which unlinks symbolic links instead of
    descending into them (the previous os.walk based version emptied the
    target of a directory symlink and then failed on rmdir).

    Raises OSError if path does not exist or cannot be removed.
    """
    shutil.rmtree(path)


def _output_lines(buf):
    """Split command output or archive text into lines, [] when empty."""
    stripped = buf.rstrip('\n')
    if not stripped:
        return []
    return stripped.split('\n')


def parse_status_line(line):
    """Split one ``git status --short`` line into (status, path).

    The short format is a two-character status field, one separator
    character, then the path.  Splitting on the first space is wrong
    because the status field itself may contain a space (e.g. " M").
    """
    return (line[:2], line[3:])


def syscmd(argv, stdin=None, expect_rc=0):
    """Run argv and return its (stdout, stderr).

    stdin, when given, is fed to the child through communicate() so that
    input and output are pumped together; writing to the pipe first can
    block forever once the child fills its output pipe.

    Exits the process with status 1 if the command cannot be started.
    Raises RuntimeError if the exit status differs from expect_rc.
    """
    try:
        child = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    except (OSError, ValueError) as e:
        sys.stderr.write("Failed to run %s: %s\n" % (argv, str(e)))
        sys.exit(1)
    (out, err) = child.communicate(stdin)
    rc = child.returncode
    if rc != expect_rc:
        raise RuntimeError("Failed to run %s: rc=%d" % (argv, rc))
    return (out, err)


def gitcmd(dir, argv, stdin=None, expect_rc=0):
    """Run ``git [-C dir] argv...`` and return its stdout.

    dir may be None to run in the current directory.  stdin is forwarded
    to the child (it used to be silently dropped).
    Raises RuntimeError if the exit status differs from expect_rc.
    """
    git_argv = ['git']
    if dir is not None:
        git_argv.extend(['-C', dir])
    git_argv.extend(argv)
    (out, err) = syscmd(git_argv, stdin, expect_rc)
    return out


def is_sha1(s):
    """Return True if s is exactly 40 hexadecimal digits."""
    if len(s) != 40:
        return False
    return all(c in string.hexdigits for c in s)


def git_ref_to_rev(remote, ref):
    """Translate a manifest revision into something git rev-parse accepts.

    refs/heads/X -> "<remote>/X"
    refs/tags/X  -> "X^0"
    40 hex chars -> unchanged
    anything else is taken as a branch name -> "<remote>/<ref>"

    Raises RuntimeError for any other refs/... namespace.
    """
    if not ref.startswith('refs/'):
        if is_sha1(ref):
            return ref
        return "%s/%s" % (remote, ref)
    segs = ref.split('/')
    name = '/'.join(segs[2:])
    if segs[1] == 'heads':
        return "%s/%s" % (remote, name)
    if segs[1] == 'tags':
        return "%s^0" % (name)
    raise RuntimeError("Unknown ref type: %s" % (ref))


class Archiver:
    """Interface of a backup store addressed by '/'-separated pathnames."""

    @classmethod
    def instance(cls, location, mode):
        """Return a ZipArchiver for *.zip locations, else a FilesystemArchiver."""
        if location.endswith('.zip'):
            return ZipArchiver(location, mode)
        else:
            return FilesystemArchiver(location, mode)

    def listdir(self, dirname):
        raise NotImplementedError()

    def extractdir(self, srcdir, dstdir, recursive=False):
        raise NotImplementedError()

    def read(self, pathname):
        raise NotImplementedError()

    def write(self, pathname, buf):
        raise NotImplementedError()

    def close(self):
        raise NotImplementedError()


class ZipArchiver(Archiver):
    """Archiver backed by a deflate-compressed zip file."""

    def __init__(self, filename, mode):
        self._zip = ZipFile(filename, mode, ZIP_DEFLATED)

    def listdir(self, dirname):
        """Return the distinct first path components found below dirname."""
        res = set()
        prefix = dirname
        if not prefix.endswith('/'):
            prefix += '/'
        prefixlen = len(prefix)
        for srcfile in self._zip.namelist():
            if not srcfile.startswith(prefix):
                continue
            relname = srcfile[prefixlen:]
            res.add(relname.split('/', 1)[0])
        return list(res)

    def extractdir(self, srcdir, dstdir, recursive=False):
        """Copy the members below srcdir into the directory dstdir.

        Without recursive only members directly inside srcdir are copied.
        """
        prefix = srcdir
        if not prefix.endswith('/'):
            prefix += '/'
        prefixlen = len(prefix)
        for srcfile in self._zip.namelist():
            if not srcfile.startswith(prefix):
                continue
            relname = srcfile[prefixlen:]
            if not recursive and relname.find('/') != -1:
                continue
            dstfile = "%s/%s" % (dstdir, relname)
            buf = self._zip.read(srcfile)
            mkdir_p(os.path.dirname(dstfile))
            writefile(dstfile, buf)

    def read(self, pathname):
        """Return the content of member pathname.

        Raises IOError(ENOENT) for a missing member so that callers can
        handle both archiver kinds the same way.
        """
        try:
            return self._zip.read(pathname)
        except KeyError:
            raise IOError(errno.ENOENT,
                          "No such file or directory: %s" % (pathname))

    def write(self, pathname, buf):
        """Store buf as member pathname."""
        self._zip.writestr(pathname, buf)

    def close(self):
        """Finish the zip file; required for a written archive to be valid."""
        self._zip.close()


class FilesystemArchiver(Archiver):
    """Archiver backed by a plain directory tree."""

    def __init__(self, dirname, mode):
        """Open dirname; mode 'r' requires it to exist, any other mode creates it."""
        self._dirname = dirname
        self._mode = mode
        if mode == 'r':
            if not os.path.isdir(dirname):
                raise IOError("Directory %s does not exist" % (dirname))
        else:
            mkdir_p(dirname)

    def listdir(self, dirname):
        """Return the entries of dirname, or [] if it does not exist."""
        try:
            res = os.listdir("%s/%s" % (self._dirname, dirname))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            res = []
        return res

    def extractdir(self, srcdir, dstdir, recursive=False):
        """Copy the files of srcdir into dstdir, descending only if recursive."""
        mkdir_p(dstdir)
        for name in self.listdir(srcdir):
            srcname = "%s/%s/%s" % (self._dirname, srcdir, name)
            dstname = "%s/%s" % (dstdir, name)
            if os.path.isdir(srcname):
                if recursive:
                    srcsubdir = "%s/%s" % (srcdir, name)
                    dstsubdir = "%s/%s" % (dstdir, name)
                    self.extractdir(srcsubdir, dstsubdir, True)
            else:
                writefile(dstname, readfile(srcname))

    def read(self, pathname):
        """Return the content of pathname; IOError if it cannot be opened."""
        return readfile("%s/%s" % (self._dirname, pathname))

    def write(self, pathname, buf):
        """Store buf as pathname, creating parent directories as needed.

        Raises IOError when the archiver was opened with mode 'r'.
        """
        if self._mode == 'r':
            raise IOError("Cannot write in read-only mode")
        full_pathname = "%s/%s" % (self._dirname, pathname)
        mkdir_p(os.path.dirname(full_pathname))
        writefile(full_pathname, buf)

    def close(self):
        """Nothing to release; present so both archivers can be closed alike."""
        pass


class Project:
    """One git project of the repo tree and its backup/restore logic."""

    def __init__(self, path, remote_name, remote_ref):
        self._path = path
        self._remote_name = remote_name
        self._remote_ref = remote_ref

    def _gitcmd(self, argv, expect_rc=0):
        """Run git inside this project's directory and return its stdout."""
        return gitcmd(self.path(), argv, None, expect_rc)

    def _get_info(self):
        """Query git for revisions, branches, remotes and tags of the project."""
        # Find revision for local HEAD
        self._local_rev = self._gitcmd(['rev-parse', 'HEAD^0']).strip()

        # Find revision for remote ref
        self._remote_rev = self._gitcmd(
            ['rev-parse', self._remote_ref]).strip()

        # Find local branch names and revs
        self._local_branches = dict()
        for line in _output_lines(self._gitcmd(['branch'])):
            if line.startswith('* '):
                line = line[2:]
            branch = line.strip()
            # Entries in parentheses are not branch names (detached HEAD).
            if branch.startswith('('):
                continue
            rev = self._gitcmd(['rev-parse', branch]).strip()
            self._local_branches[branch] = rev

        # Find remotes other than the manifest remote
        self._remotes = dict()
        for line in _output_lines(self._gitcmd(['remote', '-v'])):
            fields = line.split()
            name = fields[0]
            url = fields[1]
            if name != self._remote_name:
                self._remotes[name] = url

        # Find remote branch names and revs
        self._remote_branches = dict()
        for line in _output_lines(self._gitcmd(['branch', '-r'])):
            # Skip symbolic entries such as "origin/HEAD -> origin/master".
            if line.find('->') != -1:
                continue
            branch = line.strip()
            rev = self._gitcmd(['rev-parse', branch]).strip()
            self._remote_branches[branch] = rev

        # Find tags and revs
        self._tags = dict()
        for line in _output_lines(self._gitcmd(['tag', '-l'])):
            tag = line.strip()
            rev = self._gitcmd(['rev-parse', "%s^0" % (tag)]).strip()
            self._tags[tag] = rev

        self._required_remote_branches = set()

    def path(self):
        return self._path

    def detached_head(self):
        """Return True if HEAD does not point at a branch."""
        ref = self._gitcmd(['rev-parse', '--abbrev-ref', 'HEAD']).strip()
        return ref == 'HEAD'

    def branch_base(self, branch):
        """Return (name, hash) of the newest commit of branch that is a
        known remote branch or tag; remote branches win over tags.

        Raises RuntimeError if the history contains no such commit.
        """
        bases = dict()
        for name, rev in self._remote_branches.items():
            bases.setdefault(rev, name)
        for name, rev in self._tags.items():
            bases.setdefault(rev, name)
        log = self._gitcmd(['log', '--pretty=format:%H', branch])
        for line in _output_lines(log):
            if line in bases:
                return (bases[line], line)
        raise RuntimeError("Cannot find base for project %s branch %s" %
                           (self._path, branch))

    def backup_branch(self, archiver, branch):
        """Store the commits of branch on top of its base as patches.

        NOTE(review): a branch name containing '/' ends up in nested
        archive directories, which restore() does not appear to walk --
        confirm.
        """
        branch_dir = "%s/local_branches/%s" % (self.path(), branch)
        (base_name, base_hash) = self.branch_base(branch)
        if base_name != self._remote_ref:
            self._required_remote_branches.add(base_name)
        buf = "%s %s\n" % (base_name, base_hash)
        archiver.write("%s/.base" % (branch_dir), buf)
        revision_range = "%s..%s" % (base_hash, branch)
        tmpdir = tempfile.mkdtemp()
        try:
            self._gitcmd(['format-patch', '-o', tmpdir, revision_range])
            for name in os.listdir(tmpdir):
                buf = readfile("%s/%s" % (tmpdir, name))
                archiver.write("%s/%s" % (branch_dir, name), buf)
        finally:
            # Do not leave the scratch directory behind on failure.
            rmtree(tmpdir)

    def backup(self, archiver):
        """Write everything restore() needs for this project to archiver."""
        self._get_info()

        # Remotes
        if self._remotes:
            buf = ''.join("%s %s\n" % (k, v)
                          for k, v in self._remotes.items())
            archiver.write("%s/remotes" % (self.path()), buf)

        # Local commits
        if self.detached_head() and self._local_rev != self._remote_rev:
            self.backup_branch(archiver, 'HEAD')

        # Local branches
        for branch in self._local_branches:
            self.backup_branch(archiver, branch)

        # Remote branches
        if self._required_remote_branches:
            buf = ''.join("%s\n" % (branch)
                          for branch in self._required_remote_branches)
            archiver.write("%s/remote_branches" % (self.path()), buf)

        # head: branch name, or the commit hash when detached
        head = self._gitcmd(['rev-parse', '--abbrev-ref', 'HEAD']).strip()
        if head == 'HEAD':
            head = self._gitcmd(['rev-parse', 'HEAD']).strip()
        if head != self._remote_rev:
            archiver.write("%s/head" % (self.path()), "%s\n" % (head))

        # Local changes
        # NOTE(review): plain "git diff" covers unstaged changes only;
        # staged changes look like they are not captured -- confirm intent.
        diff = self._gitcmd(['diff'])
        untracked = []
        status_buf = self._gitcmd(['status', '--short', '--untracked-files'])
        for line in _output_lines(status_buf):
            (status, filename) = parse_status_line(line)
            # Tracked changes are already in `diff`; only untracked files
            # need to be added as new-file patches.
            if status == '??':
                untracked.append(filename)
        if diff.strip() or untracked:
            buf = diff
            for filename in untracked:
                # The call expects exit status 1 for these comparisons.
                buf += self._gitcmd(
                    ['diff', '--binary', '/dev/null', filename], 1)
            archiver.write("%s/diff" % (self.path()), buf)

    def restore_branch(self, archiver, branch):
        """Recreate branch from its stored base and patches.

        branch 'HEAD' applies the patches on a detached HEAD instead of
        creating a branch.  Raises RuntimeError if the branch already
        exists and --force was not given.
        """
        branch_dir = "%s/local_branches/%s" % (self.path(), branch)
        buf = archiver.read("%s/.base" % (branch_dir))
        (base_name, base_hash) = buf.rstrip('\n').split(' ', 1)
        self._gitcmd(['checkout', base_hash])
        if branch != 'HEAD':
            if branch in self._local_branches:
                if not args.force:
                    raise RuntimeError(
                        "Project %s already has local branch %s" %
                        (self._path, branch))
                self._gitcmd(['branch', '-D', branch])
            self._gitcmd(['checkout', '-b', branch])
        tmpdir = tempfile.mkdtemp()
        try:
            archiver.extractdir(branch_dir, tmpdir)
            argv = ['am']
            # The patches are applied in sorted filename order.
            for filename in sorted(os.listdir(tmpdir)):
                if filename.endswith('.patch'):
                    argv.append("%s/%s" % (tmpdir, filename))
            if len(argv) > 1:
                self._gitcmd(argv)
        finally:
            rmtree(tmpdir)

    def restore(self, archiver):
        """Bring the project back to the state recorded in archiver.

        Raises RuntimeError if the tree has local changes, or a remote
        exists with a different url, and --force was not given.
        """
        self._get_info()

        # Ensure a clean tree
        buf = self._gitcmd(['status', '--short'])
        if buf:
            if not args.force:
                raise RuntimeError("Project %s has local changes" %
                                   (self._path))
            self._gitcmd(['reset', '--hard', 'HEAD'])
            for line in _output_lines(buf):
                (status, filename) = parse_status_line(line)
                if status != '??':
                    continue
                victim = "%s/%s" % (self._path, filename)
                # An untracked directory may be listed as a single entry,
                # which os.unlink cannot remove.
                if os.path.isdir(victim) and not os.path.islink(victim):
                    rmtree(victim)
                else:
                    os.unlink(victim)

        # Remotes (optional archive member)
        try:
            buf = archiver.read("%s/remotes" % (self.path()))
        except IOError:
            buf = ''
        for line in _output_lines(buf):
            (name, url) = line.split(' ', 1)
            if name in self._remotes:
                if url != self._remotes[name]:
                    if not args.force:
                        raise RuntimeError(
                            "Project %s has remote %s but different url" %
                            (self._path, name))
                    self._gitcmd(['remote', 'set-url', name, url])
            else:
                self._gitcmd(['remote', 'add', name, url])

        # Remote branches (optional archive member)
        try:
            buf = archiver.read("%s/remote_branches" % (self.path()))
        except IOError:
            buf = ''
        for line in _output_lines(buf):
            (remote_name, sep, remote_ref) = line.partition('/')
            if not sep:
                # Not of the form "<remote>/<ref>" (e.g. a tag recorded as
                # a patch base); there is no remote to fetch it from.
                continue
            # Remote branches may not exist
            try:
                self._gitcmd(['fetch', remote_name, remote_ref])
            except RuntimeError:
                pass

        # Local branches
        # NOTE(review): patches applied for the 'HEAD' entry presumably get
        # new commit ids, so a hash stored in "head" may no longer exist
        # for the checkout below -- confirm.
        branches = archiver.listdir("%s/local_branches" % (self.path()))
        for branch in branches:
            self.restore_branch(archiver, branch)

        try:
            rev = archiver.read("%s/head" % (self.path())).rstrip('\n')
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            rev = self._remote_ref
        self._gitcmd(['checkout', rev])

        # Local changes (optional archive member)
        try:
            buf = archiver.read("%s/diff" % (self.path()))
        except IOError:
            buf = None
        if buf is not None:
            syscmd(['patch', '-d', self._path, '-p', '1'], buf)


class Repo:
    """The repo tree in the current directory."""

    def _get_projects(self):
        """Build self._projects (path -> Project) from ``repo manifest``.

        Raises RuntimeError when a project has no name, remote or revision.
        """
        logv(1, "Finding projects...")
        t = time.time()
        self._projects = dict()
        argv = ['repo', 'manifest']
        (out, err) = syscmd(argv)
        manifest = ElementTree.fromstring(out)
        remotes = dict()
        for elem in manifest.findall('remote'):
            remotes[elem.get('name')] = elem
        # May be None when the manifest has no <default> element.
        default = manifest.find('default')
        for elem in manifest.findall('project'):
            project_name = elem.get('name')
            if project_name is None:
                raise RuntimeError("Project without name")
            project_path = elem.get('path')
            if project_path is None:
                project_path = project_name
            project_remote = elem.get('remote')
            if project_remote is None and default is not None:
                project_remote = default.get('remote')
            if project_remote is None:
                raise RuntimeError("Failed to get remote for %s" %
                                   (project_name))
            # Revision precedence: project, then its remote, then default.
            project_ref = elem.get('revision')
            if project_ref is None:
                remote_elem = remotes.get(project_remote)
                if remote_elem is not None:
                    project_ref = remote_elem.get('revision')
            if project_ref is None and default is not None:
                project_ref = default.get('revision')
            if project_ref is None:
                raise RuntimeError("Failed to get ref for %s" %
                                   (project_name))
            project_rev = git_ref_to_rev(project_remote, project_ref)
            project = Project(project_path, project_remote, project_rev)
            self._projects[project_path] = project
            now = time.time()
            if now >= t + 1.0:
                sys.stdout.write("\r%d found" % (len(self._projects)))
                sys.stdout.flush()
                t = now
        sys.stdout.write("\rFound %d projects\n" % (len(self._projects)))

    def _select_projects(self):
        """Return the project paths to process: those named on the command
        line, or all projects in sorted order.

        Raises RuntimeError for a name that is not in the manifest.
        """
        if args.projects:
            names = args.projects
        else:
            names = sorted(self._projects.keys())
        unknown = [name for name in names if name not in self._projects]
        if unknown:
            raise RuntimeError("Unknown project(s): %s" %
                               (', '.join(unknown)))
        return names

    def backup(self, archiver):
        """Write the manifest settings, local manifests and projects."""
        argv = ['config', '-f', '.repo/manifests.git/config',
                'remote.origin.url']
        self._url = gitcmd(None, argv).strip()
        argv = ['config', '-f', '.repo/manifests.git/config',
                'branch.default.merge']
        self._ref = gitcmd(None, argv).strip()
        buf = ''
        buf += "url=%s\n" % (self._url)
        buf += "ref=%s\n" % (self._ref)
        archiver.write('config', buf)

        # Local manifests are optional; a missing directory is fine.
        try:
            for name in os.listdir(".repo/local_manifests"):
                src_filename = ".repo/local_manifests/%s" % (name)
                dst_filename = ".local_manifests/%s" % (name)
                if not os.path.isfile(src_filename):
                    continue
                buf = readfile(src_filename)
                archiver.write(dst_filename, buf)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

        self._get_projects()
        project_names = self._select_projects()
        total = len(project_names)
        for n, name in enumerate(project_names, 1):
            project = self._projects[name]
            sys.stdout.write("%s[%d of %d] Backup %s%s" %
                             (spfx, n, total, project.path(), ssfx))
            sys.stdout.flush()
            project.backup(archiver)
        sys.stdout.write("%sDone with backup%s\n" % (spfx, ssfx))

    def restore(self, archiver):
        """Initialise or validate the repo tree, sync it, restore projects.

        Raises RuntimeError if an existing tree does not match the backup,
        or has local manifests and --force was not given.
        """
        self._url = None
        self._ref = None
        for line in _output_lines(archiver.read("config")):
            (k, sep, v) = line.partition('=')
            if not sep:
                continue
            if k == 'url':
                self._url = v
            if k == 'ref':
                self._ref = v
        if self._url is None or self._ref is None:
            raise RuntimeError("Backup config is missing url or ref")

        if os.path.exists('.repo'):
            argv = ['config', '-f', '.repo/manifests.git/config',
                    'remote.origin.url']
            existing_url = gitcmd(None, argv).strip()
            argv = ['config', '-f', '.repo/manifests.git/config',
                    'branch.default.merge']
            existing_ref = gitcmd(None, argv).strip()
            if existing_url != self._url or existing_ref != self._ref:
                raise RuntimeError("Existing repo does not match saved repo")
            if os.path.exists('.repo/local_manifests'):
                if not args.force:
                    raise RuntimeError("Existing repo has local manifests")
                rmtree('.repo/local_manifests')
        else:
            syscmd(['repo', 'init', '-u', self._url, '-b', self._ref])

        sys.stdout.write("Restoring local manifests...\n")
        sys.stdout.flush()
        dst_dir = ".repo/local_manifests"
        mkdir_p(dst_dir)
        for name in archiver.listdir('.local_manifests'):
            src_filename = ".local_manifests/%s" % (name)
            dst_filename = "%s/%s" % (dst_dir, name)
            buf = archiver.read(src_filename)
            writefile(dst_filename, buf)

        sys.stdout.write("Syncing repo...\n")
        sys.stdout.flush()
        argv = ['repo', 'sync']
        if args.projects:
            argv.extend(args.projects)
        syscmd(argv)

        self._get_projects()
        project_names = self._select_projects()
        total = len(project_names)
        for n, name in enumerate(project_names, 1):
            project = self._projects[name]
            sys.stdout.write("%s[%d of %d] Restore %s%s" %
                             (spfx, n, total, project.path(), ssfx))
            sys.stdout.flush()
            project.restore(archiver)
        sys.stdout.write("%sDone with restore%s\n" % (spfx, ssfx))


def main():
    """Parse the command line and run the requested backup or restore."""
    global args

    parser = argparse.ArgumentParser(description='Archive a repo tree')
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help='Increase verbosity')
    parser.add_argument('-f', '--force', action='store_true',
                        help='Force deletion of existing objects')
    parser.add_argument('-b', '--backup', default='archive.zip',
                        help='Location of backup (zipfile or directory) [archive.zip]')
    parser.add_argument('-r', '--repo',
                        help='Location of repo tree [$PWD]')
    parser.add_argument('action', choices=['backup', 'restore'], nargs=1)
    parser.add_argument('projects', nargs='*')
    args = parser.parse_args()

    # The working directory changes before the backup location is opened,
    # so a relative --backup is resolved relative to --repo.
    if args.repo:
        mkdir_p(args.repo)
        os.chdir(args.repo)

    action = args.action[0]

    if action == 'backup':
        if os.path.exists(args.backup):
            if not args.force:
                loge("%s already exists" % (args.backup))
                sys.exit(1)
            if os.path.isdir(args.backup):
                rmtree(args.backup)
            else:
                os.unlink(args.backup)
        archiver = Archiver.instance(args.backup, 'w')
        try:
            Repo().backup(archiver)
        finally:
            # A zip archive is only complete once it has been closed.
            archiver.close()

    if action == 'restore':
        archiver = Archiver.instance(args.backup, 'r')
        try:
            Repo().restore(archiver)
        finally:
            archiver.close()


if __name__ == '__main__':
    main()