#!/usr/bin/env python """ A small program to compute checksums of LLVM checkout. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import hashlib import logging import re import sys from argparse import ArgumentParser from project_tree import * SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$") def main(): parser = ArgumentParser() parser.add_argument( "-v", "--verbose", action="store_true", help="enable debug logging") parser.add_argument( "-c", "--check", metavar="reference_file", help="read checksums from reference_file and " + "check they match checksums of llvm_path.") parser.add_argument( "--partial", action="store_true", help="ignore projects from reference_file " + "that are not checked out in llvm_path.") parser.add_argument( "--multi_dir", action="store_true", help="indicates llvm_path contains llvm, checked out " + "into multiple directories, as opposed to a " + "typical single source tree checkout.") parser.add_argument("llvm_path") args = parser.parse_args() if args.check is not None: with open(args.check, "r") as f: reference_checksums = ReadLLVMChecksums(f) else: reference_checksums = None if args.verbose: logging.basicConfig(level=logging.DEBUG) llvm_projects = CreateLLVMProjects(not args.multi_dir) checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects) if reference_checksums is None: WriteLLVMChecksums(checksums, sys.stdout) sys.exit(0) if not ValidateChecksums(reference_checksums, checksums, args.partial): sys.stdout.write("Checksums differ.\nNew checksums:\n") WriteLLVMChecksums(checksums, sys.stdout) sys.stdout.write("Reference checksums:\n") WriteLLVMChecksums(reference_checksums, sys.stdout) sys.exit(1) else: sys.stdout.write("Checksums match.") def ComputeLLVMChecksums(root_path, projects): """Compute checksums for LLVM sources checked out using svn. Args: root_path: a directory of llvm checkout. projects: a list of LLVMProject instances, which describe checkout paths, relative to root_path. Returns: A dict mapping from project name to project checksum. """ hash_algo = hashlib.sha256 def collapse_svn_substitutions(contents): # Replace svn substitutions for $Date$ and $LastChangedDate$. # Unfortunately, these are locale-specific. return SVN_DATES_REGEX.sub("$\1$", contents) def read_and_collapse_svn_subsitutions(file_path): with open(file_path, "rb") as f: contents = f.read() new_contents = collapse_svn_substitutions(contents) if contents != new_contents: logging.debug("Replaced svn keyword substitutions in %s", file_path) logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents) return new_contents project_checksums = dict() # Hash each project. for proj in projects: project_root = os.path.join(root_path, proj.relpath) if not os.path.exists(project_root): logging.info("Folder %s doesn't exist, skipping project %s", proj.relpath, proj.name) continue files = list() def add_file_hash(file_path): if os.path.islink(file_path) and not os.path.exists(file_path): content = os.readlink(file_path) else: content = read_and_collapse_svn_subsitutions(file_path) hasher = hash_algo() hasher.update(content) file_digest = hasher.hexdigest() logging.debug("Checksum %s for file %s", file_digest, file_path) files.append((file_path, file_digest)) logging.info("Computing checksum for %s", proj.name) WalkProjectFiles(root_path, projects, proj, add_file_hash) # Compute final checksum. files.sort(key=lambda x: x[0]) hasher = hash_algo() for file_path, file_digest in files: file_path = os.path.relpath(file_path, project_root) hasher.update(file_path) hasher.update(file_digest) project_checksums[proj.name] = hasher.hexdigest() return project_checksums def WriteLLVMChecksums(checksums, f): """Writes checksums to a text file. Args: checksums: a dict mapping from project name to project checksum (result of ComputeLLVMChecksums). f: a file object to write into. """ for proj in sorted(checksums.keys()): f.write("{} {}\n".format(checksums[proj], proj)) def ReadLLVMChecksums(f): """Reads checksums from a text file, produced by WriteLLVMChecksums. Returns: A dict, mapping from project name to project checksum. """ checksums = {} while True: line = f.readline() if line == "": break checksum, proj = line.split() checksums[proj] = checksum return checksums def ValidateChecksums(reference_checksums, new_checksums, allow_missing_projects=False): """Validates that reference_checksums and new_checksums match. Args: reference_checksums: a dict of reference checksums, mapping from a project name to a project checksum. new_checksums: a dict of checksums to be checked, mapping from a project name to a project checksum. allow_missing_projects: When True, reference_checksums may contain more projects than new_checksums. Projects missing from new_checksums are ignored. When False, new_checksums and reference_checksums must contain checksums for the same set of projects. If there is a project in reference_checksums, missing from new_checksums, ValidateChecksums will return False. Returns: True, if checksums match with regards to allow_missing_projects flag value. False, otherwise. """ if not allow_missing_projects: if len(new_checksums) != len(reference_checksums): return False for proj, checksum in new_checksums.items(): # We never computed a checksum for this project. if proj not in reference_checksums: return False # Checksum did not match. if reference_checksums[proj] != checksum: return False return True if __name__ == "__main__": main()