Mercurial > jhg
view src/org/tmatesoft/hg/repo/HgWorkingCopyStatusCollector.java @ 302:a7a3395a519e
Walk explicit revisions to avoid troubles with unnatural repositories
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> | 
|---|---|
| date | Sat, 17 Sep 2011 14:01:31 +0200 | 
| parents | 981f9f50bb6c | 
| children | fb74133d2025 | 
line wrap: on
 line source
/* * Copyright (c) 2011 TMate Software Ltd * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * For information on how to redistribute this software under * the terms of a license other than GNU General Public License * contact TMate Software at support@hg4j.com */ package org.tmatesoft.hg.repo; import static java.lang.Math.max; import static java.lang.Math.min; import static org.tmatesoft.hg.repo.HgRepository.*; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; import java.util.ArrayList; import java.util.Collections; import java.util.NoSuchElementException; import java.util.Set; import java.util.TreeSet; import org.tmatesoft.hg.core.HgDataStreamException; import org.tmatesoft.hg.core.HgException; import org.tmatesoft.hg.core.Nodeid; import org.tmatesoft.hg.internal.ByteArrayChannel; import org.tmatesoft.hg.internal.Experimental; import org.tmatesoft.hg.internal.FilterByteChannel; import org.tmatesoft.hg.internal.ManifestRevision; import org.tmatesoft.hg.internal.PathScope; import org.tmatesoft.hg.util.ByteChannel; import org.tmatesoft.hg.util.CancelledException; import org.tmatesoft.hg.util.FileInfo; import org.tmatesoft.hg.util.FileIterator; import org.tmatesoft.hg.util.FileWalker; import org.tmatesoft.hg.util.Path; import org.tmatesoft.hg.util.PathPool; import org.tmatesoft.hg.util.PathRewrite; import org.tmatesoft.hg.util.RegularFileInfo; /** * * @author Artem Tikhomirov * @author TMate Software Ltd. */ public class HgWorkingCopyStatusCollector { private final HgRepository repo; private final FileIterator repoWalker; private HgDirstate dirstate; private HgStatusCollector baseRevisionCollector; private PathPool pathPool; private ManifestRevision dirstateParentManifest; public HgWorkingCopyStatusCollector(HgRepository hgRepo) { this(hgRepo, new HgInternals(hgRepo).createWorkingDirWalker(null)); } // FIXME document cons public HgWorkingCopyStatusCollector(HgRepository hgRepo, FileIterator hgRepoWalker) { repo = hgRepo; repoWalker = hgRepoWalker; } /** * Optionally, supply a collector instance that may cache (or have already cached) base revision * @param sc may be null */ public void setBaseRevisionCollector(HgStatusCollector sc) { baseRevisionCollector = sc; } /*package-local*/ PathPool getPathPool() { if (pathPool == null) { if (baseRevisionCollector == null) { pathPool = new PathPool(new PathRewrite.Empty()); } else { return baseRevisionCollector.getPathPool(); } } return pathPool; } public void setPathPool(PathPool pathPool) { this.pathPool = pathPool; } /** * Access to directory state information this collector uses. * @return directory state holder, never <code>null</code> */ public HgDirstate getDirstate() { if (dirstate == null) { dirstate = repo.loadDirstate(getPathPool()); } return dirstate; } private ManifestRevision getManifest(int changelogLocalRev) { assert changelogLocalRev >= 0; ManifestRevision mr; if (baseRevisionCollector != null) { mr = baseRevisionCollector.raw(changelogLocalRev); } else { mr = new ManifestRevision(null, null); repo.getManifest().walk(changelogLocalRev, changelogLocalRev, mr); } return mr; } private ManifestRevision getDirstateParentManifest() { // WC not necessarily points to TIP, but may be result of update to any previous revision. // In such case, we need to compare local files not to their TIP content, but to specific version at the time of selected revision if (dirstateParentManifest == null) { Nodeid dirstateParent = getDirstate().parents().first(); if (dirstateParent.isNull()) { dirstateParentManifest = baseRevisionCollector != null ? baseRevisionCollector.raw(-1) : HgStatusCollector.createEmptyManifestRevision(); } else { int changelogLocalRev = repo.getChangelog().getLocalRevision(dirstateParent); dirstateParentManifest = getManifest(changelogLocalRev); } } return dirstateParentManifest; } // may be invoked few times, TIP or WORKING_COPY indicate comparison shall be run against working copy parent // NOTE, use of TIP constant requires certain care. TIP here doesn't mean latest cset, but actual working copy parent. public void walk(int baseRevision, HgStatusInspector inspector) { if (HgInternals.wrongLocalRevision(baseRevision) || baseRevision == BAD_REVISION) { throw new IllegalArgumentException(String.valueOf(baseRevision)); } ManifestRevision collect = null; // non null indicates we compare against base revision Set<Path> baseRevFiles = Collections.emptySet(); // files from base revision not affected by status calculation if (baseRevision != TIP && baseRevision != WORKING_COPY) { collect = getManifest(baseRevision); baseRevFiles = new TreeSet<Path>(collect.files()); } if (inspector instanceof HgStatusCollector.Record) { HgStatusCollector sc = baseRevisionCollector == null ? new HgStatusCollector(repo) : baseRevisionCollector; // nodeidAfterChange(dirstate's parent) doesn't make too much sense, // because the change might be actually in working copy. Nevertheless, // as long as no nodeids can be provided for WC, seems reasonable to report // latest known nodeid change (although at the moment this is not used and // is done mostly not to leave stale initialization in the Record) int rev1,rev2 = getDirstateParentManifest().changesetLocalRev(); if (baseRevision == TIP || baseRevision == WORKING_COPY) { rev1 = rev2 - 1; // just use revision prior to dirstate's parent } else { rev1 = baseRevision; } ((HgStatusCollector.Record) inspector).init(rev1, rev2, sc); } final HgIgnore hgIgnore = repo.getIgnore(); repoWalker.reset(); TreeSet<Path> processed = new TreeSet<Path>(); // names of files we handled as they known to Dirstate (not FileIterator) final HgDirstate ds = getDirstate(); TreeSet<Path> knownEntries = ds.all(); // here just to get dirstate initialized while (repoWalker.hasNext()) { repoWalker.next(); final Path fname = getPathPool().path(repoWalker.name()); FileInfo f = repoWalker.file(); Path knownInDirstate; if (!f.exists()) { // file coming from iterator doesn't exist. if ((knownInDirstate = ds.known(fname)) != null) { // found in dirstate processed.add(knownInDirstate); if (ds.checkRemoved(knownInDirstate) == null) { inspector.missing(knownInDirstate); } else { inspector.removed(knownInDirstate); } // do not report it as removed later if (collect != null) { baseRevFiles.remove(knownInDirstate); } } else { // chances are it was known in baseRevision. We may rely // that later iteration over baseRevFiles leftovers would yield correct Removed, // but it doesn't hurt to be explicit (provided we know fname *is* inScope of the FileIterator if (collect != null && baseRevFiles.remove(fname)) { inspector.removed(fname); } else { // not sure I shall report such files (i.e. arbitrary name coming from FileIterator) // as unknown. Command-line HG aborts "system can't find the file specified" // in similar case (against wc), or just gives nothing if --change <rev> is specified. // however, as it's unlikely to get unexisting files from FileIterator, and // its better to see erroneous file status rather than not to see any (which is too easy // to overlook), I think unknown() is reasonable approach here inspector.unknown(fname); } } continue; } if ((knownInDirstate = ds.known(fname)) != null) { // tracked file. // modified, added, removed, clean processed.add(knownInDirstate); if (collect != null) { // need to check against base revision, not FS file checkLocalStatusAgainstBaseRevision(baseRevFiles, collect, baseRevision, knownInDirstate, f, inspector); } else { checkLocalStatusAgainstFile(knownInDirstate, f, inspector); } } else { if (hgIgnore.isIgnored(fname)) { // hgignore shall be consulted only for non-tracked files inspector.ignored(fname); } else { inspector.unknown(fname); } // the file is not tracked. Even if it's known at baseRevision, we don't need to remove it // from baseRevFiles, it might need to be reported as removed as well (cmdline client does // yield two statuses for the same file) } } if (collect != null) { for (Path fromBase : baseRevFiles) { if (repoWalker.inScope(fromBase)) { inspector.removed(fromBase); } } } knownEntries.removeAll(processed); for (Path m : knownEntries) { if (!repoWalker.inScope(m)) { // do not report as missing/removed those FileIterator doesn't care about. continue; } // missing known file from a working dir if (ds.checkRemoved(m) == null) { // not removed from the repository = 'deleted' inspector.missing(m); } else { // removed from the repo // if we check against non-tip revision, do not report files that were added past that revision and now removed. if (collect == null || baseRevFiles.contains(m)) { inspector.removed(m); } } } } public HgStatusCollector.Record status(int baseRevision) { HgStatusCollector.Record rv = new HgStatusCollector.Record(); walk(baseRevision, rv); return rv; } //******************************************** private void checkLocalStatusAgainstFile(Path fname, FileInfo f, HgStatusInspector inspector) { HgDirstate.Record r; if ((r = getDirstate().checkNormal(fname)) != null) { // either clean or modified final boolean timestampEqual = f.lastModified() == r.modificationTime(), sizeEqual = r.size() == f.length(); if (timestampEqual && sizeEqual) { inspector.clean(fname); } else if (!sizeEqual && r.size() >= 0) { inspector.modified(fname); } else { // size is the same or unknown, and, perhaps, different timestamp // check actual content to avoid false modified files HgDataFile df = repo.getFileNode(fname); Nodeid rev = getDirstateParentManifest().nodeid(fname); if (!areTheSame(f, df, rev)) { inspector.modified(df.getPath()); } else { inspector.clean(df.getPath()); } } } else if ((r = getDirstate().checkAdded(fname)) != null) { if (r.copySource() == null) { inspector.added(fname); } else { inspector.copied(r.copySource(), fname); } } else if ((r = getDirstate().checkRemoved(fname)) != null) { inspector.removed(fname); } else if ((r = getDirstate().checkMerged(fname)) != null) { inspector.modified(fname); } } // XXX refactor checkLocalStatus methods in more OO way private void checkLocalStatusAgainstBaseRevision(Set<Path> baseRevNames, ManifestRevision collect, int baseRevision, Path fname, FileInfo f, HgStatusInspector inspector) { // fname is in the dirstate, either Normal, Added, Removed or Merged Nodeid nid1 = collect.nodeid(fname); HgManifest.Flags flags = collect.flags(fname); HgDirstate.Record r; if (nid1 == null) { // normal: added? // added: not known at the time of baseRevision, shall report // merged: was not known, report as added? if ((r = getDirstate().checkNormal(fname)) != null) { try { Path origin = HgStatusCollector.getOriginIfCopy(repo, fname, baseRevNames, baseRevision); if (origin != null) { inspector.copied(getPathPool().path(origin), fname); return; } } catch (HgDataStreamException ex) { ex.printStackTrace(); // FIXME report to a mediator, continue status collection } } else if ((r = getDirstate().checkAdded(fname)) != null) { if (r.copySource() != null && baseRevNames.contains(r.copySource())) { baseRevNames.remove(r.copySource()); // XXX surely I shall not report rename source as Removed? inspector.copied(r.copySource(), fname); return; } // fall-through, report as added } else if (getDirstate().checkRemoved(fname) != null) { // removed: removed file was not known at the time of baseRevision, and we should not report it as removed return; } inspector.added(fname); } else { // was known; check whether clean or modified Nodeid nidFromDirstate = getDirstateParentManifest().nodeid(fname); if ((r = getDirstate().checkNormal(fname)) != null && nid1.equals(nidFromDirstate)) { // regular file, was the same up to WC initialization. Check if was modified since, and, if not, report right away // same code as in #checkLocalStatusAgainstFile final boolean timestampEqual = f.lastModified() == r.modificationTime(), sizeEqual = r.size() == f.length(); boolean handled = false; if (timestampEqual && sizeEqual) { inspector.clean(fname); handled = true; } else if (!sizeEqual && r.size() >= 0) { inspector.modified(fname); handled = true; } else if (!todoCheckFlagsEqual(f, flags)) { // seems like flags have changed, no reason to check content further inspector.modified(fname); handled = true; } if (handled) { baseRevNames.remove(fname); // consumed, processed, handled. return; } // otherwise, shall check actual content (size not the same, or unknown (-1 or -2), or timestamp is different, // or nodeid in dirstate is different, but local change might have brought it back to baseRevision state) // FALL THROUGH } if (r != null || (r = getDirstate().checkMerged(fname)) != null || (r = getDirstate().checkAdded(fname)) != null) { // check actual content to see actual changes // when added - seems to be the case of a file added once again, hence need to check if content is different // either clean or modified HgDataFile fileNode = repo.getFileNode(fname); if (areTheSame(f, fileNode, nid1)) { inspector.clean(fname); } else { inspector.modified(fname); } baseRevNames.remove(fname); // consumed, processed, handled. } else if (getDirstate().checkRemoved(fname) != null) { // was known, and now marked as removed, report it right away, do not rely on baseRevNames processing later inspector.removed(fname); baseRevNames.remove(fname); // consumed, processed, handled. } // only those left in baseRevNames after processing are reported as removed } // TODO think over if content comparison may be done more effectively by e.g. calculating nodeid for a local file and comparing it with nodeid from manifest // we don't need to tell exact difference, hash should be enough to detect difference, and it doesn't involve reading historical file content, and it's relatively // cheap to calc hash on a file (no need to keep it completely in memory). OTOH, if I'm right that the next approach is used for nodeids: // changeset nodeid + hash(actual content) => entry (Nodeid) in the next Manifest // then it's sufficient to check parents from dirstate, and if they do not match parents from file's baseRevision (non matching parents means different nodeids). // The question is whether original Hg treats this case (same content, different parents and hence nodeids) as 'modified' or 'clean' } private boolean areTheSame(FileInfo f, HgDataFile dataFile, Nodeid revision) { // XXX consider adding HgDataDile.compare(File/byte[]/whatever) operation to optimize comparison ByteArrayChannel bac = new ByteArrayChannel(); boolean ioFailed = false; try { int localRevision = dataFile.getLocalRevision(revision); // need content with metadata striped off - although theoretically chances are metadata may be different, // WC doesn't have it anyway dataFile.content(localRevision, bac); } catch (CancelledException ex) { // silently ignore - can't happen, ByteArrayChannel is not cancellable } catch (HgException ex) { ioFailed = true; } return !ioFailed && areTheSame(f, bac.toArray(), dataFile.getPath()); } private boolean areTheSame(FileInfo f, final byte[] data, Path p) { ReadableByteChannel is = null; class Check implements ByteChannel { final boolean debug = repo.getContext().getLog().isDebug(); boolean sameSoFar = true; int x = 0; public int write(ByteBuffer buffer) { for (int i = buffer.remaining(); i > 0; i--, x++) { if (x >= data.length /*file has been appended*/ || data[x] != buffer.get()) { if (debug) { byte[] xx = new byte[15]; if (buffer.position() > 5) { buffer.position(buffer.position() - 5); } buffer.get(xx, 0, min(xx.length, i)); repo.getContext().getLog().debug(getClass(), "expected >>%s<< but got >>%s<<", new String(data, max(0, x - 4), min(data.length - x, 20)), new String(xx)); } sameSoFar = false; break; } } buffer.position(buffer.limit()); // mark as read return buffer.limit(); } public boolean sameSoFar() { return sameSoFar; } public boolean ultimatelyTheSame() { return sameSoFar && x == data.length; } }; Check check = new Check(); try { is = f.newInputChannel(); ByteBuffer fb = ByteBuffer.allocate(min(1 + data.length * 2 /*to fit couple of lines appended; never zero*/, 8192)); FilterByteChannel filters = new FilterByteChannel(check, repo.getFiltersFromWorkingDirToRepo(p)); while (is.read(fb) != -1 && check.sameSoFar()) { fb.flip(); filters.write(fb); fb.compact(); } return check.ultimatelyTheSame(); } catch (CancelledException ex) { repo.getContext().getLog().warn(getClass(), ex, "Unexpected cancellation"); return check.ultimatelyTheSame(); } catch (IOException ex) { repo.getContext().getLog().warn(getClass(), ex, null); } finally { if (is != null) { try { is.close(); } catch (IOException ex) { repo.getContext().getLog().info(getClass(), ex, null); } } } return false; } private static boolean todoCheckFlagsEqual(FileInfo f, HgManifest.Flags originalManifestFlags) { // FIXME implement return true; } /** * Configure status collector to consider only subset of a working copy tree. Tries to be as effective as possible, and to * traverse only relevant part of working copy on the filesystem. * * @param hgRepo repository * @param paths repository-relative files and/or directories. Directories are processed recursively. * * @return new instance of {@link HgWorkingCopyStatusCollector}, ready to {@link #walk(int, HgStatusInspector) walk} associated working copy */ @Experimental(reason="Provisional API") public static HgWorkingCopyStatusCollector create(HgRepository hgRepo, Path... paths) { ArrayList<Path> f = new ArrayList<Path>(5); ArrayList<Path> d = new ArrayList<Path>(5); for (Path p : paths) { if (p.isDirectory()) { d.add(p); } else { f.add(p); } } // final Path[] dirs = f.toArray(new Path[d.size()]); if (d.isEmpty()) { final Path[] files = f.toArray(new Path[f.size()]); FileIterator fi = new FileListIterator(hgRepo.getWorkingDir(), files); return new HgWorkingCopyStatusCollector(hgRepo, fi); } // //FileIterator fi = file.isDirectory() ? new DirFileIterator(hgRepo, file) : new FileListIterator(, file); FileIterator fi = new HgInternals(hgRepo).createWorkingDirWalker(new PathScope(true, paths)); return new HgWorkingCopyStatusCollector(hgRepo, fi); } /** * Configure collector object to calculate status for matching files only. * This method may be less effective than explicit list of files as it iterates over whole repository * (thus supplied matcher doesn't need to care if directories to files in question are also in scope, * see {@link FileWalker#FileWalker(File, Path.Source, Path.Matcher)}) * * @return new instance of {@link HgWorkingCopyStatusCollector}, ready to {@link #walk(int, HgStatusInspector) walk} associated working copy */ @Experimental(reason="Provisional API. May add boolean strict argument for those who write smart matchers that can be used in FileWalker") public static HgWorkingCopyStatusCollector create(HgRepository hgRepo, Path.Matcher scope) { FileIterator w = new HgInternals(hgRepo).createWorkingDirWalker(null); FileIterator wf = (scope == null || scope instanceof Path.Matcher.Any) ? w : new FileIteratorFilter(w, scope); // the reason I need to iterate over full repo and apply filter is that I have no idea whatsoever about // patterns in the scope. I.e. if scope lists a file (PathGlobMatcher("a/b/c.txt")), FileWalker won't get deep // to the file unless matcher would also explicitly include "a/", "a/b/" in scope. Since I can't rely // users would write robust matchers, and I don't see a decent way to enforce that (i.e. factory to produce // correct matcher from Path is much like what PathScope does, and can be accessed directly with #create(repo, Path...) // method above/ return new HgWorkingCopyStatusCollector(hgRepo, wf); } private static class FileListIterator implements FileIterator { private final File dir; private final Path[] paths; private int index; private RegularFileInfo nextFile; public FileListIterator(File startDir, Path... files) { dir = startDir; paths = files; reset(); } public void reset() { index = -1; nextFile = new RegularFileInfo(); } public boolean hasNext() { return paths.length > 0 && index < paths.length-1; } public void next() { index++; if (index == paths.length) { throw new NoSuchElementException(); } nextFile.init(new File(dir, paths[index].toString())); } public Path name() { return paths[index]; } public FileInfo file() { return nextFile; } public boolean inScope(Path file) { for (int i = 0; i < paths.length; i++) { if (paths[i].equals(file)) { return true; } } return false; } } private static class FileIteratorFilter implements FileIterator { private final Path.Matcher filter; private final FileIterator walker; private boolean didNext = false; public FileIteratorFilter(FileIterator fileWalker, Path.Matcher filterMatcher) { assert fileWalker != null; assert filterMatcher != null; filter = filterMatcher; walker = fileWalker; } public void reset() { walker.reset(); } public boolean hasNext() { while (walker.hasNext()) { walker.next(); if (filter.accept(walker.name())) { didNext = true; return true; } } return false; } public void next() { if (didNext) { didNext = false; } else { if (!hasNext()) { throw new NoSuchElementException(); } } } public Path name() { return walker.name(); } public FileInfo file() { return walker.file(); } public boolean inScope(Path file) { return filter.accept(file); } } }
