Mercurial > hg4j
comparison src/org/tmatesoft/hg/repo/HgStatusCollector.java @ 197:3a7696fb457c
Investigate optimization options to allow fast processing of huge repositories. Fix defect in StatusCollector that lead to wrong result comparing first revision to empty repo (-1 to 0), due to same TIP constant value
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Tue, 19 Apr 2011 03:49:29 +0200 |
| parents | c9b305df0b89 |
| children | 047b1dec7a04 |
comparison
equal
deleted
inserted
replaced
| 196:e2115da4cf6a | 197:3a7696fb457c |
|---|---|
| 49 private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact | 49 private final SortedMap<Integer, ManifestRevisionInspector> cache; // sparse array, in fact |
| 50 // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output | 50 // with cpython repository, ~70 000 changes, complete Log (direct out, no reverse) output |
| 51 // no cache limit, no nodeids and fname caching - OOME on changeset 1035 | 51 // no cache limit, no nodeids and fname caching - OOME on changeset 1035 |
| 52 // no cache limit, but with cached nodeids and filenames - 1730+ | 52 // no cache limit, but with cached nodeids and filenames - 1730+ |
| 53 // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped) | 53 // cache limit 100 - 19+ minutes to process 10000, and still working (too long, stopped) |
| 54 private final int cacheMaxSize = 100; // do not keep too much manifest revisions | 54 private final int cacheMaxSize = 50; // do not keep too much manifest revisions |
| 55 private PathPool pathPool; | 55 private PathPool pathPool; |
| 56 private final Pool<Nodeid> cacheNodes; | 56 private final Pool<Nodeid> cacheNodes; |
| 57 private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution | 57 private final Pool<String> cacheFilenames; // XXX in fact, need to think if use of PathPool directly instead is better solution |
| 58 private final ManifestRevisionInspector emptyFakeState; | |
| 58 | 59 |
| 59 | 60 |
| 60 public HgStatusCollector(HgRepository hgRepo) { | 61 public HgStatusCollector(HgRepository hgRepo) { |
| 61 this.repo = hgRepo; | 62 this.repo = hgRepo; |
| 62 cache = new TreeMap<Integer, ManifestRevisionInspector>(); | 63 cache = new TreeMap<Integer, ManifestRevisionInspector>(); |
| 63 cacheNodes = new Pool<Nodeid>(); | 64 cacheNodes = new Pool<Nodeid>(); |
| 64 cacheFilenames = new Pool<String>(); | 65 cacheFilenames = new Pool<String>(); |
| 65 ManifestRevisionInspector emptyFakeState = new ManifestRevisionInspector(null, null); | 66 |
| 67 emptyFakeState = new ManifestRevisionInspector(null, null); | |
| 66 emptyFakeState.begin(-1, null); | 68 emptyFakeState.begin(-1, null); |
| 67 emptyFakeState.end(-1); // FIXME HgRepo.TIP == -1 as well, need to distinguish fake "prior to first" revision from "the very last" | 69 emptyFakeState.end(-1); |
| 68 cache.put(-1, emptyFakeState); | |
| 69 } | 70 } |
| 70 | 71 |
| 71 public HgRepository getRepo() { | 72 public HgRepository getRepo() { |
| 72 return repo; | 73 return repo; |
| 73 } | 74 } |
| 74 | 75 |
| 75 private ManifestRevisionInspector get(int rev) { | 76 private ManifestRevisionInspector get(int rev) { |
| 76 ManifestRevisionInspector i = cache.get(rev); | 77 ManifestRevisionInspector i = cache.get(rev); |
| 77 if (i == null) { | 78 if (i == null) { |
| 78 if (cache.size() > cacheMaxSize) { | 79 if (rev == -1) { |
| 80 return emptyFakeState; | |
| 81 } | |
| 82 while (cache.size() > cacheMaxSize) { | |
| 79 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary | 83 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary |
| 80 cache.remove(cache.firstKey()); | 84 cache.remove(cache.firstKey()); |
| 81 } | 85 } |
| 82 i = new ManifestRevisionInspector(cacheNodes, cacheFilenames); | 86 i = new ManifestRevisionInspector(cacheNodes, cacheFilenames); |
| 83 cache.put(rev, i); | 87 cache.put(rev, i); |
| 84 repo.getManifest().walk(rev, rev, i); | 88 repo.getManifest().walk(rev, rev, i); |
| 85 } | 89 } |
| 86 return i; | 90 return i; |
| 91 } | |
| 92 | |
| 93 private boolean cached(int revision) { | |
| 94 return cache.containsKey(revision) || revision == -1; | |
| 95 } | |
| 96 | |
| 97 private void initCacheRange(int minRev, int maxRev) { | |
| 98 while (cache.size() > cacheMaxSize) { | |
| 99 // assume usually we go from oldest to newest, hence remove oldest as most likely to be no longer necessary | |
| 100 cache.remove(cache.firstKey()); | |
| 101 } | |
| 102 repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() { | |
| 103 private ManifestRevisionInspector delegate; | |
| 104 private boolean cacheHit; // range may include revisions we already know about, do not re-create them | |
| 105 | |
| 106 public boolean begin(int revision, Nodeid nid) { | |
| 107 assert delegate == null; | |
| 108 if (cache.containsKey(revision)) { // don't need to check emptyFakeState hit as revision never -1 here | |
| 109 cacheHit = true; | |
| 110 } else { | |
| 111 cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames)); | |
| 112 // cache may grow bigger than max size here, but it's ok as present simplistic cache clearing mechanism may | |
| 113 // otherwise remove entries we just added | |
| 114 delegate.begin(revision, nid); | |
| 115 cacheHit = false; | |
| 116 } | |
| 117 return true; | |
| 118 } | |
| 119 | |
| 120 public boolean next(Nodeid nid, String fname, String flags) { | |
| 121 if (!cacheHit) { | |
| 122 delegate.next(nid, fname, flags); | |
| 123 } | |
| 124 return true; | |
| 125 } | |
| 126 | |
| 127 public boolean end(int revision) { | |
| 128 if (!cacheHit) { | |
| 129 delegate.end(revision); | |
| 130 } | |
| 131 cacheHit = false; | |
| 132 delegate = null; | |
| 133 return true; | |
| 134 } | |
| 135 }); | |
| 87 } | 136 } |
| 88 | 137 |
| 89 /*package-local*/ ManifestRevisionInspector raw(int rev) { | 138 /*package-local*/ ManifestRevisionInspector raw(int rev) { |
| 90 return get(rev); | 139 return get(rev); |
| 91 } | 140 } |
| 108 public void change(int rev, HgStatusInspector inspector) { | 157 public void change(int rev, HgStatusInspector inspector) { |
| 109 int[] parents = new int[2]; | 158 int[] parents = new int[2]; |
| 110 repo.getChangelog().parents(rev, parents, null, null); | 159 repo.getChangelog().parents(rev, parents, null, null); |
| 111 walk(parents[0], rev, inspector); | 160 walk(parents[0], rev, inspector); |
| 112 } | 161 } |
| 113 | 162 |
| 114 // I assume revision numbers are the same for changelog and manifest - here | 163 // I assume revision numbers are the same for changelog and manifest - here |
| 115 // user would like to pass changelog revision numbers, and I use them directly to walk manifest. | 164 // user would like to pass changelog revision numbers, and I use them directly to walk manifest. |
| 116 // if this assumption is wrong, fix this (lookup manifest revisions from changeset). | 165 // if this assumption is wrong, fix this (lookup manifest revisions from changeset). |
| 166 // rev1 and rev2 may be -1 to indicate comparison to empty repository | |
| 167 // argument order matters | |
| 117 public void walk(int rev1, int rev2, HgStatusInspector inspector) { | 168 public void walk(int rev1, int rev2, HgStatusInspector inspector) { |
| 118 if (rev1 == rev2) { | 169 if (rev1 == rev2) { |
| 119 throw new IllegalArgumentException(); | 170 throw new IllegalArgumentException(); |
| 120 } | 171 } |
| 121 if (inspector == null) { | 172 if (inspector == null) { |
| 122 throw new IllegalArgumentException(); | 173 throw new IllegalArgumentException(); |
| 123 } | 174 } |
| 124 if (inspector instanceof Record) { | 175 if (inspector instanceof Record) { |
| 125 ((Record) inspector).init(rev1, rev2, this); | 176 ((Record) inspector).init(rev1, rev2, this); |
| 126 } | 177 } |
| 178 final int lastManifestRevision = repo.getManifest().getLastRevision(); | |
| 127 if (rev1 == TIP) { | 179 if (rev1 == TIP) { |
| 128 rev1 = repo.getManifest().getLastRevision(); | 180 rev1 = lastManifestRevision; |
| 129 } | 181 } |
| 130 if (rev2 == TIP) { | 182 if (rev2 == TIP) { |
| 131 rev2 = repo.getManifest().getLastRevision(); | 183 rev2 = lastManifestRevision; |
| 132 } | 184 } |
| 133 // in fact, rev1 and rev2 are often next (or close) to each other, | 185 // in fact, rev1 and rev2 are often next (or close) to each other, |
| 134 // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2)) | 186 // thus, we can optimize Manifest reads here (manifest.walk(rev1, rev2)) |
| 135 ManifestRevisionInspector r1, r2 ; | 187 ManifestRevisionInspector r1, r2 ; |
| 136 if (!cache.containsKey(rev1) && !cache.containsKey(rev2) && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) { | 188 boolean need1 = !cached(rev1), need2 = !cached(rev2); |
| 137 int minRev = rev1 < rev2 ? rev1 : rev2; | 189 if (need1 || need2) { |
| 138 int maxRev = minRev == rev1 ? rev2 : rev1; | 190 int minRev, maxRev; |
| 139 if (minRev > 0) { | 191 if (need1 && need2 && Math.abs(rev1 - rev2) < 5 /*subjective equivalent of 'close enough'*/) { |
| 140 minRev--; // expand range a bit | 192 minRev = rev1 < rev2 ? rev1 : rev2; |
| 141 // XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision | 193 maxRev = minRev == rev1 ? rev2 : rev1; |
| 142 // which gonna be read anyway | 194 if (minRev > 0) { |
| 143 } | 195 minRev--; // expand range a bit |
| 144 | 196 } |
| 145 repo.getManifest().walk(minRev, maxRev, new HgManifest.Inspector() { | 197 initCacheRange(minRev, maxRev); |
| 146 private ManifestRevisionInspector delegate; | 198 need1 = need2 = false; |
| 147 | 199 } |
| 148 public boolean begin(int revision, Nodeid nid) { | 200 // either both unknown and far from each other, or just one of them. |
| 149 cache.put(revision, delegate = new ManifestRevisionInspector(cacheNodes, cacheFilenames)); | 201 // read with neighbors to save potential subsequent calls for neighboring elements |
| 150 delegate.begin(revision, nid); | 202 // XXX perhaps, if revlog.baseRevision is cheap, shall expand minRev up to baseRevision |
| 151 return true; | 203 // which going to be read anyway |
| 152 } | 204 if (need1) { |
| 153 | 205 minRev = rev1; |
| 154 public boolean next(Nodeid nid, String fname, String flags) { | 206 maxRev = rev1 < lastManifestRevision-5 ? rev1+5 : lastManifestRevision; |
| 155 delegate.next(nid, fname, flags); | 207 initCacheRange(minRev, maxRev); |
| 156 return true; | 208 } |
| 157 } | 209 if (need2) { |
| 158 | 210 minRev = rev2; |
| 159 public boolean end(int revision) { | 211 maxRev = rev2 < lastManifestRevision-5 ? rev2+5 : lastManifestRevision; |
| 160 delegate.end(revision); | 212 initCacheRange(minRev, maxRev); |
| 161 delegate = null; | 213 } |
| 162 return true; | |
| 163 } | |
| 164 }); | |
| 165 } | 214 } |
| 166 r1 = get(rev1); | 215 r1 = get(rev1); |
| 167 r2 = get(rev2); | 216 r2 = get(rev2); |
| 168 | 217 |
| 169 PathPool pp = getPathPool(); | 218 PathPool pp = getPathPool(); |
| 394 } | 443 } |
| 395 if (idsPool != null) { | 444 if (idsPool != null) { |
| 396 nid = idsPool.unify(nid); | 445 nid = idsPool.unify(nid); |
| 397 } | 446 } |
| 398 idsMap.put(fname, nid); | 447 idsMap.put(fname, nid); |
| 399 flagsMap.put(fname, flags); | 448 if (flags != null) { |
| 449 // TreeMap$Entry takes 32 bytes. No reason to keep null for such price | |
| 450 // Perhaps, Map<String, Pair<Nodeid, String>> might be better solution | |
| 451 flagsMap.put(fname, flags); | |
| 452 } | |
| 400 return true; | 453 return true; |
| 401 } | 454 } |
| 402 | 455 |
| 403 public boolean end(int revision) { | 456 public boolean end(int revision) { |
| 404 // in fact, this class cares about single revision | 457 // in fact, this class cares about single revision |
