Mercurial > hg4j
comparison src/org/tmatesoft/hg/repo/HgManifest.java @ 285:6dbbc53fc46d
Use Path instead of plain String for manifest file names
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Sat, 03 Sep 2011 21:46:13 +0200 |
| parents | c5980f287cc4 |
| children | 650b45d290b1 |
comparison
equal
deleted
inserted
replaced
| 284:7232b94f2ae3 | 285:6dbbc53fc46d |
|---|---|
| 39 * @author Artem Tikhomirov | 39 * @author Artem Tikhomirov |
| 40 * @author TMate Software Ltd. | 40 * @author TMate Software Ltd. |
| 41 */ | 41 */ |
| 42 public class HgManifest extends Revlog { | 42 public class HgManifest extends Revlog { |
| 43 private RevisionMapper revisionMap; | 43 private RevisionMapper revisionMap; |
| 44 | |
| 45 public enum Flags { | |
| 46 Exec, Link; | |
| 47 | |
| 48 static Flags parse(String flags) { | |
| 49 if ("x".equalsIgnoreCase(flags)) { | |
| 50 return Exec; | |
| 51 } | |
| 52 if ("l".equalsIgnoreCase(flags)) { | |
| 53 return Link; | |
| 54 } | |
| 55 if (flags == null) { | |
| 56 return null; | |
| 57 } | |
| 58 throw new IllegalStateException(flags); | |
| 59 } | |
| 60 | |
| 61 static Flags parse(byte[] data, int start, int length) { | |
| 62 if (length == 0) { | |
| 63 return null; | |
| 64 } | |
| 65 if (length == 1) { | |
| 66 if (data[start] == 'x') { | |
| 67 return Exec; | |
| 68 } | |
| 69 if (data[start] == 'l') { | |
| 70 return Link; | |
| 71 } | |
| 72 // FALL THROUGH | |
| 73 } | |
| 74 throw new IllegalStateException(new String(data, start, length)); | |
| 75 } | |
| 76 | |
| 77 String nativeString() { | |
| 78 if (this == Exec) { | |
| 79 return "x"; | |
| 80 } | |
| 81 if (this == Link) { | |
| 82 return "l"; | |
| 83 } | |
| 84 throw new IllegalStateException(toString()); | |
| 85 } | |
| 86 } | |
| 44 | 87 |
| 45 /*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content) { | 88 /*package-local*/ HgManifest(HgRepository hgRepo, RevlogStream content) { |
| 46 super(hgRepo, content); | 89 super(hgRepo, content); |
| 47 } | 90 } |
| 48 | 91 |
| 144 return rv[0]; | 187 return rv[0]; |
| 145 } | 188 } |
| 146 | 189 |
| 147 public interface Inspector { | 190 public interface Inspector { |
| 148 boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision); | 191 boolean begin(int mainfestRevision, Nodeid nid, int changelogRevision); |
| 192 /** | |
| 193 * @deprecated switch to {@link Inspector2#next(Nodeid, Path, Flags)} | |
| 194 */ | |
| 195 @Deprecated | |
| 149 boolean next(Nodeid nid, String fname, String flags); | 196 boolean next(Nodeid nid, String fname, String flags); |
| 150 boolean end(int manifestRevision); | 197 boolean end(int manifestRevision); |
| 151 } | 198 } |
| 152 | 199 |
| 153 public interface ElementProxy<T> { | 200 @Experimental(reason="Explore Path alternative for filenames and enum for flags") |
| 154 T get(); | 201 public interface Inspector2 extends Inspector { |
| 202 boolean next(Nodeid nid, Path fname, Flags flags); | |
| 155 } | 203 } |
| 156 | 204 |
| 157 /** | 205 /** |
| 158 * When Pool uses Strings directly, | 206 * When Pool uses Strings directly, |
| 159 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion. | 207 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion. |
| 160 * For cpython repo, walk(0..10k), there are over 16 million filenames, of them only 3020 unique. | 208 * For cpython repo, walk(0..10k), there are over 16 million filenames, of them only 3020 unique. |
| 161 * This means there are 15.9 million useless char[] instances and byte->char conversions | 209 * This means there are 15.9 million useless char[] instances and byte->char conversions |
| 162 * | 210 * |
| 163 * When String is wrapped into {@link StringProxy}, there's extra overhead of byte[] representation | 211 * When String (Path) is wrapped into {@link PathProxy}, there's extra overhead of byte[] representation |
| 164 * of the String, but these are only for unique Strings (3020 in the example above). Besides, I save | 212 * of the String, but these are only for unique Strings (Paths) (3020 in the example above). Besides, I save |
| 165 * useless char[] and byte->char conversions. | 213 * useless char[] and byte->char conversions. |
| 166 */ | 214 */ |
| 167 private static class StringProxy { | 215 private static class PathProxy { |
| 168 private byte[] data; | 216 private byte[] data; |
| 169 private int start; | 217 private int start; |
| 170 private final int hash, length; | 218 private final int hash, length; |
| 171 private String result; | 219 private Path result; |
| 172 | 220 |
| 173 public StringProxy(byte[] data, int start, int length) { | 221 public PathProxy(byte[] data, int start, int length) { |
| 174 this.data = data; | 222 this.data = data; |
| 175 this.start = start; | 223 this.start = start; |
| 176 this.length = length; | 224 this.length = length; |
| 177 | 225 |
| 178 // copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode | 226 // copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode |
| 185 hash = h; | 233 hash = h; |
| 186 } | 234 } |
| 187 | 235 |
| 188 @Override | 236 @Override |
| 189 public boolean equals(Object obj) { | 237 public boolean equals(Object obj) { |
| 190 if (false == obj instanceof StringProxy) { | 238 if (false == obj instanceof PathProxy) { |
| 191 return false; | 239 return false; |
| 192 } | 240 } |
| 193 StringProxy o = (StringProxy) obj; | 241 PathProxy o = (PathProxy) obj; |
| 194 if (o.result != null && result != null) { | 242 if (o.result != null && result != null) { |
| 195 return result.equals(o.result); | 243 return result.equals(o.result); |
| 196 } | 244 } |
| 197 if (o.length != length || o.hash != hash) { | 245 if (o.length != length || o.hash != hash) { |
| 198 return false; | 246 return false; |
| 207 @Override | 255 @Override |
| 208 public int hashCode() { | 256 public int hashCode() { |
| 209 return hash; | 257 return hash; |
| 210 } | 258 } |
| 211 | 259 |
| 212 public String freeze() { | 260 public Path freeze() { |
| 213 if (result == null) { | 261 if (result == null) { |
| 214 result = new String(data, start, length); | 262 result = Path.create(new String(data, start, length)); |
| 215 // release reference to bigger data array, make a copy of relevant part only | 263 // release reference to bigger data array, make a copy of relevant part only |
| 264 // use original bytes, not those from String above to avoid cache misses due to different encodings | |
| 216 byte[] d = new byte[length]; | 265 byte[] d = new byte[length]; |
| 217 System.arraycopy(data, start, d, 0, length); | 266 System.arraycopy(data, start, d, 0, length); |
| 218 data = d; | 267 data = d; |
| 219 start = 0; | 268 start = 0; |
| 220 } | 269 } |
| 223 } | 272 } |
| 224 | 273 |
| 225 private static class ManifestParser implements RevlogStream.Inspector { | 274 private static class ManifestParser implements RevlogStream.Inspector { |
| 226 private boolean gtg = true; // good to go | 275 private boolean gtg = true; // good to go |
| 227 private final Inspector inspector; | 276 private final Inspector inspector; |
| 277 private final Inspector2 inspector2; | |
| 228 private Pool<Nodeid> nodeidPool, thisRevPool; | 278 private Pool<Nodeid> nodeidPool, thisRevPool; |
| 229 private final Pool<StringProxy> fnamePool; | 279 private final Pool<PathProxy> fnamePool; |
| 230 private final Pool<StringProxy> flagsPool; | |
| 231 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool | 280 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool |
| 232 | 281 |
| 233 public ManifestParser(Inspector delegate) { | 282 public ManifestParser(Inspector delegate) { |
| 234 assert delegate != null; | 283 assert delegate != null; |
| 235 inspector = delegate; | 284 inspector = delegate; |
| 285 inspector2 = delegate instanceof Inspector2 ? (Inspector2) delegate : null; | |
| 236 nodeidPool = new Pool<Nodeid>(); | 286 nodeidPool = new Pool<Nodeid>(); |
| 237 fnamePool = new Pool<StringProxy>(); | 287 fnamePool = new Pool<PathProxy>(); |
| 238 flagsPool = new Pool<StringProxy>(); | |
| 239 thisRevPool = new Pool<Nodeid>(); | 288 thisRevPool = new Pool<Nodeid>(); |
| 240 } | 289 } |
| 241 | 290 |
| 242 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { | 291 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { |
| 243 if (!gtg) { | 292 if (!gtg) { |
| 244 return; | 293 return; |
| 245 } | 294 } |
| 246 try { | 295 try { |
| 247 gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision); | 296 gtg = gtg && inspector.begin(revisionNumber, new Nodeid(nodeid, true), linkRevision); |
| 248 String fname = null; | 297 Path fname = null; |
| 249 String flags = null; | 298 Flags flags = null; |
| 250 Nodeid nid = null; | 299 Nodeid nid = null; |
| 251 int i; | 300 int i; |
| 252 byte[] data = da.byteArray(); | 301 byte[] data = da.byteArray(); |
| 253 for (i = 0; gtg && i < actualLen; i++) { | 302 for (i = 0; gtg && i < actualLen; i++) { |
| 254 int x = i; | 303 int x = i; |
| 255 for( ; data[i] != '\n' && i < actualLen; i++) { | 304 for( ; data[i] != '\n' && i < actualLen; i++) { |
| 256 if (fname == null && data[i] == 0) { | 305 if (fname == null && data[i] == 0) { |
| 257 StringProxy px = fnamePool.unify(new StringProxy(data, x, i - x)); | 306 PathProxy px = fnamePool.unify(new PathProxy(data, x, i - x)); |
| 258 // if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit | 307 // if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit |
| 259 // cpython 0..10k: hits: 15 989 152, misses: 3020 | 308 // cpython 0..10k: hits: 15 989 152, misses: 3020 |
| 260 fname = px.freeze(); | 309 fname = px.freeze(); |
| 261 x = i+1; | 310 x = i+1; |
| 262 } | 311 } |
| 275 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. | 324 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. |
| 276 thisRevPool.record(nid); // memorize revision for the next iteration. | 325 thisRevPool.record(nid); // memorize revision for the next iteration. |
| 277 if (nodeidLen + x < i) { | 326 if (nodeidLen + x < i) { |
| 278 // 'x' and 'l' for executable bits and symlinks? | 327 // 'x' and 'l' for executable bits and symlinks? |
| 279 // hg --debug manifest shows 644 for each regular file in my repo | 328 // hg --debug manifest shows 644 for each regular file in my repo |
| 280 // for cpython 0..10k, there are 4361062 flagPool checks, and there's only 1 unique flag | 329 // for cpython 0..10k, there are 4361062 flag checks, and there's only 1 unique flag |
| 281 flags = flagsPool.unify(new StringProxy(data, x + nodeidLen, i-x-nodeidLen)).freeze(); | 330 flags = Flags.parse(data, x + nodeidLen, i-x-nodeidLen); |
| 331 } else { | |
| 332 flags = null; | |
| 282 } | 333 } |
| 283 gtg = gtg && inspector.next(nid, fname, flags); | 334 if (inspector2 == null) { |
| 335 String flagString = flags == null ? null : flags.nativeString(); | |
| 336 gtg = inspector.next(nid, fname.toString(), flagString); | |
| 337 } else { | |
| 338 gtg = inspector2.next(nid, fname, flags); | |
| 339 } | |
| 284 } | 340 } |
| 285 nid = null; | 341 nid = null; |
| 286 fname = flags = null; | 342 fname = null; |
| 343 flags = null; | |
| 287 } | 344 } |
| 288 gtg = gtg && inspector.end(revisionNumber); | 345 gtg = gtg && inspector.end(revisionNumber); |
| 289 // | 346 // |
| 290 // keep only actual file revisions, found at this version | 347 // keep only actual file revisions, found at this version |
| 291 // (next manifest is likely to refer to most of them, although in specific cases | 348 // (next manifest is likely to refer to most of them, although in specific cases |
