Mercurial > jhg
comparison src/org/tmatesoft/hg/repo/HgManifest.java @ 268:c5980f287cc4
Use StringProxy when parsing manifest to minimize number of useless conversions and array instances
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Tue, 23 Aug 2011 22:30:56 +0200 |
| parents | ec921ef0628e |
| children | 6dbbc53fc46d |
comparison
equal
deleted
inserted
replaced
| 267:ec921ef0628e | 268:c5980f287cc4 |
|---|---|
| 152 | 152 |
| 153 public interface ElementProxy<T> { | 153 public interface ElementProxy<T> { |
| 154 T get(); | 154 T get(); |
| 155 } | 155 } |
| 156 | 156 |
| 157 private static class PoolStringProxy { | 157 /** |
| 158 * When Pool uses Strings directly, | |
| 159 * ManifestParser creates new String instance with new char[] value, and does byte->char conversion. | |
| 160 * For cpython repo, walk(0..10k), there are over 16 million filenames, of them only 3020 unique. | |
| 161 * This means there are 15.9 million useless char[] instances and byte->char conversions | |
| 162 * | |
| 163 * When String is wrapped into {@link StringProxy}, there's extra overhead of byte[] representation | |
| 164 * of the String, but these are only for unique Strings (3020 in the example above). Besides, I save | |
| 165 * useless char[] and byte->char conversions. | |
| 166 */ | |
| 167 private static class StringProxy { | |
| 158 private byte[] data; | 168 private byte[] data; |
| 159 private int start, length; | 169 private int start; |
| 160 private int hash; | 170 private final int hash, length; |
| 161 private String result; | 171 private String result; |
| 162 | 172 |
| 163 public void init(byte[] data, int start, int length) { | 173 public StringProxy(byte[] data, int start, int length) { |
| 164 this.data = data; | 174 this.data = data; |
| 165 this.start = start; | 175 this.start = start; |
| 166 this.length = length; | 176 this.length = length; |
| 167 | 177 |
| 168 // copy from String.hashCode() | 178 // copy from String.hashCode(). In fact, not necessarily match result of String(data).hashCode |
| 179 // just need some nice algorithm here | |
| 169 int h = 0; | 180 int h = 0; |
| 170 byte[] d = data; | 181 byte[] d = data; |
| 171 for (int i = 0, off = start, len = length; i < len; i++) { | 182 for (int i = 0, off = start, len = length; i < len; i++) { |
| 172 h = 31 * h + d[off++]; | 183 h = 31 * h + d[off++]; |
| 173 } | 184 } |
| 174 hash = h; | 185 hash = h; |
| 175 } | 186 } |
| 176 | 187 |
| 177 @Override | 188 @Override |
| 178 public boolean equals(Object obj) { | 189 public boolean equals(Object obj) { |
| 179 if (false == obj instanceof PoolStringProxy) { | 190 if (false == obj instanceof StringProxy) { |
| 180 return false; | 191 return false; |
| 181 } | 192 } |
| 182 PoolStringProxy o = (PoolStringProxy) obj; | 193 StringProxy o = (StringProxy) obj; |
| 183 if (o.result != null && result != null) { | 194 if (o.result != null && result != null) { |
| 184 return result.equals(o.result); | 195 return result.equals(o.result); |
| 185 } | 196 } |
| 186 if (o.result == null && result != null || o.result != null && result == null) { | 197 if (o.length != length || o.hash != hash) { |
| 187 String s; PoolStringProxy noString; | |
| 188 if (o.result == null) { | |
| 189 s = result; | |
| 190 noString = o; | |
| 191 } else { | |
| 192 s = o.result; | |
| 193 noString = this; | |
| 194 } | |
| 195 | |
| 196 } | |
| 197 // both are null | |
| 198 if (o.length != length) { | |
| 199 return false; | 198 return false; |
| 200 } | 199 } |
| 201 for (int i = 0, x = o.start, y = start; i < length; i++) { | 200 for (int i = 0, x = o.start, y = start; i < length; i++) { |
| 202 if (o.data[x++] != data[y++]) { | 201 if (o.data[x++] != data[y++]) { |
| 203 return false; | 202 return false; |
| 211 } | 210 } |
| 212 | 211 |
| 213 public String freeze() { | 212 public String freeze() { |
| 214 if (result == null) { | 213 if (result == null) { |
| 215 result = new String(data, start, length); | 214 result = new String(data, start, length); |
| 216 data = null; | 215 // release reference to bigger data array, make a copy of relevant part only |
| 217 start = length = -1; | 216 byte[] d = new byte[length]; |
| 217 System.arraycopy(data, start, d, 0, length); | |
| 218 data = d; | |
| 219 start = 0; | |
| 218 } | 220 } |
| 219 return result; | 221 return result; |
| 220 } | 222 } |
| 221 } | 223 } |
| 222 | 224 |
| 223 private static class ManifestParser implements RevlogStream.Inspector/*, Lifecycle */{ | 225 private static class ManifestParser implements RevlogStream.Inspector { |
| 224 private boolean gtg = true; // good to go | 226 private boolean gtg = true; // good to go |
| 225 private final Inspector inspector; | 227 private final Inspector inspector; |
| 226 private Pool<Nodeid> nodeidPool, thisRevPool; | 228 private Pool<Nodeid> nodeidPool, thisRevPool; |
| 227 private final Pool<String> fnamePool; | 229 private final Pool<StringProxy> fnamePool; |
| 228 private final Pool<String> flagsPool; | 230 private final Pool<StringProxy> flagsPool; |
| 229 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool | 231 private byte[] nodeidLookupBuffer = new byte[20]; // get reassigned each time new Nodeid is added to pool |
| 230 | 232 |
| 231 public ManifestParser(Inspector delegate) { | 233 public ManifestParser(Inspector delegate) { |
| 232 assert delegate != null; | 234 assert delegate != null; |
| 233 inspector = delegate; | 235 inspector = delegate; |
| 234 nodeidPool = new Pool<Nodeid>(); | 236 nodeidPool = new Pool<Nodeid>(); |
| 235 fnamePool = new Pool<String>(); | 237 fnamePool = new Pool<StringProxy>(); |
| 236 flagsPool = new Pool<String>(); | 238 flagsPool = new Pool<StringProxy>(); |
| 237 thisRevPool = new Pool<Nodeid>(); | 239 thisRevPool = new Pool<Nodeid>(); |
| 238 } | 240 } |
| 239 | 241 |
| 240 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { | 242 public void next(int revisionNumber, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess da) { |
| 241 if (!gtg) { | 243 if (!gtg) { |
| 242 return; | 244 return; |
| 243 } | 245 } |
| 244 try { | 246 try { |
| 250 byte[] data = da.byteArray(); | 252 byte[] data = da.byteArray(); |
| 251 for (i = 0; gtg && i < actualLen; i++) { | 253 for (i = 0; gtg && i < actualLen; i++) { |
| 252 int x = i; | 254 int x = i; |
| 253 for( ; data[i] != '\n' && i < actualLen; i++) { | 255 for( ; data[i] != '\n' && i < actualLen; i++) { |
| 254 if (fname == null && data[i] == 0) { | 256 if (fname == null && data[i] == 0) { |
| 255 fname = fnamePool.unify(new String(data, x, i - x)); | 257 StringProxy px = fnamePool.unify(new StringProxy(data, x, i - x)); |
| 258 // if (cached = fnamePool.unify(px))== px then cacheMiss, else cacheHit | |
| 259 // cpython 0..10k: hits: 15 989 152, misses: 3020 | |
| 260 fname = px.freeze(); | |
| 256 x = i+1; | 261 x = i+1; |
| 257 } | 262 } |
| 258 } | 263 } |
| 259 if (i < actualLen) { | 264 if (i < actualLen) { |
| 260 assert data[i] == '\n'; | 265 assert data[i] == '\n'; |
| 270 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. | 275 } // for cpython 0..10k, cache hits are 15 973 301, vs 18871 misses. |
| 271 thisRevPool.record(nid); // memorize revision for the next iteration. | 276 thisRevPool.record(nid); // memorize revision for the next iteration. |
| 272 if (nodeidLen + x < i) { | 277 if (nodeidLen + x < i) { |
| 273 // 'x' and 'l' for executable bits and symlinks? | 278 // 'x' and 'l' for executable bits and symlinks? |
| 274 // hg --debug manifest shows 644 for each regular file in my repo | 279 // hg --debug manifest shows 644 for each regular file in my repo |
| 275 flags = flagsPool.unify(new String(data, x + nodeidLen, i-x-nodeidLen)); | 280 // for cpython 0..10k, there are 4361062 flagPool checks, and there's only 1 unique flag |
| 281 flags = flagsPool.unify(new StringProxy(data, x + nodeidLen, i-x-nodeidLen)).freeze(); | |
| 276 } | 282 } |
| 277 gtg = gtg && inspector.next(nid, fname, flags); | 283 gtg = gtg && inspector.next(nid, fname, flags); |
| 278 } | 284 } |
| 279 nid = null; | 285 nid = null; |
| 280 fname = flags = null; | 286 fname = flags = null; |
| 290 thisRevPool = t; | 296 thisRevPool = t; |
| 291 } catch (IOException ex) { | 297 } catch (IOException ex) { |
| 292 throw new HgBadStateException(ex); | 298 throw new HgBadStateException(ex); |
| 293 } | 299 } |
| 294 } | 300 } |
| 295 // | |
| 296 // public void start(int count, Callback callback, Object token) { | |
| 297 // } | |
| 298 // | |
| 299 // public void finish(Object token) { | |
| 300 // System.out.println(fnamePool); | |
| 301 // System.out.println(nodeidPool); | |
| 302 // System.out.printf("Free mem once parse done: %,d\n", Runtime.getRuntime().freeMemory()); | |
| 303 // } | |
| 304 } | 301 } |
| 305 | 302 |
| 306 private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle { | 303 private static class RevisionMapper implements RevlogStream.Inspector, Lifecycle { |
| 307 | 304 |
| 308 private final int changelogRevisions; | 305 private final int changelogRevisions; |
