Mercurial > hg4j
comparison src/org/tmatesoft/hg/internal/StoragePathHelper.java @ 414:bb278ccf9866
Pull changes from smartgit3 branch
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> | 
|---|---|
| date | Wed, 21 Mar 2012 20:51:12 +0100 | 
| parents | 464b4404e75d | 
| children | 528b6780a8bd | 
   comparison
  equal
  deleted
  inserted
  replaced
| 413:7f27122011c3 | 414:bb278ccf9866 | 
|---|---|
| 1 /* | 1 /* | 
| 2 * Copyright (c) 2011 TMate Software Ltd | 2 * Copyright (c) 2011-2012 TMate Software Ltd | 
| 3 * | 3 * | 
| 4 * This program is free software; you can redistribute it and/or modify | 4 * This program is free software; you can redistribute it and/or modify | 
| 5 * it under the terms of the GNU General Public License as published by | 5 * it under the terms of the GNU General Public License as published by | 
| 6 * the Free Software Foundation; version 2 of the License. | 6 * the Free Software Foundation; version 2 of the License. | 
| 7 * | 7 * | 
| 14 * the terms of a license other than GNU General Public License | 14 * the terms of a license other than GNU General Public License | 
| 15 * contact TMate Software at support@hg4j.com | 15 * contact TMate Software at support@hg4j.com | 
| 16 */ | 16 */ | 
| 17 package org.tmatesoft.hg.internal; | 17 package org.tmatesoft.hg.internal; | 
| 18 | 18 | 
| 19 import java.nio.ByteBuffer; | |
| 20 import java.nio.CharBuffer; | |
| 21 import java.nio.charset.Charset; | |
| 22 import java.nio.charset.CharsetEncoder; | |
| 19 import java.util.Arrays; | 23 import java.util.Arrays; | 
| 20 import java.util.TreeSet; | 24 import java.util.TreeSet; | 
| 25 import java.util.regex.Matcher; | |
| 26 import java.util.regex.Pattern; | |
| 21 | 27 | 
| 22 import org.tmatesoft.hg.util.PathRewrite; | 28 import org.tmatesoft.hg.util.PathRewrite; | 
| 23 | 29 | 
| 24 /** | 30 /** | 
| 25 * @see http://mercurial.selenic.com/wiki/CaseFoldingPlan | 31 * @see http://mercurial.selenic.com/wiki/CaseFoldingPlan | 
| 26 * @see http://mercurial.selenic.com/wiki/fncacheRepoFormat | 32 * @see http://mercurial.selenic.com/wiki/fncacheRepoFormat | 
| 33 * @see http://mercurial.selenic.com/wiki/EncodingStrategy | |
| 27 * | 34 * | 
| 28 * @author Artem Tikhomirov | 35 * @author Artem Tikhomirov | 
| 29 * @author TMate Software Ltd. | 36 * @author TMate Software Ltd. | 
| 30 */ | 37 */ | 
| 31 class StoragePathHelper implements PathRewrite { | 38 class StoragePathHelper implements PathRewrite { | 
| 32 | 39 | 
| 33 private final boolean store; | 40 private final boolean store; | 
| 34 private final boolean fncache; | 41 private final boolean fncache; | 
| 35 private final boolean dotencode; | 42 private final boolean dotencode; | 
| 36 | 43 private final Pattern suffix2replace; | 
| 44 private final CharsetEncoder csEncoder; | |
| 45 private final char[] hexEncodedByte = new char[] {'~', '0', '0'}; | |
| 46 private final ByteBuffer byteEncodingBuf; | |
| 47 private final CharBuffer charEncodingBuf; | |
| 48 | |
| 37 public StoragePathHelper(boolean isStore, boolean isFncache, boolean isDotencode) { | 49 public StoragePathHelper(boolean isStore, boolean isFncache, boolean isDotencode) { | 
| 50 this(isStore, isFncache, isDotencode, Charset.defaultCharset()); | |
| 51 } | |
| 52 | |
| 53 public StoragePathHelper(boolean isStore, boolean isFncache, boolean isDotencode, Charset fsEncoding) { | |
| 54 assert fsEncoding != null; | |
| 38 store = isStore; | 55 store = isStore; | 
| 39 fncache = isFncache; | 56 fncache = isFncache; | 
| 40 dotencode = isDotencode; | 57 dotencode = isDotencode; | 
| 58 suffix2replace = Pattern.compile("\\.([id]|hg)/"); | |
| 59 csEncoder = fsEncoding.newEncoder(); // FIXME catch exception and rethrow as our's RT | |
| 60 byteEncodingBuf = ByteBuffer.allocate(Math.round(csEncoder.maxBytesPerChar()) + 1/*in fact, need ceil, hence +1*/); | |
| 61 charEncodingBuf = CharBuffer.allocate(1); | |
| 41 } | 62 } | 
| 42 | 63 | 
| 43 // FIXME document what path argument is, whether it includes .i or .d, and whether it's 'normalized' (slashes) or not. | 64 // FIXME document what path argument is, whether it includes .i or .d, and whether it's 'normalized' (slashes) or not. | 
| 44 // since .hg/store keeps both .i files and files without extension (e.g. fncache), guess, for data == false | 65 // since .hg/store keeps both .i files and files without extension (e.g. fncache), guess, for data == false | 
| 45 // we shall assume path has extension | 66 // we shall assume path has extension | 
| 46 public CharSequence rewrite(CharSequence p) { | 67 public CharSequence rewrite(CharSequence p) { | 
| 47 final String STR_STORE = "store/"; | 68 final String STR_STORE = "store/"; | 
| 48 final String STR_DATA = "data/"; | 69 final String STR_DATA = "data/"; | 
| 49 final String STR_DH = "dh/"; | 70 final String STR_DH = "dh/"; | 
| 50 final String reservedChars = "\\:*?\"<>|"; | 71 final String reservedChars = "\\:*?\"<>|"; | 
| 51 char[] hexByte = new char[2]; | |
| 52 | 72 | 
| 53 String path = p.toString(); | 73 Matcher suffixMatcher = suffix2replace.matcher(p); | 
| 54 path = path.replace(".hg/", ".hg.hg/").replace(".i/", ".i.hg/").replace(".d/", ".d.hg/"); | 74 CharSequence path; | 
| 75 // Matcher.replaceAll, but without extra toString | |
| 76 boolean found = suffixMatcher.find(); | |
| 77 if (found) { | |
| 78 StringBuffer sb = new StringBuffer(p.length() + 20); | |
| 79 do { | |
| 80 suffixMatcher.appendReplacement(sb, ".$1.hg/"); | |
| 81 } while (found = suffixMatcher.find()); | |
| 82 suffixMatcher.appendTail(sb); | |
| 83 path = sb; | |
| 84 } else { | |
| 85 path = p; | |
| 86 } | |
| 87 | |
| 55 StringBuilder sb = new StringBuilder(path.length() << 1); | 88 StringBuilder sb = new StringBuilder(path.length() << 1); | 
| 56 if (store || fncache) { | 89 if (store || fncache) { | 
| 57 // encodefilename | |
| 58 for (int i = 0; i < path.length(); i++) { | 90 for (int i = 0; i < path.length(); i++) { | 
| 59 final char ch = path.charAt(i); | 91 final char ch = path.charAt(i); | 
| 60 if (ch >= 'a' && ch <= 'z') { | 92 if (ch >= 'a' && ch <= 'z') { | 
| 61 sb.append(ch); // POIRAE | 93 sb.append(ch); // POIRAE | 
| 62 } else if (ch >= 'A' && ch <= 'Z') { | 94 } else if (ch >= 'A' && ch <= 'Z') { | 
| 63 sb.append('_'); | 95 sb.append('_'); | 
| 64 sb.append(Character.toLowerCase(ch)); // Perhaps, (char) (((int) ch) + 32)? Even better, |= 0x20? | 96 sb.append(Character.toLowerCase(ch)); // Perhaps, (char) (((int) ch) + 32)? Even better, |= 0x20? | 
| 65 } else if (reservedChars.indexOf(ch) != -1) { | 97 } else if (reservedChars.indexOf(ch) != -1) { | 
| 66 sb.append('~'); | 98 sb.append(toHexByte(ch)); | 
| 67 sb.append(toHexByte(ch, hexByte)); | |
| 68 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { | 99 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { | 
| 69 sb.append('~'); | 100 sb.append(toHexByte(ch)); | 
| 70 sb.append(toHexByte(ch, hexByte)); | |
| 71 } else if (ch == '_') { | 101 } else if (ch == '_') { | 
| 72 sb.append('_'); | 102 sb.append('_'); | 
| 73 sb.append('_'); | 103 sb.append('_'); | 
| 74 } else { | 104 } else { | 
| 75 sb.append(ch); | 105 // either ASCII char that doesn't require special handling, or an Unicode character to get encoded | 
| 106 // according to filesystem/native encoding, see http://mercurial.selenic.com/wiki/EncodingStrategy | |
| 107 // despite what the page says, use of native encoding seems the worst solution to me (repositories | |
| 108 // can't be easily shared between OSes with different encodings then, e.g. Win1251 and Linux UTF8). | |
| 109 // If the ease of sharing was not the point, what's the reason to mangle names at all then ( | |
| 110 // lowercase and exclude reserved device names). | |
| 111 if (ch < '~' /*126*/ || !csEncoder.canEncode(ch)) { | |
| 112 sb.append(ch); | |
| 113 } else { | |
| 114 appendEncoded(sb, ch); | |
| 115 } | |
| 76 } | 116 } | 
| 77 } | 117 } | 
| 78 // auxencode | 118 // auxencode | 
| 79 if (fncache) { | 119 if (fncache) { | 
| 80 encodeWindowsDeviceNames(sb); | 120 encodeWindowsDeviceNames(sb); | 
| 81 } | 121 } | 
| 82 } | 122 } | 
| 83 final int MAX_PATH_LEN = 120; | 123 final int MAX_PATH_LEN = 120; | 
| 84 if (fncache && (sb.length() + STR_DATA.length() + ".i".length() > MAX_PATH_LEN)) { | 124 if (fncache && (sb.length() + STR_DATA.length() + ".i".length() > MAX_PATH_LEN)) { | 
| 125 // TODO [post-1.0] Mercurial uses system encoding for paths, hence we need to pass bytes to DigestHelper | |
| 126 // to ensure our sha1 value (default encoding of unicode string if one looks into DH impl) match that | |
| 127 // produced by Mercurial (based on native string). | |
| 85 String digest = new DigestHelper().sha1(STR_DATA, path, ".i").asHexString(); | 128 String digest = new DigestHelper().sha1(STR_DATA, path, ".i").asHexString(); | 
| 86 final int DIR_PREFIX_LEN = 8; | 129 final int DIR_PREFIX_LEN = 8; | 
| 87 // not sure why (-4) is here. 120 - 40 = up to 80 for path with ext. dh/ + ext(.i) = 3+2 | 130 // not sure why (-4) is here. 120 - 40 = up to 80 for path with ext. dh/ + ext(.i) = 3+2 | 
| 88 final int MAX_DIR_PREFIX = 8 * (DIR_PREFIX_LEN + 1) - 4; | 131 final int MAX_DIR_PREFIX = 8 * (DIR_PREFIX_LEN + 1) - 4; | 
| 89 sb = new StringBuilder(MAX_PATH_LEN); | 132 sb = new StringBuilder(MAX_PATH_LEN); | 
| 92 if (ch >= 'a' && ch <= 'z') { | 135 if (ch >= 'a' && ch <= 'z') { | 
| 93 sb.append(ch); | 136 sb.append(ch); | 
| 94 } else if (ch >= 'A' && ch <= 'Z') { | 137 } else if (ch >= 'A' && ch <= 'Z') { | 
| 95 sb.append((char) (ch | 0x20)); // lowercase | 138 sb.append((char) (ch | 0x20)); // lowercase | 
| 96 } else if (reservedChars.indexOf(ch) != -1) { | 139 } else if (reservedChars.indexOf(ch) != -1) { | 
| 97 sb.append('~'); | 140 sb.append(toHexByte(ch)); | 
| 98 sb.append(toHexByte(ch, hexByte)); | |
| 99 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { | 141 } else if ((ch >= '~' /*126*/ && ch <= 255) || ch < ' ' /*32*/) { | 
| 100 sb.append('~'); | 142 sb.append(toHexByte(ch)); | 
| 101 sb.append(toHexByte(ch, hexByte)); | |
| 102 } else { | 143 } else { | 
| 103 sb.append(ch); | 144 if (ch < '~' /*126*/ || !csEncoder.canEncode(ch)) { | 
| 145 sb.append(ch); | |
| 146 } else { | |
| 147 appendEncoded(sb, ch); | |
| 148 } | |
| 104 } | 149 } | 
| 105 } | 150 } | 
| 106 encodeWindowsDeviceNames(sb); | 151 encodeWindowsDeviceNames(sb); | 
| 107 int fnameStart = sb.lastIndexOf("/"); // since we rewrite file names, it never ends with slash (for dirs, I'd pass length-2); | 152 int fnameStart = sb.lastIndexOf("/"); // since we rewrite file names, it never ends with slash (for dirs, I'd pass length-2); | 
| 108 StringBuilder completeHashName = new StringBuilder(MAX_PATH_LEN); | 153 StringBuilder completeHashName = new StringBuilder(MAX_PATH_LEN); | 
| 161 sb.append(".i"); | 206 sb.append(".i"); | 
| 162 return sb.toString(); | 207 return sb.toString(); | 
| 163 } | 208 } | 
| 164 | 209 | 
| 165 private void encodeWindowsDeviceNames(StringBuilder sb) { | 210 private void encodeWindowsDeviceNames(StringBuilder sb) { | 
| 166 char[] hexByte = new char[2]; | |
| 167 int x = 0; // last segment start | 211 int x = 0; // last segment start | 
| 168 final TreeSet<String> windowsReservedFilenames = new TreeSet<String>(); | 212 final TreeSet<String> windowsReservedFilenames = new TreeSet<String>(); | 
| 169 windowsReservedFilenames.addAll(Arrays.asList("con prn aux nul com1 com2 com3 com4 com5 com6 com7 com8 com9 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9".split(" "))); | 213 windowsReservedFilenames.addAll(Arrays.asList("con prn aux nul com1 com2 com3 com4 com5 com6 com7 com8 com9 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9".split(" "))); | 
| 170 do { | 214 do { | 
| 171 int i = sb.indexOf("/", x); | 215 int i = sb.indexOf("/", x); | 
| 181 found = windowsReservedFilenames.contains(sb.subSequence(x, x+3)); | 225 found = windowsReservedFilenames.contains(sb.subSequence(x, x+3)); | 
| 182 } else if (i-x > 4 && sb.charAt(x+4) == '.') { | 226 } else if (i-x > 4 && sb.charAt(x+4) == '.') { | 
| 183 found = windowsReservedFilenames.contains(sb.subSequence(x, x+4)); | 227 found = windowsReservedFilenames.contains(sb.subSequence(x, x+4)); | 
| 184 } | 228 } | 
| 185 if (found) { | 229 if (found) { | 
| 186 sb.insert(x+3, toHexByte(sb.charAt(x+2), hexByte)); | 230 // x+2 as we change the third letter in device name | 
| 187 sb.setCharAt(x+2, '~'); | 231 replace(sb, x+2, toHexByte(sb.charAt(x+2))); | 
| 188 i += 2; | 232 i += 2; | 
| 189 } | 233 } | 
| 190 } | 234 } | 
| 191 if (dotencode && (sb.charAt(x) == '.' || sb.charAt(x) == ' ')) { | 235 if (dotencode && (sb.charAt(x) == '.' || sb.charAt(x) == ' ')) { | 
| 192 sb.insert(x+1, toHexByte(sb.charAt(x), hexByte)); | 236 char dotOrSpace = sb.charAt(x); // beware, replace() below changes charAt(x), rather get a copy | 
| 193 sb.setCharAt(x, '~'); // setChar *after* charAt/insert to get ~2e, not ~7e for '.' | 237 // not to get ~7e for '.' instead of ~2e, if later refactoring changes the logic | 
| 238 replace(sb, x, toHexByte(dotOrSpace)); | |
| 194 i += 2; | 239 i += 2; | 
| 195 } | 240 } | 
| 196 x = i+1; | 241 x = i+1; | 
| 197 } while (x < sb.length()); | 242 } while (x < sb.length()); | 
| 198 } | 243 } | 
| 199 | 244 | 
| 200 private static char[] toHexByte(int ch, char[] buf) { | 245 // shall be synchronized in case of multithreaded use | 
| 201 assert buf.length > 1; | 246 private void appendEncoded(StringBuilder sb, char ch) { | 
| 247 charEncodingBuf.clear(); | |
| 248 byteEncodingBuf.clear(); | |
| 249 charEncodingBuf.put(ch).flip(); | |
| 250 csEncoder.encode(charEncodingBuf, byteEncodingBuf, false); | |
| 251 byteEncodingBuf.flip(); | |
| 252 while (byteEncodingBuf.hasRemaining()) { | |
| 253 sb.append(toHexByte(byteEncodingBuf.get())); | |
| 254 } | |
| 255 } | |
| 256 | |
| 257 /** | |
| 258 * replace char at sb[index] with a sequence | |
| 259 */ | |
| 260 private static void replace(StringBuilder sb, int index, char[] with) { | |
| 261 // there's StringBuilder.replace(int, int+1, String), but with char[] - I don't want to make a string out of hexEncodedByte | |
| 262 sb.setCharAt(index, with[0]); | |
| 263 sb.insert(index+1, with, 1, with.length - 1); | |
| 264 } | |
| 265 | |
| 266 /** | |
| 267 * put hex representation of the low byte of ch into the shared hexEncodedByte array ('~' followed by two hex digits) and return that array | |
| 268 */ | |
| 269 private char[] toHexByte(int ch) { | |
| 202 final String hexDigits = "0123456789abcdef"; | 270 final String hexDigits = "0123456789abcdef"; | 
| 203 buf[0] = hexDigits.charAt((ch & 0x00F0) >>> 4); | 271 hexEncodedByte[1] = hexDigits.charAt((ch & 0x00F0) >>> 4); | 
| 204 buf[1] = hexDigits.charAt(ch & 0x0F); | 272 hexEncodedByte[2] = hexDigits.charAt(ch & 0x0F); | 
| 205 return buf; | 273 return hexEncodedByte; | 
| 206 } | 274 } | 
| 207 } | 275 } | 
