Mercurial > jhg
comparison src/org/tmatesoft/hg/repo/HgIgnore.java @ 91:c2ce1cfaeb9e
ignore file with regex and 'honest' glob support
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Thu, 27 Jan 2011 06:06:42 +0100 |
| parents | 6f1b88693d48 |
| children | a3a2e5deb320 |
comparison
equal
deleted
inserted
replaced
| 90:a95c700408a9 | 91:c2ce1cfaeb9e |
|---|---|
| 18 | 18 |
| 19 import java.io.BufferedReader; | 19 import java.io.BufferedReader; |
| 20 import java.io.File; | 20 import java.io.File; |
| 21 import java.io.FileReader; | 21 import java.io.FileReader; |
| 22 import java.io.IOException; | 22 import java.io.IOException; |
| | 23 import java.util.ArrayList; |
| 23 import java.util.Collections; | 24 import java.util.Collections; |
| 24 import java.util.Set; | 25 import java.util.List; |
| 25 import java.util.TreeSet; | 26 import java.util.regex.Pattern; |
| 26 | 27 |
| 27 /** | 28 /** |
| 28 * | 29 * |
| 29 * @author Artem Tikhomirov | 30 * @author Artem Tikhomirov |
| 30 * @author TMate Software Ltd. | 31 * @author TMate Software Ltd. |
| 31 */ | 32 */ |
| 32 public class HgIgnore { | 33 public class HgIgnore { |
| 33 | 34 |
| 34 private final HgRepository repo; | 35 private List<Pattern> entries; |
| 35 private Set<String> entries; | |
| 36 | 36 |
| 37 public HgIgnore(HgRepository localRepo) { | 37 HgIgnore() { |
| 38 this.repo = localRepo; | 38 entries = Collections.emptyList(); |
| 39 } | 39 } |
| 40 | 40 |
| 41 private void read() { | 41 /* package-local */void read(File hgignoreFile) throws IOException { |
| 42 entries = Collections.emptySet(); | |
| 43 File hgignoreFile = new File(repo.getRepositoryRoot().getParentFile(), ".hgignore"); | |
| 44 if (!hgignoreFile.exists()) { | 42 if (!hgignoreFile.exists()) { |
| 45 return; | 43 return; |
| 46 } | 44 } |
| 47 entries = new TreeSet<String>(); | 45 ArrayList<Pattern> result = new ArrayList<Pattern>(entries); // start with existing |
| 48 try { | 46 String syntax = "regex"; // or "glob" |
| 49 BufferedReader fr = new BufferedReader(new FileReader(hgignoreFile)); | 47 BufferedReader fr = new BufferedReader(new FileReader(hgignoreFile)); |
| 50 String line; | 48 String line; |
| 51 while ((line = fr.readLine()) != null) { | 49 while ((line = fr.readLine()) != null) { |
| 52 // FIXME need to detect syntax:glob and other parameters | 50 line = line.trim(); |
| 53 entries.add(line.trim()); // shall I account for local paths in the file (i.e. back-slashed on windows)? | 51 if (line.startsWith("syntax:")) { |
| | 52 syntax = line.substring("syntax:".length()).trim(); |
| | 53 if (!"regex".equals(syntax) && !"glob".equals(syntax)) { |
| | 54 throw new IllegalStateException(line); |
| | 55 } |
| | 56 } else if (line.length() > 0) { |
| | 57 // shall I account for local paths in the file (i.e. |
| | 58 // back-slashed on windows)? |
| | 59 int x; |
| | 60 if ((x = line.indexOf('#')) >= 0) { |
| | 61 line = line.substring(0, x).trim(); |
| | 62 if (line.length() == 0) { |
| | 63 continue; |
| | 64 } |
| | 65 } |
| | 66 if ("glob".equals(syntax)) { |
| | 67 // hgignore(5) |
| | 68 // (http://www.selenic.com/mercurial/hgignore.5.html) says slashes '\' are escape characters, |
| | 69 // hence no special treatment of Windows path |
| | 70 // however, own attempts make me think '\' on Windows are not treated as escapes |
| | 71 line = glob2regex(line); |
| | 72 } |
| | 73 result.add(Pattern.compile(line)); // case-sensitive |
| 54 } | 74 } |
| 55 } catch (IOException ex) { | |
| 56 ex.printStackTrace(); // log warn | |
| 57 } | 75 } |
| | 76 result.trimToSize(); |
| | 77 entries = result; |
| 58 } | 78 } |
| 59 | 79 |
| 60 public void reset() { | 80 // note, #isIgnored(), even if queried for directories and returned positive reply, may still get |
| 61 // FIXME does anyone really need to clear HgIgnore? Perhaps, repo may return new instance each time, | 81 // a file from that ignored folder to get examined. Thus, patterns like "bin" shall match not only a folder, |
| 62 // which is used throughout invocation and then discarded? | 82 // but any file under that folder as well |
| 63 entries = null; | 83 // Alternatively, file walker may memorize folder is ignored and uses this information for all nested files. However, |
| | 84 // this approach would require walker (a) return directories (b) provide nesting information. This may become |
| | 85 // troublesome when one walks not over io.File, but Eclipse's IResource or any other custom VFS. |
| | 86 // |
| | 87 // |
| | 88 // might be interesting, although looks like of no direct use in my case |
| | 89 // @see http://stackoverflow.com/questions/1247772/is-there-an-equivalent-of-java-util-regex-for-glob-type-patterns |
| | 90 private String glob2regex(String line) { |
| | 91 assert line.length() > 0; |
| | 92 StringBuilder sb = new StringBuilder(line.length() + 10); |
| | 93 sb.append('^'); // help avoid matcher.find() to match 'bin' pattern in the middle of the filename |
| | 94 int start = 0, end = line.length() - 1; |
| | 95 // '*' at the beginning and end of a line are useless for Pattern |
| | 96 while (start <= end && line.charAt(start) == '*') start++; |
| | 97 while (end > start && line.charAt(end) == '*') end--; |
| | 98 |
| | 99 for (int i = start; i <= end; i++) { |
| | 100 char ch = line.charAt(i); |
| | 101 if (ch == '.' || ch == '\\') { |
| | 102 sb.append('\\'); |
| | 103 } else if (ch == '?') { |
| | 104 // simple '.' substitution might work out, however, more formally |
| | 105 // a char class seems more appropriate to avoid accidentally |
| | 106 // matching a subdirectory with ? char (i.e. /a/b?d against /a/bad, /a/bed and /a/b/d) |
| | 107 // @see http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_13_03 |
| | 108 // quote: "The slash character in a pathname shall be explicitly matched by using one or more slashes in the pattern; |
| | 109 // it shall neither be matched by the asterisk or question-mark special characters nor by a bracket expression" |
| | 110 sb.append("[^/]"); |
| | 111 continue; |
| | 112 } else if (ch == '*') { |
| | 113 sb.append("[^/]*?"); |
| | 114 continue; |
| | 115 } |
| | 116 sb.append(ch); |
| | 117 } |
| | 118 return sb.toString(); |
| 64 } | 119 } |
| 65 | 120 |
| 66 public boolean isIgnored(String path) { | 121 public boolean isIgnored(String path) { |
| 67 if (entries == null) { | 122 for (Pattern p : entries) { |
| 68 read(); | 123 if (p.matcher(path).find()) { |
| 69 } | |
| 70 if (entries.contains(path)) { | |
| 71 // easy part | |
| 72 return true; | |
| 73 } | |
| 74 // substrings are memory-friendly | |
| 75 int x = 0, i = path.indexOf('/', 0); | |
| 76 while (i != -1) { | |
| 77 if (entries.contains(path.substring(x, i))) { | |
| 78 return true; | 124 return true; |
| 79 } | 125 } |
| 80 // try one with ending slash | |
| 81 if (entries.contains(path.substring(x, i+1))) { // even if i is last index, i+1 is safe here | |
| 82 return true; | |
| 83 } | |
| 84 x = i+1; | |
| 85 i = path.indexOf('/', x); | |
| 86 } | 126 } |
| 87 return false; | 127 return false; |
| 88 } | 128 } |
| 89 } | 129 } |
