Mercurial > jhg
comparison src/org/tmatesoft/hg/internal/ConfigFileParser.java @ 497:02140be396d5
Issue 38. Towards gentle handling of config files - parse them and keep every possible user change
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> |
|---|---|
| date | Thu, 25 Oct 2012 19:59:08 +0200 |
| parents | |
| children | 0205a5c4566b |
comparison
equal
deleted
inserted
replaced
| 496:c1c8f6859d3f | 497:02140be396d5 |
|---|---|
| 1 /* | |
| 2 * Copyright (c) 2012 TMate Software Ltd | |
| 3 * | |
| 4 * This program is free software; you can redistribute it and/or modify | |
| 5 * it under the terms of the GNU General Public License as published by | |
| 6 * the Free Software Foundation; version 2 of the License. | |
| 7 * | |
| 8 * This program is distributed in the hope that it will be useful, | |
| 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 11 * GNU General Public License for more details. | |
| 12 * | |
| 13 * For information on how to redistribute this software under | |
| 14 * the terms of a license other than GNU General Public License | |
| 15 * contact TMate Software at support@hg4j.com | |
| 16 */ | |
| 17 package org.tmatesoft.hg.internal; | |
| 18 | |
| 19 import java.io.ByteArrayInputStream; | |
| 20 import java.io.ByteArrayOutputStream; | |
| 21 import java.io.IOException; | |
| 22 import java.io.InputStream; | |
| 23 import java.io.OutputStream; | |
| 24 import java.util.ArrayList; | |
| 25 import java.util.Collections; | |
| 26 import java.util.HashSet; | |
| 27 import java.util.Iterator; | |
| 28 import java.util.LinkedHashMap; | |
| 29 import java.util.LinkedHashSet; | |
| 30 import java.util.List; | |
| 31 | |
/**
 * Simplistic parser to allow altering configuration files without touching
 * user modifications/formatting/comments.
 *
 * <p>Usage: {@link #parse(InputStream)} the file, register any number of
 * {@link #add(String, String, String) additions},
 * {@link #change(String, String, String) changes} and
 * {@link #delete(String, String) deletions}, then {@link #update(OutputStream)}
 * to write the original bytes with only the requested edits applied.
 *
 * <p>Recognized syntax: <code>[section]</code> headers, <code>key = value</code>
 * entries that start at the first column, whitespace-indented continuation lines
 * of the previous entry's value, and comment lines starting with <code>#</code>
 * or <code>;</code>. Trailing comments are treated as part of the value.
 *
 * <p>Bytes are converted to/from strings using the platform default charset.
 * Instances keep mutable parse state and are not safe for concurrent use.
 *
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
public class ConfigFileParser {
    private enum ParseState { Initial, Section, Entry }

    // --- transient state of the parse in progress ---
    private ParseState state = ParseState.Initial;
    // offset of the EOL char that terminates the last non-empty line seen so far
    // (or contents length when the last line is not EOL-terminated); -1 if none yet
    private int lastNonEmptyLineEndOffset = -1;
    private String sectionName;
    private int sectionStart = -1;
    private String entryKey;
    private int entryStart = -1;
    private int valueStart = -1, valueEnd = -1;
    private ArrayList<Entry> entries;

    // --- result of the last successful parse ---
    private ArrayList<Section> sections = new ArrayList<Section>();
    private byte[] contents;

    // --- pending edits, stored flat: (section, key) pairs and (section, key, value) triples ---
    private List<String> deletions = new ArrayList<String>(5);
    private List<String> additions = new ArrayList<String>(5), changes = new ArrayList<String>(5);

    /**
     * Tells whether the parsed file defines the given key.
     * Only the first section with the matching name is consulted.
     *
     * @param section section name, without brackets
     * @param key entry name
     * @return <code>true</code> if the first section named <code>section</code> contains <code>key</code>
     */
    public boolean exists(String section, String key) {
        assert contents != null;
        for (Section s : sections) {
            if (s.name.equals(section)) {
                for (Entry e : s.entries) {
                    if (e.name.equals(key)) {
                        return true;
                    }
                }
                return false;
            }
        }
        return false;
    }

    /**
     * Registers a new entry to be written by {@link #update(OutputStream)}.
     * The entry is appended to the end of the first section with the given name;
     * if the file has no such section, the section is appended to the end of the output.
     *
     * @param section section name, without brackets
     * @param key entry name
     * @param newValue entry value, <code>null</code> is written as an empty value
     */
    public void add(String section, String key, String newValue) {
        additions.add(section);
        additions.add(key);
        additions.add(newValue);
    }

    /**
     * Registers a new value for an existing entry. The request has no effect
     * if the entry is not present in the first section with the given name.
     *
     * @param section section name, without brackets
     * @param key entry name
     * @param newValue replacement value, <code>null</code> is written as an empty value
     */
    public void change(String section, String key, String newValue) {
        changes.add(section);
        changes.add(key);
        changes.add(newValue);
    }

    /**
     * Registers removal of an existing entry (key and value; the line terminator is kept).
     * The request has no effect if the entry is not present in the first section with the given name.
     *
     * @param section section name, without brackets
     * @param key entry name
     */
    public void delete(String section, String key) {
        deletions.add(section);
        deletions.add(key);
    }

    /**
     * Reads the stream to its end, remembers the raw bytes and records offsets of
     * sections, keys and values. Any result of a previous parse is discarded;
     * pending additions/changes/deletions are kept.
     *
     * @param is source of the configuration file; not closed by this method
     * @throws IOException on read failure or when the content can't be parsed
     *     (entry outside of a section, missing <code>]</code> or <code>=</code>, indented key)
     */
    public void parse(InputStream is) throws IOException {
        // full reset: a previous parse that failed half-way must not leak
        // its unfinished section/entries into this one
        resetParseState();
        ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
        ByteArrayOutputStream line = new ByteArrayOutputStream(80);
        int offset = 0;
        int lineOffset = -1;
        int lineNumber = 1;
        boolean crDetected = false; // true when previous char was \r
        int b;
        while ((b = is.read()) != -1) {
            bos.write(b);
            if (b == '\n' || b == '\r') {
                if (line.size() > 0) {
                    // processLine comes first: when it closes a section, the section shall
                    // end at the previous non-empty line, not at the line being processed
                    processLine(lineNumber, lineOffset, line.toByteArray());
                    line.reset();
                    lineOffset = -1;
                    lastNonEmptyLineEndOffset = bos.size() - 1; // offset points to EOL char
                }
                // else: XXX does empty line closes entry???
                // when \n follows \r, increment line count only once
                if (!(b == '\n' && crDetected)) {
                    lineNumber++;
                }
                crDetected = b == '\r';
            } else {
                crDetected = false;
                if (line.size() == 0) {
                    lineOffset = offset;
                }
                line.write(b);
            }
            offset++;
        }
        // handle last line in case it's not EOL-terminated
        if (line.size() > 0) {
            processLine(lineNumber, lineOffset, line.toByteArray());
            // might need it for #closeSection() below
            lastNonEmptyLineEndOffset = bos.size();
        }
        if (state == ParseState.Entry) {
            closeEntry();
        }
        if (state == ParseState.Section) {
            closeSection();
        }
        contents = bos.toByteArray();
    }

    /**
     * Writes the parsed content with all registered edits applied. Bytes not affected
     * by an edit are copied verbatim. When a section occurs more than once, only its
     * first occurrence is updated. Additions that name a section missing from the file
     * are written as new sections after the original content.
     *
     * @param out destination; neither flushed nor closed by this method
     * @throws IOException if nothing has been successfully parsed yet, or on write failure
     */
    public void update(OutputStream out) throws IOException {
        if (contents == null) {
            throw new IOException("Shall parse first");
        }
        HashSet<String> processedSections = new HashSet<String>();
        int contentsOffset = 0; // first byte of contents not yet copied to out
        for (Section section : sections) {
            LinkedHashMap<String, String> additionsInSection = new LinkedHashMap<String, String>();
            LinkedHashMap<String, String> changesInSection = new LinkedHashMap<String, String>();
            LinkedHashSet<String> deletionsInSection = new LinkedHashSet<String>();
            if (!processedSections.contains(section.name)) {
                for (Iterator<String> it = additions.iterator(); it.hasNext();) {
                    String s = it.next(), k = it.next(), v = it.next();
                    if (section.name.equals(s)) {
                        additionsInSection.put(k, v);
                    }
                }
                for (Iterator<String> it = changes.iterator(); it.hasNext();) {
                    String s = it.next(), k = it.next(), v = it.next();
                    if (section.name.equals(s)) {
                        changesInSection.put(k, v);
                    }
                }
                for (Iterator<String> it = deletions.iterator(); it.hasNext();) {
                    String s = it.next(), k = it.next();
                    if (section.name.equals(s)) {
                        deletionsInSection.add(k);
                    }
                }
            }
            for (Entry e : section.entries) {
                if (deletionsInSection.contains(e.name)) {
                    // write up to key start only
                    out.write(contents, contentsOffset, e.start - contentsOffset);
                    contentsOffset = e.valueEnd + 1;
                } else if (changesInSection.containsKey(e.name)) {
                    if (e.valueStart == -1) {
                        // no value in the file, e.valueEnd determines insertion point
                        out.write(contents, contentsOffset, e.valueEnd + 1 - contentsOffset);
                    } else {
                        // e.valueEnd points to last character of the value
                        out.write(contents, contentsOffset, e.valueStart - contentsOffset);
                    }
                    String value = changesInSection.get(e.name);
                    out.write(value == null ? new byte[0] : value.getBytes());
                    contentsOffset = e.valueEnd + 1;
                }
                // else: keep contentsOffset to point to first uncopied character
            }
            if (section.entries.length == 0) {
                // no entries, empty or only comments, perhaps.
                // use end of last meaningful line (whether [section] or comment string),
                // which points to newline character
                out.write(contents, contentsOffset, section.end - contentsOffset);
                contentsOffset = section.end;
            }
            if (!additionsInSection.isEmpty()) {
                // make sure additions are written once everything else is there.
                // section.end points to the line delimiter of the last non-empty line of the
                // section; new lines go in front of it, each with \n prepended, so that
                // the original EOL gets moved to the very end of the section.
                out.write(contents, contentsOffset, section.end - contentsOffset);
                contentsOffset = section.end;
                for (String k : additionsInSection.keySet()) {
                    String v = additionsInSection.get(k);
                    out.write(String.format("\n%s = %s", k, v == null ? "" : v).getBytes());
                }
            }
            // if section comes more than once, update only first one.
            processedSections.add(section.name);
        }
        out.write(contents, contentsOffset, contents.length - contentsOffset);
        // by now processedSections holds the name of every section present in the file
        writeNewSections(out, processedSections);
    }

    /**
     * Appends additions that target sections absent from the parsed file, grouped
     * by section, keeping the order the sections and keys were first registered in.
     */
    private void writeNewSections(OutputStream out, HashSet<String> existingSections) throws IOException {
        LinkedHashMap<String, LinkedHashMap<String, String>> newSections =
                new LinkedHashMap<String, LinkedHashMap<String, String>>();
        for (Iterator<String> it = additions.iterator(); it.hasNext();) {
            String s = it.next(), k = it.next(), v = it.next();
            if (existingSections.contains(s)) {
                continue;
            }
            LinkedHashMap<String, String> sectionEntries = newSections.get(s);
            if (sectionEntries == null) {
                sectionEntries = new LinkedHashMap<String, String>();
                newSections.put(s, sectionEntries);
            }
            sectionEntries.put(k, v);
        }
        if (newSections.isEmpty()) {
            return;
        }
        if (contents.length > 0) {
            byte last = contents[contents.length - 1];
            if (last != '\n' && last != '\r') {
                // original file doesn't end with EOL, don't glue the header to its last line
                out.write('\n');
            }
        }
        for (String name : newSections.keySet()) {
            out.write(String.format("[%s]\n", name).getBytes());
            LinkedHashMap<String, String> sectionEntries = newSections.get(name);
            for (String k : sectionEntries.keySet()) {
                String v = sectionEntries.get(k);
                out.write(String.format("%s = %s\n", k, v == null ? "" : v).getBytes());
            }
        }
    }

    /** Brings every parse-related field back to its initial value. */
    private void resetParseState() {
        state = ParseState.Initial;
        lastNonEmptyLineEndOffset = -1;
        sectionName = null;
        sectionStart = -1;
        entryKey = null;
        entryStart = valueStart = valueEnd = -1;
        entries = null;
        sections.clear();
        contents = null;
    }

    /**
     * Classifies a single non-empty line and advances the parse state accordingly.
     *
     * @param lineNumber 1-based line number, for error messages only
     * @param offset offset of the first byte of the line in the file
     * @param line bytes of the line, without line terminator
     * @throws IOException if the line violates the expected syntax
     */
    private void processLine(int lineNumber, int offset, byte[] line) throws IOException {
        int i = 0;
        while (i < line.length && Character.isWhitespace(line[i])) {
            i++;
        }
        if (i == line.length) {
            // whitespace-only line
            return;
        }
        final int localOffset = i; // index of the first non-whitespace byte
        if (line[i] == '[') {
            if (state == ParseState.Entry) {
                closeEntry();
            }
            if (state == ParseState.Section) {
                closeSection();
            }

            while (i < line.length && line[i] != ']') {
                i++;
            }
            if (i == line.length) {
                throw new IOException(String.format("Can't find closing ']' for section name in line %d", lineNumber));
            }
            sectionName = new String(line, localOffset + 1, i - localOffset - 1);
            sectionStart = offset + localOffset;
            state = ParseState.Section;
        } else if (line[i] == '#' || line[i] == ';') {
            // comment line, nothing to process
            return;
        } else {
            // entry
            if (state == ParseState.Initial) {
                throw new IOException(String.format("Line %d doesn't belong to any section", lineNumber));
            }
            if (localOffset > 0) {
                if (state == ParseState.Section) {
                    throw new IOException(String.format("Non-indented key is expected in line %d", lineNumber));
                }
                assert state == ParseState.Entry;
                // whitespace-indented continuation of the previous entry
                if (valueStart == -1) {
                    // value didn't start at the same line the key was found at
                    valueStart = offset + localOffset;
                }
                // value ends with eol (assumption is trailing comments are not allowed)
                valueEnd = offset + line.length - 1;
            } else {
                if (state == ParseState.Entry) {
                    closeEntry();
                }
                assert state == ParseState.Section;
                // it's a new entry
                state = ParseState.Entry;
                // get name of the entry
                while (i < line.length && !Character.isWhitespace(line[i]) && line[i] != '=') {
                    i++;
                }
                if (i == line.length) {
                    throw new IOException(String.format("Can't process entry in line %d", lineNumber));
                }
                entryKey = new String(line, localOffset, i - localOffset);
                entryStart = offset + localOffset;
                // look for '=' after key name
                while (i < line.length && line[i] != '=') {
                    i++;
                }
                if (i == line.length) {
                    throw new IOException(String.format("Can't find '=' after key %s in line %d", entryKey, lineNumber));
                }
                // skip whitespaces after '='
                i++; // line[i] == '='
                while (i < line.length && Character.isWhitespace(line[i])) {
                    i++;
                }
                // valueStart might be -1 in case no value is specified in the same line as key
                // but valueEnd is always initialized just in case there's no next, value continuation line
                if (i == line.length) {
                    valueStart = -1;
                } else {
                    valueStart = offset + i;
                }

                // if trailing comments are allowed, shall
                // look up comment char and set valueEnd to its position-1
                valueEnd = offset + line.length - 1;
            }
        }
    }

    /** Records the section being parsed, together with entries collected for it. */
    private void closeSection() {
        assert state == ParseState.Section;
        assert sectionName != null;
        assert lastNonEmptyLineEndOffset != -1;
        Section s = new Section(sectionName, sectionStart, lastNonEmptyLineEndOffset,
                entries == null ? Collections.<Entry>emptyList() : entries);
        sections.add(s);
        sectionName = null;
        sectionStart = -1;
        state = ParseState.Initial;
        entries = null;
    }

    /** Records the entry being parsed into the list of the current section's entries. */
    private void closeEntry() {
        assert state == ParseState.Entry;
        assert entryKey != null;
        state = ParseState.Section;
        Entry e = new Entry(entryKey, entryStart, valueStart, valueEnd);
        if (entries == null) {
            entries = new ArrayList<Entry>();
        }
        entries.add(e);
        entryKey = null;
        entryStart = valueStart = valueEnd = -1;
    }


    /** A piece of the file identified by the offset of its first byte. */
    private static class Block {
        public final int start;

        Block(int s) {
            start = s;
        }
    }

    /**
     * Key with its value. <code>start</code> is the offset of the key,
     * <code>valueStart</code> is the offset of the first byte of the value or -1
     * when there's no value, <code>valueEnd</code> is the offset of the last byte
     * of the entry's last line.
     */
    private static class Entry extends Block {
        public final int valueStart, valueEnd;
        public final String name;

        Entry(String n, int s, int vs, int ve) {
            super(s);
            name = n;
            valueStart = vs;
            valueEnd = ve;
        }
    }

    /**
     * Section with its entries. <code>end</code> is the offset right after
     * the last non-empty line of the section (i.e. that of its line terminator,
     * or the length of the file when there's none).
     */
    private static class Section extends Block {
        public final String name;
        public final Entry[] entries;
        public final int end;

        Section(String n, int s, int endOffset, List<Entry> e) {
            super(s);
            name = n;
            end = endOffset;
            entries = new Entry[e.size()];
            e.toArray(entries);
        }
    }

    /** Manual check: parses the sample, adds a key to the first section, prints before and after. */
    public static void main(String[] args) throws Exception {
        ConfigFileParser p = new ConfigFileParser();
        p.parse(new ByteArrayInputStream(xx.getBytes()));
        System.out.println(">>>");
        System.out.println(xx);
        System.out.println("===");
        p.add("sect1", "key5", "x");
        ByteArrayOutputStream out = new ByteArrayOutputStream(xx.length());
        p.update(out);
        System.out.println(new String(out.toByteArray()));
    }

    private static final String xx = "#comment1\n [sect1]\nkey = value #not a comment2\n#comment3\nkey2= \nkey3 = \n value1, #cc\n value2\nkey4 = v1,\n v2 \n ,v3\n\n\n[sect2]\nx = a";
}
