001 package ezvcard.io.text; 002 003 import static ezvcard.util.StringUtils.NEWLINE; 004 005 import java.io.Closeable; 006 import java.io.IOException; 007 import java.io.Reader; 008 import java.nio.charset.Charset; 009 010 import ezvcard.VCardException; 011 import ezvcard.VCardVersion; 012 import ezvcard.parameter.VCardParameters; 013 import ezvcard.util.StringUtils; 014 015 /* 016 Copyright (c) 2013, Michael Angstadt 017 All rights reserved. 018 019 Redistribution and use in source and binary forms, with or without 020 modification, are permitted provided that the following conditions are met: 021 022 1. Redistributions of source code must retain the above copyright notice, this 023 list of conditions and the following disclaimer. 024 2. Redistributions in binary form must reproduce the above copyright notice, 025 this list of conditions and the following disclaimer in the documentation 026 and/or other materials provided with the distribution. 027 028 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 029 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 030 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 031 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 032 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 033 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 034 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 035 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 036 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 037 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 038 039 The views and conclusions contained in the software and documentation are those 040 of the authors and should not be interpreted as representing official policies, 041 either expressed or implied, of the FreeBSD Project. 042 */ 043 044 /** 045 * Parses a vCard data stream. 046 * @author Michael Angstadt 047 */ 048 public class VCardRawReader implements Closeable { 049 private final FoldedLineReader reader; 050 private boolean caretDecodingEnabled = true; 051 private boolean eof = false; 052 private VCardVersion version = VCardVersion.V2_1; 053 054 /** 055 * Creates a new reader. 056 * @param reader the reader to the data stream 057 */ 058 public VCardRawReader(Reader reader) { 059 this.reader = new FoldedLineReader(reader); 060 } 061 062 /** 063 * Gets the line number of the last line that was read. 064 * @return the line number 065 */ 066 public int getLineNum() { 067 return reader.getLineNum(); 068 } 069 070 /** 071 * Starts or continues reading from the vCard data stream. 072 * @param listener handles the vCard data as it is read off the wire 073 * @throws IOException if there is an I/O problem 074 */ 075 public void start(VCardDataStreamListener listener) throws IOException { 076 String line; 077 while ((line = reader.readLine()) != null) { 078 try { 079 parseLine(line, listener); 080 } catch (StopReadingException e) { 081 return; 082 } 083 } 084 eof = true; 085 } 086 087 private void parseLine(String line, VCardDataStreamListener listener) { 088 String group = null; 089 String propertyName = null; 090 VCardParameters parameters = new VCardParameters(); 091 String value = null; 092 093 char escapeChar = 0; //is the next char escaped? 094 boolean inQuotes = false; //are we inside of double quotes? 095 StringBuilder buffer = new StringBuilder(); 096 String curParamName = null; 097 for (int i = 0; i < line.length(); i++) { 098 char ch = line.charAt(i); 099 if (escapeChar != 0) { 100 if (escapeChar == '\\') { 101 if (ch == '\\') { 102 buffer.append(ch); 103 } else if (ch == 'n' || ch == 'N') { 104 //newlines appear as "\n" or "\N" (see RFC 2426 p.7) 105 buffer.append(NEWLINE); 106 } else if (ch == '"' && version != VCardVersion.V2_1) { 107 //double quotes don't need to be escaped in 2.1 parameter values because they have no special meaning 108 buffer.append(ch); 109 } else if (ch == ';' && version == VCardVersion.V2_1) { 110 //semi-colons can only be escaped in 2.1 parameter values (see section 2 of specs) 111 //if a 3.0/4.0 param value has semi-colons, the value should be surrounded in double quotes 112 buffer.append(ch); 113 } else { 114 //treat the escape character as a normal character because it's not a valid escape sequence 115 buffer.append(escapeChar).append(ch); 116 } 117 } else if (escapeChar == '^') { 118 if (ch == '^') { 119 buffer.append(ch); 120 } else if (ch == 'n') { 121 buffer.append(NEWLINE); 122 } else if (ch == '\'') { 123 buffer.append('"'); 124 } else { 125 //treat the escape character as a normal character because it's not a valid escape sequence 126 buffer.append(escapeChar).append(ch); 127 } 128 } 129 escapeChar = 0; 130 } else if (ch == '\\' || (ch == '^' && version != VCardVersion.V2_1 && caretDecodingEnabled)) { 131 escapeChar = ch; 132 } else if (ch == '.' && group == null && propertyName == null) { 133 group = buffer.toString(); 134 buffer.setLength(0); 135 } else if ((ch == ';' || ch == ':') && !inQuotes) { 136 if (propertyName == null) { 137 propertyName = buffer.toString(); 138 } else { 139 //parameter value 140 String paramValue = buffer.toString(); 141 if (version == VCardVersion.V2_1) { 142 //2.1 allows whitespace to surround the "=", so remove it 143 paramValue = StringUtils.ltrim(paramValue); 144 } 145 parameters.put(curParamName, paramValue); 146 curParamName = null; 147 } 148 buffer.setLength(0); 149 150 if (ch == ':') { 151 if (i < line.length() - 1) { 152 value = line.substring(i + 1); 153 } else { 154 value = ""; 155 } 156 break; 157 } 158 } else if (ch == ',' && !inQuotes && version != VCardVersion.V2_1) { 159 //multi-valued parameter 160 parameters.put(curParamName, buffer.toString()); 161 buffer.setLength(0); 162 } else if (ch == '=' && curParamName == null) { 163 //parameter name 164 String paramName = buffer.toString(); 165 if (version == VCardVersion.V2_1) { 166 //2.1 allows whitespace to surround the "=", so remove it 167 paramName = StringUtils.rtrim(paramName); 168 } 169 curParamName = paramName; 170 buffer.setLength(0); 171 } else if (ch == '"' && version != VCardVersion.V2_1) { 172 //2.1 doesn't use the quoting mechanism 173 inQuotes = !inQuotes; 174 } else { 175 buffer.append(ch); 176 } 177 } 178 179 if (propertyName == null || value == null) { 180 listener.invalidLine(line); 181 return; 182 } 183 if ("VERSION".equalsIgnoreCase(propertyName)) { 184 VCardVersion version = VCardVersion.valueOfByStr(value.trim()); 185 if (version == null) { 186 listener.invalidVersion(value); 187 } else { 188 this.version = version; 189 listener.readVersion(version); 190 } 191 return; 192 } 193 if ("BEGIN".equalsIgnoreCase(propertyName)) { 194 listener.beginComponent(value.trim()); 195 return; 196 } 197 if ("END".equalsIgnoreCase(propertyName)) { 198 listener.endComponent(value.trim()); 199 return; 200 } 201 listener.readProperty(group, propertyName, parameters, value); 202 } 203 204 /** 205 * <p> 206 * Gets whether the reader will decode parameter values that use circumflex 207 * accent encoding (enabled by default). This escaping mechanism allows 208 * newlines and double quotes to be included in parameter values. 209 * </p> 210 * 211 * <table border="1"> 212 * <tr> 213 * <th>Raw Character</th> 214 * <th>Encoded Character</th> 215 * </tr> 216 * <tr> 217 * <td>{@code "}</td> 218 * <td>{@code ^'}</td> 219 * </tr> 220 * <tr> 221 * <td><i>newline</i></td> 222 * <td>{@code ^n}</td> 223 * </tr> 224 * <tr> 225 * <td>{@code ^}</td> 226 * <td>{@code ^^}</td> 227 * </tr> 228 * </table> 229 * 230 * <p> 231 * Example: 232 * </p> 233 * 234 * <pre> 235 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt 236 * sburgh, PA 15212":40.446816;80.00566 237 * </pre> 238 * 239 * @return true if circumflex accent decoding is enabled, false if not 240 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a> 241 */ 242 public boolean isCaretDecodingEnabled() { 243 return caretDecodingEnabled; 244 } 245 246 /** 247 * <p> 248 * Sets whether the reader will decode parameter values that use circumflex 249 * accent encoding (enabled by default). This escaping mechanism allows 250 * newlines and double quotes to be included in parameter values. 251 * </p> 252 * 253 * <table border="1"> 254 * <tr> 255 * <th>Raw Character</th> 256 * <th>Encoded Character</th> 257 * </tr> 258 * <tr> 259 * <td>{@code "}</td> 260 * <td>{@code ^'}</td> 261 * </tr> 262 * <tr> 263 * <td><i>newline</i></td> 264 * <td>{@code ^n}</td> 265 * </tr> 266 * <tr> 267 * <td>{@code ^}</td> 268 * <td>{@code ^^}</td> 269 * </tr> 270 * </table> 271 * 272 * <p> 273 * Example: 274 * </p> 275 * 276 * <pre> 277 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt 278 * sburgh, PA 15212":geo:40.446816,-80.00566 279 * </pre> 280 * 281 * @param enable true to use circumflex accent decoding, false not to 282 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a> 283 */ 284 public void setCaretDecodingEnabled(boolean enable) { 285 caretDecodingEnabled = enable; 286 } 287 288 /** 289 * Determines whether the end of the data stream has been reached. 290 * @return true if the end has been reached, false if not 291 */ 292 public boolean eof() { 293 return eof; 294 } 295 296 /** 297 * Gets the character encoding of the reader. 298 * @return the character encoding or null if none is defined 299 */ 300 public Charset getEncoding() { 301 return reader.getEncoding(); 302 } 303 304 /** 305 * Handles the vCard data as it is read off the data stream. Each one of 306 * this interface's methods may throw a {@link StopReadingException} at any 307 * time to force the parser to stop reading from the data stream. This will 308 * cause the reader to return from the {@link VCardRawReader#start} method. 309 * To continue reading from the data stream, simply call the 310 * {@link VCardRawReader#start} method again. 311 * @author Michael Angstadt 312 */ 313 public static interface VCardDataStreamListener { 314 /** 315 * Called when a component begins (when a "BEGIN:NAME" property is 316 * reached). 317 * @param name the component name (e.g. "VCARD") 318 * @throws StopReadingException to force the reader to stop reading from 319 * the data stream 320 */ 321 void beginComponent(String name); 322 323 /** 324 * Called when a property is read. 325 * @param group the group name or null if no group was defined 326 * @param name the property name (e.g. "VERSION") 327 * @param parameters the parameters 328 * @param value the property value 329 * @throws StopReadingException to force the reader to stop reading from 330 * the data stream 331 */ 332 void readProperty(String group, String name, VCardParameters parameters, String value); 333 334 /** 335 * Called when the vCard's VERSION property is read. 336 * @param version the version that was read 337 */ 338 void readVersion(VCardVersion version); 339 340 /** 341 * Called when a component ends (when a "END:NAME" property is reached). 342 * @param name the component name (e.g. "VCARD") 343 * @throws StopReadingException to force the reader to stop reading from 344 * the data stream 345 */ 346 void endComponent(String name); 347 348 /** 349 * Called when a line cannot be parsed. 350 * @param line the unparseable line 351 * @throws StopReadingException to force the reader to stop reading from 352 * the data stream 353 */ 354 void invalidLine(String line); 355 356 /** 357 * Called when an invalid VERSION property is encountered. 358 * @param version the invalid version 359 */ 360 void invalidVersion(String version); 361 } 362 363 /** 364 * Instructs a {@link VCardRawReader} to stop reading from the data stream 365 * when thrown from a {@link VCardDataStreamListener} implementation. 366 * @author Michael Angstadt 367 */ 368 @SuppressWarnings("serial") 369 public static class StopReadingException extends VCardException { 370 //empty 371 } 372 373 /** 374 * Closes the underlying {@link Reader} object. 375 */ 376 public void close() throws IOException { 377 reader.close(); 378 } 379 }