001 package ezvcard.io.text; 002 003 import java.io.Closeable; 004 import java.io.File; 005 import java.io.FileNotFoundException; 006 import java.io.FileReader; 007 import java.io.IOException; 008 import java.io.InputStream; 009 import java.io.InputStreamReader; 010 import java.io.Reader; 011 import java.io.StringReader; 012 import java.nio.charset.Charset; 013 import java.util.ArrayList; 014 import java.util.LinkedList; 015 import java.util.List; 016 017 import ezvcard.Messages; 018 import ezvcard.VCard; 019 import ezvcard.VCardDataType; 020 import ezvcard.VCardVersion; 021 import ezvcard.io.CannotParseException; 022 import ezvcard.io.EmbeddedVCardException; 023 import ezvcard.io.SkipMeException; 024 import ezvcard.io.scribe.RawPropertyScribe; 025 import ezvcard.io.scribe.ScribeIndex; 026 import ezvcard.io.scribe.VCardPropertyScribe; 027 import ezvcard.io.scribe.VCardPropertyScribe.Result; 028 import ezvcard.parameter.Encoding; 029 import ezvcard.parameter.VCardParameters; 030 import ezvcard.property.Address; 031 import ezvcard.property.Label; 032 import ezvcard.property.RawProperty; 033 import ezvcard.property.VCardProperty; 034 import ezvcard.util.IOUtils; 035 import ezvcard.util.org.apache.commons.codec.DecoderException; 036 import ezvcard.util.org.apache.commons.codec.net.QuotedPrintableCodec; 037 038 /* 039 Copyright (c) 2013, Michael Angstadt 040 All rights reserved. 041 042 Redistribution and use in source and binary forms, with or without 043 modification, are permitted provided that the following conditions are met: 044 045 1. Redistributions of source code must retain the above copyright notice, this 046 list of conditions and the following disclaimer. 047 2. Redistributions in binary form must reproduce the above copyright notice, 048 this list of conditions and the following disclaimer in the documentation 049 and/or other materials provided with the distribution. 050 051 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 052 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 053 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 054 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 055 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 056 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 057 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 058 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 059 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 060 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 061 062 The views and conclusions contained in the software and documentation are those 063 of the authors and should not be interpreted as representing official policies, 064 either expressed or implied, of the FreeBSD Project. 065 */ 066 067 /** 068 * <p> 069 * Parses {@link VCard} objects from a plain-text vCard data stream. 070 * </p> 071 * <p> 072 * <b>Example:</b> 073 * 074 * <pre class="brush:java"> 075 * File file = new File("vcards.vcf"); 076 * VCardReader vcardReader = new VCardReader(file); 077 * VCard vcard; 078 * while ((vcard = vcardReader.readNext()) != null){ 079 * ... 080 * } 081 * vcardReader.close(); 082 * </pre> 083 * 084 * </p> 085 * @author Michael Angstadt 086 */ 087 public class VCardReader implements Closeable { 088 private final List<String> warnings = new ArrayList<String>(); 089 private ScribeIndex index = new ScribeIndex(); 090 private Charset defaultQuotedPrintableCharset; 091 private final VCardRawReader reader; 092 093 /** 094 * Creates a reader that parses vCards from a string. 095 * @param str the string to read the vCards from 096 */ 097 public VCardReader(String str) { 098 this(new StringReader(str)); 099 } 100 101 /** 102 * Creates a reader that parses vCards from an input stream. 103 * @param in the input stream to read the vCards from 104 */ 105 public VCardReader(InputStream in) { 106 this(new InputStreamReader(in)); 107 } 108 109 /** 110 * Creates a reader that parses vCards from a file. 111 * @param file the file to read the vCards from 112 * @throws FileNotFoundException if the file doesn't exist 113 */ 114 public VCardReader(File file) throws FileNotFoundException { 115 this(new FileReader(file)); 116 } 117 118 /** 119 * Creates a reader that parses vCards from a reader. 120 * @param reader the reader to read the vCards from 121 */ 122 public VCardReader(Reader reader) { 123 this.reader = new VCardRawReader(reader); 124 defaultQuotedPrintableCharset = this.reader.getEncoding(); 125 if (defaultQuotedPrintableCharset == null) { 126 defaultQuotedPrintableCharset = Charset.defaultCharset(); 127 } 128 } 129 130 /** 131 * Gets whether the reader will decode parameter values that use circumflex 132 * accent encoding (enabled by default). This escaping mechanism allows 133 * newlines and double quotes to be included in parameter values. 134 * @return true if circumflex accent decoding is enabled, false if not 135 * @see VCardRawReader#isCaretDecodingEnabled() 136 */ 137 public boolean isCaretDecodingEnabled() { 138 return reader.isCaretDecodingEnabled(); 139 } 140 141 /** 142 * Sets whether the reader will decode parameter values that use circumflex 143 * accent encoding (enabled by default). This escaping mechanism allows 144 * newlines and double quotes to be included in parameter values. 145 * @param enable true to use circumflex accent decoding, false not to 146 * @see VCardRawReader#setCaretDecodingEnabled(boolean) 147 */ 148 public void setCaretDecodingEnabled(boolean enable) { 149 reader.setCaretDecodingEnabled(enable); 150 } 151 152 /** 153 * <p> 154 * Gets the character set to use when decoding quoted-printable values if 155 * the property has no CHARSET parameter, or if the CHARSET parameter is not 156 * a valid character set. 157 * </p> 158 * <p> 159 * By default, the Reader's character encoding will be used. If the Reader 160 * has no character encoding, then the system's default character encoding 161 * will be used. 162 * </p> 163 * @return the character set 164 */ 165 public Charset getDefaultQuotedPrintableCharset() { 166 return defaultQuotedPrintableCharset; 167 } 168 169 /** 170 * <p> 171 * Sets the character set to use when decoding quoted-printable values if 172 * the property has no CHARSET parameter, or if the CHARSET parameter is not 173 * a valid character set. 174 * </p> 175 * <p> 176 * By default, the Reader's character encoding will be used. If the Reader 177 * has no character encoding, then the system's default character encoding 178 * will be used. 179 * </p> 180 * @param charset the character set 181 */ 182 public void setDefaultQuotedPrintableCharset(Charset charset) { 183 defaultQuotedPrintableCharset = charset; 184 } 185 186 /** 187 * <p> 188 * Registers a property scribe. This is the same as calling: 189 * </p> 190 * <p> 191 * {@code getScribeIndex().register(scribe)} 192 * </p> 193 * @param scribe the scribe to register 194 */ 195 public void registerScribe(VCardPropertyScribe<? extends VCardProperty> scribe) { 196 index.register(scribe); 197 } 198 199 /** 200 * Gets the scribe index. 201 * @return the scribe index 202 */ 203 public ScribeIndex getScribeIndex() { 204 return index; 205 } 206 207 /** 208 * Sets the scribe index. 209 * @param index the scribe index 210 */ 211 public void setScribeIndex(ScribeIndex index) { 212 this.index = index; 213 } 214 215 /** 216 * Gets the warnings from the last vCard that was unmarshalled. This list is 217 * reset every time a new vCard is read. 218 * @return the warnings or empty list if there were no warnings 219 */ 220 public List<String> getWarnings() { 221 return new ArrayList<String>(warnings); 222 } 223 224 /** 225 * Reads the next vCard from the data stream. 226 * @return the next vCard or null if there are no more 227 * @throws IOException if there's a problem reading from the stream 228 */ 229 public VCard readNext() throws IOException { 230 if (reader.eof()) { 231 return null; 232 } 233 234 warnings.clear(); 235 236 VCardDataStreamListenerImpl listener = new VCardDataStreamListenerImpl(); 237 reader.start(listener); 238 239 return listener.root; 240 } 241 242 /** 243 * Assigns names to all nameless parameters. v3.0 and v4.0 requires all 244 * parameters to have names, but v2.1 does not. 245 * @param parameters the parameters 246 */ 247 private void handleNamelessParameters(VCardParameters parameters) { 248 List<String> namelessParamValues = parameters.get(null); 249 for (String paramValue : namelessParamValues) { 250 String paramName; 251 if (VCardDataType.find(paramValue) != null) { 252 paramName = VCardParameters.VALUE; 253 } else if (Encoding.find(paramValue) != null) { 254 paramName = VCardParameters.ENCODING; 255 } else { 256 //otherwise, assume it's a TYPE 257 paramName = VCardParameters.TYPE; 258 } 259 parameters.put(paramName, paramValue); 260 } 261 parameters.removeAll(null); 262 } 263 264 /** 265 * <p> 266 * Accounts for multi-valued TYPE parameters being enclosed entirely in 267 * double quotes (for example: ADR;TYPE="home,work"). 268 * </p> 269 * <p> 270 * Many examples throughout the 4.0 specs show TYPE parameters being encoded 271 * in this way. This conflicts with the ABNF and is noted in the errata. 272 * This method will split the value by comma incase the vendor implemented 273 * it this way. 274 * </p> 275 * @param parameters the parameters 276 */ 277 private void handleQuotedMultivaluedTypeParams(VCardParameters parameters) { 278 //account for multi-valued TYPE parameters being enclosed entirely in double quotes 279 //e.g. ADR;TYPE="home,work" 280 for (String typeParam : parameters.getTypes()) { 281 if (!typeParam.contains(",")) { 282 continue; 283 } 284 285 parameters.removeTypes(); 286 for (String splitValue : typeParam.split(",")) { 287 parameters.addType(splitValue); 288 } 289 } 290 } 291 292 /** 293 * Decodes the property value if it's encoded in quoted-printable encoding. 294 * Quoted-printable encoding is only supported in v2.1. 295 * @param name the property name 296 * @param parameters the parameters 297 * @param value the property value 298 * @return the decoded property value 299 */ 300 private String decodeQuotedPrintable(String name, VCardParameters parameters, String value) { 301 if (parameters.getEncoding() != Encoding.QUOTED_PRINTABLE) { 302 return value; 303 } 304 305 //remove encoding parameter 306 parameters.setEncoding(null); 307 308 //determine the character set 309 Charset charset = null; 310 String charsetStr = parameters.getCharset(); 311 if (charsetStr == null) { 312 charset = defaultQuotedPrintableCharset; 313 } else { 314 try { 315 charset = Charset.forName(charsetStr); 316 } catch (Throwable t) { 317 charset = defaultQuotedPrintableCharset; 318 319 //the given charset was invalid, so add a warning 320 addWarning(name, 23, charsetStr, charset.name()); 321 } 322 } 323 324 QuotedPrintableCodec codec = new QuotedPrintableCodec(charset.name()); 325 try { 326 return codec.decode(value); 327 } catch (DecoderException e) { 328 //only thrown if the charset is invalid, which we know will never happen because we're using a Charset object 329 throw new RuntimeException(e); 330 } 331 } 332 333 /** 334 * Closes the underlying {@link Reader} object. 335 */ 336 public void close() throws IOException { 337 reader.close(); 338 } 339 340 private void addWarning(String propertyName, int code, Object... args) { 341 String message = Messages.INSTANCE.getParseMessage(code, args); 342 addWarning(propertyName, message); 343 } 344 345 private void addWarning(String propertyName, String message) { 346 int code = (propertyName == null) ? 37 : 36; 347 int line = reader.getLineNum(); 348 349 String warning = Messages.INSTANCE.getParseMessage(code, line, propertyName, message); 350 warnings.add(warning); 351 } 352 353 private class VCardDataStreamListenerImpl implements VCardRawReader.VCardDataStreamListener { 354 private VCard root; 355 private final List<Label> labels = new ArrayList<Label>(); 356 private final LinkedList<VCard> vcardStack = new LinkedList<VCard>(); 357 private EmbeddedVCardException embeddedVCardException; 358 359 public void beginComponent(String name) { 360 if (!"VCARD".equalsIgnoreCase(name)) { 361 return; 362 } 363 364 VCard vcard = new VCard(); 365 366 //initialize version to 2.1, since the VERSION property can exist anywhere in a 2.1 vCard 367 vcard.setVersion(VCardVersion.V2_1); 368 369 vcardStack.add(vcard); 370 371 if (root == null) { 372 root = vcard; 373 } 374 375 if (embeddedVCardException != null) { 376 embeddedVCardException.injectVCard(vcard); 377 embeddedVCardException = null; 378 } 379 } 380 381 public void readVersion(VCardVersion version) { 382 if (vcardStack.isEmpty()) { 383 //not in a "VCARD" component 384 return; 385 } 386 387 vcardStack.getLast().setVersion(version); 388 } 389 390 public void readProperty(String group, String name, VCardParameters parameters, String value) { 391 if (vcardStack.isEmpty()) { 392 //not in a "VCARD" component 393 return; 394 } 395 396 if (embeddedVCardException != null) { 397 //the next property was supposed to be the start of a nested vCard, but it wasn't 398 embeddedVCardException.injectVCard(null); 399 embeddedVCardException = null; 400 } 401 402 VCard curVCard = vcardStack.getLast(); 403 VCardVersion version = curVCard.getVersion(); 404 405 //massage the parameters 406 handleNamelessParameters(parameters); 407 handleQuotedMultivaluedTypeParams(parameters); 408 409 //decode property value from quoted-printable 410 value = decodeQuotedPrintable(name, parameters, value); 411 412 //get the scribe 413 VCardPropertyScribe<? extends VCardProperty> scribe = index.getPropertyScribe(name); 414 if (scribe == null) { 415 scribe = new RawPropertyScribe(name); 416 } 417 418 //get the data type 419 VCardDataType dataType = parameters.getValue(); 420 if (dataType == null) { 421 //use the default data type if there is no VALUE parameter 422 dataType = scribe.defaultDataType(version); 423 } else { 424 //remove VALUE parameter if it is set 425 parameters.setValue(null); 426 } 427 428 VCardProperty property; 429 try { 430 Result<? extends VCardProperty> result = scribe.parseText(value, dataType, version, parameters); 431 432 for (String warning : result.getWarnings()) { 433 addWarning(name, warning); 434 } 435 436 property = result.getProperty(); 437 property.setGroup(group); 438 439 if (property instanceof Label) { 440 //LABELs must be treated specially so they can be matched up with their ADRs 441 labels.add((Label) property); 442 return; 443 } 444 } catch (SkipMeException e) { 445 addWarning(name, 22, e.getMessage()); 446 return; 447 } catch (CannotParseException e) { 448 addWarning(name, 25, value, e.getMessage()); 449 property = new RawProperty(name, value); 450 property.setGroup(group); 451 } catch (EmbeddedVCardException e) { 452 //parse an embedded vCard (i.e. the AGENT type) 453 property = e.getProperty(); 454 455 if (value.length() == 0 || version == VCardVersion.V2_1) { 456 //a nested vCard is expected to be next (2.1 style) 457 embeddedVCardException = e; 458 } else { 459 //the property value should be an embedded vCard (3.0 style) 460 value = VCardPropertyScribe.unescape(value); 461 462 VCardReader agentReader = new VCardReader(value); 463 try { 464 VCard nestedVCard = agentReader.readNext(); 465 if (nestedVCard != null) { 466 e.injectVCard(nestedVCard); 467 } 468 } catch (IOException e2) { 469 //shouldn't be thrown because we're reading from a string 470 } finally { 471 for (String w : agentReader.getWarnings()) { 472 addWarning(name, 26, w); 473 } 474 IOUtils.closeQuietly(agentReader); 475 } 476 } 477 } 478 479 curVCard.addProperty(property); 480 } 481 482 public void endComponent(String name) { 483 if (vcardStack.isEmpty()) { 484 //not in a "VCARD" component 485 return; 486 } 487 488 if (!"VCARD".equalsIgnoreCase(name)) { 489 //not a "VCARD" component 490 return; 491 } 492 493 VCard curVCard = vcardStack.removeLast(); 494 495 //assign labels to their addresses 496 for (Label label : labels) { 497 boolean orphaned = true; 498 for (Address adr : curVCard.getAddresses()) { 499 if (adr.getLabel() == null && adr.getTypes().equals(label.getTypes())) { 500 adr.setLabel(label.getValue()); 501 orphaned = false; 502 break; 503 } 504 } 505 if (orphaned) { 506 curVCard.addOrphanedLabel(label); 507 } 508 } 509 510 if (vcardStack.isEmpty()) { 511 throw new VCardRawReader.StopReadingException(); 512 } 513 } 514 515 public void invalidLine(String line) { 516 if (vcardStack.isEmpty()) { 517 //not in a "VCARD" component 518 return; 519 } 520 521 addWarning(null, 27, line); 522 } 523 524 public void invalidVersion(String version) { 525 if (vcardStack.isEmpty()) { 526 //not in a "VCARD" component 527 return; 528 } 529 530 addWarning("VERSION", 28, version); 531 } 532 } 533 }