001 package ezvcard.io.text;
002
003 import java.io.Closeable;
004 import java.io.File;
005 import java.io.FileNotFoundException;
006 import java.io.FileReader;
007 import java.io.IOException;
008 import java.io.InputStream;
009 import java.io.InputStreamReader;
010 import java.io.Reader;
011 import java.io.StringReader;
012 import java.nio.charset.Charset;
013 import java.util.ArrayList;
014 import java.util.LinkedList;
015 import java.util.List;
016
017 import ezvcard.Messages;
018 import ezvcard.VCard;
019 import ezvcard.VCardDataType;
020 import ezvcard.VCardVersion;
021 import ezvcard.io.CannotParseException;
022 import ezvcard.io.EmbeddedVCardException;
023 import ezvcard.io.SkipMeException;
024 import ezvcard.io.scribe.RawPropertyScribe;
025 import ezvcard.io.scribe.ScribeIndex;
026 import ezvcard.io.scribe.VCardPropertyScribe;
027 import ezvcard.io.scribe.VCardPropertyScribe.Result;
028 import ezvcard.parameter.Encoding;
029 import ezvcard.parameter.VCardParameters;
030 import ezvcard.property.Address;
031 import ezvcard.property.Label;
032 import ezvcard.property.RawProperty;
033 import ezvcard.property.VCardProperty;
034 import ezvcard.util.IOUtils;
035 import ezvcard.util.org.apache.commons.codec.DecoderException;
036 import ezvcard.util.org.apache.commons.codec.net.QuotedPrintableCodec;
037
038 /*
039 Copyright (c) 2013, Michael Angstadt
040 All rights reserved.
041
042 Redistribution and use in source and binary forms, with or without
043 modification, are permitted provided that the following conditions are met:
044
045 1. Redistributions of source code must retain the above copyright notice, this
046 list of conditions and the following disclaimer.
047 2. Redistributions in binary form must reproduce the above copyright notice,
048 this list of conditions and the following disclaimer in the documentation
049 and/or other materials provided with the distribution.
050
051 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
052 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
053 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
054 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
055 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
056 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
057 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
058 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
059 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
060 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
061
062 The views and conclusions contained in the software and documentation are those
063 of the authors and should not be interpreted as representing official policies,
064 either expressed or implied, of the FreeBSD Project.
065 */
066
067 /**
068 * <p>
069 * Parses {@link VCard} objects from a plain-text vCard data stream.
070 * </p>
071 * <p>
072 * <b>Example:</b>
073 *
074 * <pre class="brush:java">
075 * File file = new File("vcards.vcf");
076 * VCardReader vcardReader = new VCardReader(file);
077 * VCard vcard;
078 * while ((vcard = vcardReader.readNext()) != null){
079 * ...
080 * }
081 * vcardReader.close();
082 * </pre>
083 *
084 * </p>
085 * @author Michael Angstadt
086 */
087 public class VCardReader implements Closeable {
088 private final List<String> warnings = new ArrayList<String>();
089 private ScribeIndex index = new ScribeIndex();
090 private Charset defaultQuotedPrintableCharset;
091 private final VCardRawReader reader;
092
093 /**
094 * Creates a reader that parses vCards from a string.
095 * @param str the string to read the vCards from
096 */
097 public VCardReader(String str) {
098 this(new StringReader(str));
099 }
100
101 /**
102 * Creates a reader that parses vCards from an input stream.
103 * @param in the input stream to read the vCards from
104 */
105 public VCardReader(InputStream in) {
106 this(new InputStreamReader(in));
107 }
108
109 /**
110 * Creates a reader that parses vCards from a file.
111 * @param file the file to read the vCards from
112 * @throws FileNotFoundException if the file doesn't exist
113 */
114 public VCardReader(File file) throws FileNotFoundException {
115 this(new FileReader(file));
116 }
117
118 /**
119 * Creates a reader that parses vCards from a reader.
120 * @param reader the reader to read the vCards from
121 */
122 public VCardReader(Reader reader) {
123 this.reader = new VCardRawReader(reader);
124 defaultQuotedPrintableCharset = this.reader.getEncoding();
125 if (defaultQuotedPrintableCharset == null) {
126 defaultQuotedPrintableCharset = Charset.defaultCharset();
127 }
128 }
129
130 /**
131 * Gets whether the reader will decode parameter values that use circumflex
132 * accent encoding (enabled by default). This escaping mechanism allows
133 * newlines and double quotes to be included in parameter values.
134 * @return true if circumflex accent decoding is enabled, false if not
135 * @see VCardRawReader#isCaretDecodingEnabled()
136 */
137 public boolean isCaretDecodingEnabled() {
138 return reader.isCaretDecodingEnabled();
139 }
140
141 /**
142 * Sets whether the reader will decode parameter values that use circumflex
143 * accent encoding (enabled by default). This escaping mechanism allows
144 * newlines and double quotes to be included in parameter values.
145 * @param enable true to use circumflex accent decoding, false not to
146 * @see VCardRawReader#setCaretDecodingEnabled(boolean)
147 */
148 public void setCaretDecodingEnabled(boolean enable) {
149 reader.setCaretDecodingEnabled(enable);
150 }
151
152 /**
153 * <p>
154 * Gets the character set to use when decoding quoted-printable values if
155 * the property has no CHARSET parameter, or if the CHARSET parameter is not
156 * a valid character set.
157 * </p>
158 * <p>
159 * By default, the Reader's character encoding will be used. If the Reader
160 * has no character encoding, then the system's default character encoding
161 * will be used.
162 * </p>
163 * @return the character set
164 */
165 public Charset getDefaultQuotedPrintableCharset() {
166 return defaultQuotedPrintableCharset;
167 }
168
169 /**
170 * <p>
171 * Sets the character set to use when decoding quoted-printable values if
172 * the property has no CHARSET parameter, or if the CHARSET parameter is not
173 * a valid character set.
174 * </p>
175 * <p>
176 * By default, the Reader's character encoding will be used. If the Reader
177 * has no character encoding, then the system's default character encoding
178 * will be used.
179 * </p>
180 * @param charset the character set
181 */
182 public void setDefaultQuotedPrintableCharset(Charset charset) {
183 defaultQuotedPrintableCharset = charset;
184 }
185
186 /**
187 * <p>
188 * Registers a property scribe. This is the same as calling:
189 * </p>
190 * <p>
191 * {@code getScribeIndex().register(scribe)}
192 * </p>
193 * @param scribe the scribe to register
194 */
195 public void registerScribe(VCardPropertyScribe<? extends VCardProperty> scribe) {
196 index.register(scribe);
197 }
198
199 /**
200 * Gets the scribe index.
201 * @return the scribe index
202 */
203 public ScribeIndex getScribeIndex() {
204 return index;
205 }
206
207 /**
208 * Sets the scribe index.
209 * @param index the scribe index
210 */
211 public void setScribeIndex(ScribeIndex index) {
212 this.index = index;
213 }
214
215 /**
216 * Gets the warnings from the last vCard that was unmarshalled. This list is
217 * reset every time a new vCard is read.
218 * @return the warnings or empty list if there were no warnings
219 */
220 public List<String> getWarnings() {
221 return new ArrayList<String>(warnings);
222 }
223
224 /**
225 * Reads the next vCard from the data stream.
226 * @return the next vCard or null if there are no more
227 * @throws IOException if there's a problem reading from the stream
228 */
229 public VCard readNext() throws IOException {
230 if (reader.eof()) {
231 return null;
232 }
233
234 warnings.clear();
235
236 VCardDataStreamListenerImpl listener = new VCardDataStreamListenerImpl();
237 reader.start(listener);
238
239 return listener.root;
240 }
241
242 /**
243 * Assigns names to all nameless parameters. v3.0 and v4.0 requires all
244 * parameters to have names, but v2.1 does not.
245 * @param parameters the parameters
246 */
247 private void handleNamelessParameters(VCardParameters parameters) {
248 List<String> namelessParamValues = parameters.get(null);
249 for (String paramValue : namelessParamValues) {
250 String paramName;
251 if (VCardDataType.find(paramValue) != null) {
252 paramName = VCardParameters.VALUE;
253 } else if (Encoding.find(paramValue) != null) {
254 paramName = VCardParameters.ENCODING;
255 } else {
256 //otherwise, assume it's a TYPE
257 paramName = VCardParameters.TYPE;
258 }
259 parameters.put(paramName, paramValue);
260 }
261 parameters.removeAll(null);
262 }
263
264 /**
265 * <p>
266 * Accounts for multi-valued TYPE parameters being enclosed entirely in
267 * double quotes (for example: ADR;TYPE="home,work").
268 * </p>
269 * <p>
270 * Many examples throughout the 4.0 specs show TYPE parameters being encoded
271 * in this way. This conflicts with the ABNF and is noted in the errata.
272 * This method will split the value by comma incase the vendor implemented
273 * it this way.
274 * </p>
275 * @param parameters the parameters
276 */
277 private void handleQuotedMultivaluedTypeParams(VCardParameters parameters) {
278 //account for multi-valued TYPE parameters being enclosed entirely in double quotes
279 //e.g. ADR;TYPE="home,work"
280 for (String typeParam : parameters.getTypes()) {
281 if (!typeParam.contains(",")) {
282 continue;
283 }
284
285 parameters.removeTypes();
286 for (String splitValue : typeParam.split(",")) {
287 parameters.addType(splitValue);
288 }
289 }
290 }
291
292 /**
293 * Decodes the property value if it's encoded in quoted-printable encoding.
294 * Quoted-printable encoding is only supported in v2.1.
295 * @param name the property name
296 * @param parameters the parameters
297 * @param value the property value
298 * @return the decoded property value
299 */
300 private String decodeQuotedPrintable(String name, VCardParameters parameters, String value) {
301 if (parameters.getEncoding() != Encoding.QUOTED_PRINTABLE) {
302 return value;
303 }
304
305 //remove encoding parameter
306 parameters.setEncoding(null);
307
308 //determine the character set
309 Charset charset = null;
310 String charsetStr = parameters.getCharset();
311 if (charsetStr == null) {
312 charset = defaultQuotedPrintableCharset;
313 } else {
314 try {
315 charset = Charset.forName(charsetStr);
316 } catch (Throwable t) {
317 charset = defaultQuotedPrintableCharset;
318
319 //the given charset was invalid, so add a warning
320 addWarning(name, 23, charsetStr, charset.name());
321 }
322 }
323
324 QuotedPrintableCodec codec = new QuotedPrintableCodec(charset.name());
325 try {
326 return codec.decode(value);
327 } catch (DecoderException e) {
328 //only thrown if the charset is invalid, which we know will never happen because we're using a Charset object
329 throw new RuntimeException(e);
330 }
331 }
332
333 /**
334 * Closes the underlying {@link Reader} object.
335 */
336 public void close() throws IOException {
337 reader.close();
338 }
339
340 private void addWarning(String propertyName, int code, Object... args) {
341 String message = Messages.INSTANCE.getParseMessage(code, args);
342 addWarning(propertyName, message);
343 }
344
345 private void addWarning(String propertyName, String message) {
346 int code = (propertyName == null) ? 37 : 36;
347 int line = reader.getLineNum();
348
349 String warning = Messages.INSTANCE.getParseMessage(code, line, propertyName, message);
350 warnings.add(warning);
351 }
352
353 private class VCardDataStreamListenerImpl implements VCardRawReader.VCardDataStreamListener {
354 private VCard root;
355 private final List<Label> labels = new ArrayList<Label>();
356 private final LinkedList<VCard> vcardStack = new LinkedList<VCard>();
357 private EmbeddedVCardException embeddedVCardException;
358
359 public void beginComponent(String name) {
360 if (!"VCARD".equalsIgnoreCase(name)) {
361 return;
362 }
363
364 VCard vcard = new VCard();
365
366 //initialize version to 2.1, since the VERSION property can exist anywhere in a 2.1 vCard
367 vcard.setVersion(VCardVersion.V2_1);
368
369 vcardStack.add(vcard);
370
371 if (root == null) {
372 root = vcard;
373 }
374
375 if (embeddedVCardException != null) {
376 embeddedVCardException.injectVCard(vcard);
377 embeddedVCardException = null;
378 }
379 }
380
381 public void readVersion(VCardVersion version) {
382 if (vcardStack.isEmpty()) {
383 //not in a "VCARD" component
384 return;
385 }
386
387 vcardStack.getLast().setVersion(version);
388 }
389
390 public void readProperty(String group, String name, VCardParameters parameters, String value) {
391 if (vcardStack.isEmpty()) {
392 //not in a "VCARD" component
393 return;
394 }
395
396 if (embeddedVCardException != null) {
397 //the next property was supposed to be the start of a nested vCard, but it wasn't
398 embeddedVCardException.injectVCard(null);
399 embeddedVCardException = null;
400 }
401
402 VCard curVCard = vcardStack.getLast();
403 VCardVersion version = curVCard.getVersion();
404
405 //massage the parameters
406 handleNamelessParameters(parameters);
407 handleQuotedMultivaluedTypeParams(parameters);
408
409 //decode property value from quoted-printable
410 value = decodeQuotedPrintable(name, parameters, value);
411
412 //get the scribe
413 VCardPropertyScribe<? extends VCardProperty> scribe = index.getPropertyScribe(name);
414 if (scribe == null) {
415 scribe = new RawPropertyScribe(name);
416 }
417
418 //get the data type
419 VCardDataType dataType = parameters.getValue();
420 if (dataType == null) {
421 //use the default data type if there is no VALUE parameter
422 dataType = scribe.defaultDataType(version);
423 } else {
424 //remove VALUE parameter if it is set
425 parameters.setValue(null);
426 }
427
428 VCardProperty property;
429 try {
430 Result<? extends VCardProperty> result = scribe.parseText(value, dataType, version, parameters);
431
432 for (String warning : result.getWarnings()) {
433 addWarning(name, warning);
434 }
435
436 property = result.getProperty();
437 property.setGroup(group);
438
439 if (property instanceof Label) {
440 //LABELs must be treated specially so they can be matched up with their ADRs
441 labels.add((Label) property);
442 return;
443 }
444 } catch (SkipMeException e) {
445 addWarning(name, 22, e.getMessage());
446 return;
447 } catch (CannotParseException e) {
448 addWarning(name, 25, value, e.getMessage());
449 property = new RawProperty(name, value);
450 property.setGroup(group);
451 } catch (EmbeddedVCardException e) {
452 //parse an embedded vCard (i.e. the AGENT type)
453 property = e.getProperty();
454
455 if (value.length() == 0 || version == VCardVersion.V2_1) {
456 //a nested vCard is expected to be next (2.1 style)
457 embeddedVCardException = e;
458 } else {
459 //the property value should be an embedded vCard (3.0 style)
460 value = VCardPropertyScribe.unescape(value);
461
462 VCardReader agentReader = new VCardReader(value);
463 try {
464 VCard nestedVCard = agentReader.readNext();
465 if (nestedVCard != null) {
466 e.injectVCard(nestedVCard);
467 }
468 } catch (IOException e2) {
469 //shouldn't be thrown because we're reading from a string
470 } finally {
471 for (String w : agentReader.getWarnings()) {
472 addWarning(name, 26, w);
473 }
474 IOUtils.closeQuietly(agentReader);
475 }
476 }
477 }
478
479 curVCard.addProperty(property);
480 }
481
482 public void endComponent(String name) {
483 if (vcardStack.isEmpty()) {
484 //not in a "VCARD" component
485 return;
486 }
487
488 if (!"VCARD".equalsIgnoreCase(name)) {
489 //not a "VCARD" component
490 return;
491 }
492
493 VCard curVCard = vcardStack.removeLast();
494
495 //assign labels to their addresses
496 for (Label label : labels) {
497 boolean orphaned = true;
498 for (Address adr : curVCard.getAddresses()) {
499 if (adr.getLabel() == null && adr.getTypes().equals(label.getTypes())) {
500 adr.setLabel(label.getValue());
501 orphaned = false;
502 break;
503 }
504 }
505 if (orphaned) {
506 curVCard.addOrphanedLabel(label);
507 }
508 }
509
510 if (vcardStack.isEmpty()) {
511 throw new VCardRawReader.StopReadingException();
512 }
513 }
514
515 public void invalidLine(String line) {
516 if (vcardStack.isEmpty()) {
517 //not in a "VCARD" component
518 return;
519 }
520
521 addWarning(null, 27, line);
522 }
523
524 public void invalidVersion(String version) {
525 if (vcardStack.isEmpty()) {
526 //not in a "VCARD" component
527 return;
528 }
529
530 addWarning("VERSION", 28, version);
531 }
532 }
533 }