001    package ezvcard.io.text;
002    
003    import java.io.Closeable;
004    import java.io.File;
005    import java.io.FileNotFoundException;
006    import java.io.FileReader;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.io.Reader;
011    import java.io.StringReader;
012    import java.nio.charset.Charset;
013    import java.util.ArrayList;
014    import java.util.LinkedList;
015    import java.util.List;
016    
017    import ezvcard.Messages;
018    import ezvcard.VCard;
019    import ezvcard.VCardDataType;
020    import ezvcard.VCardVersion;
021    import ezvcard.io.CannotParseException;
022    import ezvcard.io.EmbeddedVCardException;
023    import ezvcard.io.SkipMeException;
024    import ezvcard.io.scribe.RawPropertyScribe;
025    import ezvcard.io.scribe.ScribeIndex;
026    import ezvcard.io.scribe.VCardPropertyScribe;
027    import ezvcard.io.scribe.VCardPropertyScribe.Result;
028    import ezvcard.parameter.Encoding;
029    import ezvcard.parameter.VCardParameters;
030    import ezvcard.property.Address;
031    import ezvcard.property.Label;
032    import ezvcard.property.RawProperty;
033    import ezvcard.property.VCardProperty;
034    import ezvcard.util.IOUtils;
035    import ezvcard.util.org.apache.commons.codec.DecoderException;
036    import ezvcard.util.org.apache.commons.codec.net.QuotedPrintableCodec;
037    
038    /*
039     Copyright (c) 2013, Michael Angstadt
040     All rights reserved.
041    
042     Redistribution and use in source and binary forms, with or without
043     modification, are permitted provided that the following conditions are met: 
044    
045     1. Redistributions of source code must retain the above copyright notice, this
046     list of conditions and the following disclaimer. 
047     2. Redistributions in binary form must reproduce the above copyright notice,
048     this list of conditions and the following disclaimer in the documentation
049     and/or other materials provided with the distribution. 
050    
051     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
052     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
053     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
054     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
055     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
056     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
057     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
058     ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
059     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
060     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
061    
062     The views and conclusions contained in the software and documentation are those
063     of the authors and should not be interpreted as representing official policies, 
064     either expressed or implied, of the FreeBSD Project.
065     */
066    
067    /**
068     * <p>
069     * Parses {@link VCard} objects from a plain-text vCard data stream.
070     * </p>
071     * <p>
072     * <b>Example:</b>
073     * 
074     * <pre class="brush:java">
075     * File file = new File("vcards.vcf");
076     * VCardReader vcardReader = new VCardReader(file);
077     * VCard vcard;
078     * while ((vcard = vcardReader.readNext()) != null){
079     *   ...
080     * }
081     * vcardReader.close();
082     * </pre>
083     * 
084     * </p>
085     * @author Michael Angstadt
086     */
087    public class VCardReader implements Closeable {
088            private final List<String> warnings = new ArrayList<String>();
089            private ScribeIndex index = new ScribeIndex();
090            private Charset defaultQuotedPrintableCharset;
091            private final VCardRawReader reader;
092    
093            /**
094             * Creates a reader that parses vCards from a string.
095             * @param str the string to read the vCards from
096             */
097            public VCardReader(String str) {
098                    this(new StringReader(str));
099            }
100    
101            /**
102             * Creates a reader that parses vCards from an input stream.
103             * @param in the input stream to read the vCards from
104             */
105            public VCardReader(InputStream in) {
106                    this(new InputStreamReader(in));
107            }
108    
109            /**
110             * Creates a reader that parses vCards from a file.
111             * @param file the file to read the vCards from
112             * @throws FileNotFoundException if the file doesn't exist
113             */
114            public VCardReader(File file) throws FileNotFoundException {
115                    this(new FileReader(file));
116            }
117    
118            /**
119             * Creates a reader that parses vCards from a reader.
120             * @param reader the reader to read the vCards from
121             */
122            public VCardReader(Reader reader) {
123                    this.reader = new VCardRawReader(reader);
124                    defaultQuotedPrintableCharset = this.reader.getEncoding();
125                    if (defaultQuotedPrintableCharset == null) {
126                            defaultQuotedPrintableCharset = Charset.defaultCharset();
127                    }
128            }
129    
130            /**
131             * Gets whether the reader will decode parameter values that use circumflex
132             * accent encoding (enabled by default). This escaping mechanism allows
133             * newlines and double quotes to be included in parameter values.
134             * @return true if circumflex accent decoding is enabled, false if not
135             * @see VCardRawReader#isCaretDecodingEnabled()
136             */
137            public boolean isCaretDecodingEnabled() {
138                    return reader.isCaretDecodingEnabled();
139            }
140    
141            /**
142             * Sets whether the reader will decode parameter values that use circumflex
143             * accent encoding (enabled by default). This escaping mechanism allows
144             * newlines and double quotes to be included in parameter values.
145             * @param enable true to use circumflex accent decoding, false not to
146             * @see VCardRawReader#setCaretDecodingEnabled(boolean)
147             */
148            public void setCaretDecodingEnabled(boolean enable) {
149                    reader.setCaretDecodingEnabled(enable);
150            }
151    
152            /**
153             * <p>
154             * Gets the character set to use when decoding quoted-printable values if
155             * the property has no CHARSET parameter, or if the CHARSET parameter is not
156             * a valid character set.
157             * </p>
158             * <p>
159             * By default, the Reader's character encoding will be used. If the Reader
160             * has no character encoding, then the system's default character encoding
161             * will be used.
162             * </p>
163             * @return the character set
164             */
165            public Charset getDefaultQuotedPrintableCharset() {
166                    return defaultQuotedPrintableCharset;
167            }
168    
169            /**
170             * <p>
171             * Sets the character set to use when decoding quoted-printable values if
172             * the property has no CHARSET parameter, or if the CHARSET parameter is not
173             * a valid character set.
174             * </p>
175             * <p>
176             * By default, the Reader's character encoding will be used. If the Reader
177             * has no character encoding, then the system's default character encoding
178             * will be used.
179             * </p>
180             * @param charset the character set
181             */
182            public void setDefaultQuotedPrintableCharset(Charset charset) {
183                    defaultQuotedPrintableCharset = charset;
184            }
185    
186            /**
187             * <p>
188             * Registers a property scribe. This is the same as calling:
189             * </p>
190             * <p>
191             * {@code getScribeIndex().register(scribe)}
192             * </p>
193             * @param scribe the scribe to register
194             */
195            public void registerScribe(VCardPropertyScribe<? extends VCardProperty> scribe) {
196                    index.register(scribe);
197            }
198    
199            /**
200             * Gets the scribe index.
201             * @return the scribe index
202             */
203            public ScribeIndex getScribeIndex() {
204                    return index;
205            }
206    
207            /**
208             * Sets the scribe index.
209             * @param index the scribe index
210             */
211            public void setScribeIndex(ScribeIndex index) {
212                    this.index = index;
213            }
214    
215            /**
216             * Gets the warnings from the last vCard that was unmarshalled. This list is
217             * reset every time a new vCard is read.
218             * @return the warnings or empty list if there were no warnings
219             */
220            public List<String> getWarnings() {
221                    return new ArrayList<String>(warnings);
222            }
223    
224            /**
225             * Reads the next vCard from the data stream.
226             * @return the next vCard or null if there are no more
227             * @throws IOException if there's a problem reading from the stream
228             */
229            public VCard readNext() throws IOException {
230                    if (reader.eof()) {
231                            return null;
232                    }
233    
234                    warnings.clear();
235    
236                    VCardDataStreamListenerImpl listener = new VCardDataStreamListenerImpl();
237                    reader.start(listener);
238    
239                    return listener.root;
240            }
241    
242            /**
243             * Assigns names to all nameless parameters. v3.0 and v4.0 requires all
244             * parameters to have names, but v2.1 does not.
245             * @param parameters the parameters
246             */
247            private void handleNamelessParameters(VCardParameters parameters) {
248                    List<String> namelessParamValues = parameters.get(null);
249                    for (String paramValue : namelessParamValues) {
250                            String paramName;
251                            if (VCardDataType.find(paramValue) != null) {
252                                    paramName = VCardParameters.VALUE;
253                            } else if (Encoding.find(paramValue) != null) {
254                                    paramName = VCardParameters.ENCODING;
255                            } else {
256                                    //otherwise, assume it's a TYPE
257                                    paramName = VCardParameters.TYPE;
258                            }
259                            parameters.put(paramName, paramValue);
260                    }
261                    parameters.removeAll(null);
262            }
263    
264            /**
265             * <p>
266             * Accounts for multi-valued TYPE parameters being enclosed entirely in
267             * double quotes (for example: ADR;TYPE="home,work").
268             * </p>
269             * <p>
270             * Many examples throughout the 4.0 specs show TYPE parameters being encoded
271             * in this way. This conflicts with the ABNF and is noted in the errata.
272             * This method will split the value by comma incase the vendor implemented
273             * it this way.
274             * </p>
275             * @param parameters the parameters
276             */
277            private void handleQuotedMultivaluedTypeParams(VCardParameters parameters) {
278                    //account for multi-valued TYPE parameters being enclosed entirely in double quotes
279                    //e.g. ADR;TYPE="home,work"
280                    for (String typeParam : parameters.getTypes()) {
281                            if (!typeParam.contains(",")) {
282                                    continue;
283                            }
284    
285                            parameters.removeTypes();
286                            for (String splitValue : typeParam.split(",")) {
287                                    parameters.addType(splitValue);
288                            }
289                    }
290            }
291    
292            /**
293             * Decodes the property value if it's encoded in quoted-printable encoding.
294             * Quoted-printable encoding is only supported in v2.1.
295             * @param name the property name
296             * @param parameters the parameters
297             * @param value the property value
298             * @return the decoded property value
299             */
300            private String decodeQuotedPrintable(String name, VCardParameters parameters, String value) {
301                    if (parameters.getEncoding() != Encoding.QUOTED_PRINTABLE) {
302                            return value;
303                    }
304    
305                    //remove encoding parameter
306                    parameters.setEncoding(null);
307    
308                    //determine the character set
309                    Charset charset = null;
310                    String charsetStr = parameters.getCharset();
311                    if (charsetStr == null) {
312                            charset = defaultQuotedPrintableCharset;
313                    } else {
314                            try {
315                                    charset = Charset.forName(charsetStr);
316                            } catch (Throwable t) {
317                                    charset = defaultQuotedPrintableCharset;
318    
319                                    //the given charset was invalid, so add a warning
320                                    addWarning(name, 23, charsetStr, charset.name());
321                            }
322                    }
323    
324                    QuotedPrintableCodec codec = new QuotedPrintableCodec(charset.name());
325                    try {
326                            return codec.decode(value);
327                    } catch (DecoderException e) {
328                            //only thrown if the charset is invalid, which we know will never happen because we're using a Charset object
329                            throw new RuntimeException(e);
330                    }
331            }
332    
333            /**
334             * Closes the underlying {@link Reader} object.
335             */
336            public void close() throws IOException {
337                    reader.close();
338            }
339    
340            private void addWarning(String propertyName, int code, Object... args) {
341                    String message = Messages.INSTANCE.getParseMessage(code, args);
342                    addWarning(propertyName, message);
343            }
344    
345            private void addWarning(String propertyName, String message) {
346                    int code = (propertyName == null) ? 37 : 36;
347                    int line = reader.getLineNum();
348    
349                    String warning = Messages.INSTANCE.getParseMessage(code, line, propertyName, message);
350                    warnings.add(warning);
351            }
352    
353            private class VCardDataStreamListenerImpl implements VCardRawReader.VCardDataStreamListener {
354                    private VCard root;
355                    private final List<Label> labels = new ArrayList<Label>();
356                    private final LinkedList<VCard> vcardStack = new LinkedList<VCard>();
357                    private EmbeddedVCardException embeddedVCardException;
358    
359                    public void beginComponent(String name) {
360                            if (!"VCARD".equalsIgnoreCase(name)) {
361                                    return;
362                            }
363    
364                            VCard vcard = new VCard();
365    
366                            //initialize version to 2.1, since the VERSION property can exist anywhere in a 2.1 vCard
367                            vcard.setVersion(VCardVersion.V2_1);
368    
369                            vcardStack.add(vcard);
370    
371                            if (root == null) {
372                                    root = vcard;
373                            }
374    
375                            if (embeddedVCardException != null) {
376                                    embeddedVCardException.injectVCard(vcard);
377                                    embeddedVCardException = null;
378                            }
379                    }
380    
381                    public void readVersion(VCardVersion version) {
382                            if (vcardStack.isEmpty()) {
383                                    //not in a "VCARD" component
384                                    return;
385                            }
386    
387                            vcardStack.getLast().setVersion(version);
388                    }
389    
390                    public void readProperty(String group, String name, VCardParameters parameters, String value) {
391                            if (vcardStack.isEmpty()) {
392                                    //not in a "VCARD" component
393                                    return;
394                            }
395    
396                            if (embeddedVCardException != null) {
397                                    //the next property was supposed to be the start of a nested vCard, but it wasn't
398                                    embeddedVCardException.injectVCard(null);
399                                    embeddedVCardException = null;
400                            }
401    
402                            VCard curVCard = vcardStack.getLast();
403                            VCardVersion version = curVCard.getVersion();
404    
405                            //massage the parameters
406                            handleNamelessParameters(parameters);
407                            handleQuotedMultivaluedTypeParams(parameters);
408    
409                            //decode property value from quoted-printable
410                            value = decodeQuotedPrintable(name, parameters, value);
411    
412                            //get the scribe
413                            VCardPropertyScribe<? extends VCardProperty> scribe = index.getPropertyScribe(name);
414                            if (scribe == null) {
415                                    scribe = new RawPropertyScribe(name);
416                            }
417    
418                            //get the data type
419                            VCardDataType dataType = parameters.getValue();
420                            if (dataType == null) {
421                                    //use the default data type if there is no VALUE parameter
422                                    dataType = scribe.defaultDataType(version);
423                            } else {
424                                    //remove VALUE parameter if it is set
425                                    parameters.setValue(null);
426                            }
427    
428                            VCardProperty property;
429                            try {
430                                    Result<? extends VCardProperty> result = scribe.parseText(value, dataType, version, parameters);
431    
432                                    for (String warning : result.getWarnings()) {
433                                            addWarning(name, warning);
434                                    }
435    
436                                    property = result.getProperty();
437                                    property.setGroup(group);
438    
439                                    if (property instanceof Label) {
440                                            //LABELs must be treated specially so they can be matched up with their ADRs
441                                            labels.add((Label) property);
442                                            return;
443                                    }
444                            } catch (SkipMeException e) {
445                                    addWarning(name, 22, e.getMessage());
446                                    return;
447                            } catch (CannotParseException e) {
448                                    addWarning(name, 25, value, e.getMessage());
449                                    property = new RawProperty(name, value);
450                                    property.setGroup(group);
451                            } catch (EmbeddedVCardException e) {
452                                    //parse an embedded vCard (i.e. the AGENT type)
453                                    property = e.getProperty();
454    
455                                    if (value.length() == 0 || version == VCardVersion.V2_1) {
456                                            //a nested vCard is expected to be next (2.1 style)
457                                            embeddedVCardException = e;
458                                    } else {
459                                            //the property value should be an embedded vCard (3.0 style)
460                                            value = VCardPropertyScribe.unescape(value);
461    
462                                            VCardReader agentReader = new VCardReader(value);
463                                            try {
464                                                    VCard nestedVCard = agentReader.readNext();
465                                                    if (nestedVCard != null) {
466                                                            e.injectVCard(nestedVCard);
467                                                    }
468                                            } catch (IOException e2) {
469                                                    //shouldn't be thrown because we're reading from a string
470                                            } finally {
471                                                    for (String w : agentReader.getWarnings()) {
472                                                            addWarning(name, 26, w);
473                                                    }
474                                                    IOUtils.closeQuietly(agentReader);
475                                            }
476                                    }
477                            }
478    
479                            curVCard.addProperty(property);
480                    }
481    
482                    public void endComponent(String name) {
483                            if (vcardStack.isEmpty()) {
484                                    //not in a "VCARD" component
485                                    return;
486                            }
487    
488                            if (!"VCARD".equalsIgnoreCase(name)) {
489                                    //not a "VCARD" component
490                                    return;
491                            }
492    
493                            VCard curVCard = vcardStack.removeLast();
494    
495                            //assign labels to their addresses
496                            for (Label label : labels) {
497                                    boolean orphaned = true;
498                                    for (Address adr : curVCard.getAddresses()) {
499                                            if (adr.getLabel() == null && adr.getTypes().equals(label.getTypes())) {
500                                                    adr.setLabel(label.getValue());
501                                                    orphaned = false;
502                                                    break;
503                                            }
504                                    }
505                                    if (orphaned) {
506                                            curVCard.addOrphanedLabel(label);
507                                    }
508                            }
509    
510                            if (vcardStack.isEmpty()) {
511                                    throw new VCardRawReader.StopReadingException();
512                            }
513                    }
514    
515                    public void invalidLine(String line) {
516                            if (vcardStack.isEmpty()) {
517                                    //not in a "VCARD" component
518                                    return;
519                            }
520    
521                            addWarning(null, 27, line);
522                    }
523    
524                    public void invalidVersion(String version) {
525                            if (vcardStack.isEmpty()) {
526                                    //not in a "VCARD" component
527                                    return;
528                            }
529    
530                            addWarning("VERSION", 28, version);
531                    }
532            }
533    }