001    package ezvcard.io.text;
002    
003    import java.io.Closeable;
004    import java.io.File;
005    import java.io.FileNotFoundException;
006    import java.io.FileReader;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.io.Reader;
011    import java.io.StringReader;
012    import java.nio.charset.Charset;
013    import java.nio.charset.IllegalCharsetNameException;
014    import java.nio.charset.UnsupportedCharsetException;
015    import java.util.ArrayList;
016    import java.util.LinkedList;
017    import java.util.List;
018    
019    import ezvcard.Messages;
020    import ezvcard.VCard;
021    import ezvcard.VCardDataType;
022    import ezvcard.VCardVersion;
023    import ezvcard.io.CannotParseException;
024    import ezvcard.io.EmbeddedVCardException;
025    import ezvcard.io.SkipMeException;
026    import ezvcard.io.scribe.RawPropertyScribe;
027    import ezvcard.io.scribe.ScribeIndex;
028    import ezvcard.io.scribe.VCardPropertyScribe;
029    import ezvcard.io.scribe.VCardPropertyScribe.Result;
030    import ezvcard.parameter.Encoding;
031    import ezvcard.parameter.VCardParameters;
032    import ezvcard.property.Address;
033    import ezvcard.property.Label;
034    import ezvcard.property.RawProperty;
035    import ezvcard.property.VCardProperty;
036    import ezvcard.util.IOUtils;
037    import ezvcard.util.org.apache.commons.codec.DecoderException;
038    import ezvcard.util.org.apache.commons.codec.net.QuotedPrintableCodec;
039    
040    /*
041     Copyright (c) 2013, Michael Angstadt
042     All rights reserved.
043    
044     Redistribution and use in source and binary forms, with or without
045     modification, are permitted provided that the following conditions are met: 
046    
047     1. Redistributions of source code must retain the above copyright notice, this
048     list of conditions and the following disclaimer. 
049     2. Redistributions in binary form must reproduce the above copyright notice,
050     this list of conditions and the following disclaimer in the documentation
051     and/or other materials provided with the distribution. 
052    
053     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
054     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
055     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
056     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
057     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
058     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
059     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
060     ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
061     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
062     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
063    
064     The views and conclusions contained in the software and documentation are those
065     of the authors and should not be interpreted as representing official policies, 
066     either expressed or implied, of the FreeBSD Project.
067     */
068    
069    /**
070     * <p>
071     * Parses {@link VCard} objects from a plain-text vCard data stream.
072     * </p>
073     * <p>
074     * <b>Example:</b>
075     * 
076     * <pre class="brush:java">
077     * File file = new File("vcards.vcf");
078     * VCardReader vcardReader = new VCardReader(file);
079     * VCard vcard;
080     * while ((vcard = vcardReader.readNext()) != null){
081     *   ...
082     * }
083     * vcardReader.close();
084     * </pre>
085     * 
086     * </p>
087     * @author Michael Angstadt
088     */
089    public class VCardReader implements Closeable {
090            private final List<String> warnings = new ArrayList<String>();
091            private ScribeIndex index = new ScribeIndex();
092            private final VCardRawReader reader;
093    
094            /**
095             * Creates a reader that parses vCards from a string.
096             * @param str the string to read the vCards from
097             */
098            public VCardReader(String str) {
099                    this(new StringReader(str));
100            }
101    
102            /**
103             * Creates a reader that parses vCards from an input stream.
104             * @param in the input stream to read the vCards from
105             */
106            public VCardReader(InputStream in) {
107                    this(new InputStreamReader(in));
108            }
109    
110            /**
111             * Creates a reader that parses vCards from a file.
112             * @param file the file to read the vCards from
113             * @throws FileNotFoundException if the file doesn't exist
114             */
115            public VCardReader(File file) throws FileNotFoundException {
116                    this(new FileReader(file));
117            }
118    
119            /**
120             * Creates a reader that parses vCards from a reader.
121             * @param reader the reader to read the vCards from
122             */
123            public VCardReader(Reader reader) {
124                    this.reader = new VCardRawReader(reader);
125            }
126    
127            /**
128             * Gets whether the reader will decode parameter values that use circumflex
129             * accent encoding (enabled by default). This escaping mechanism allows
130             * newlines and double quotes to be included in parameter values.
131             * @return true if circumflex accent decoding is enabled, false if not
132             * @see VCardRawReader#isCaretDecodingEnabled()
133             */
134            public boolean isCaretDecodingEnabled() {
135                    return reader.isCaretDecodingEnabled();
136            }
137    
138            /**
139             * Sets whether the reader will decode parameter values that use circumflex
140             * accent encoding (enabled by default). This escaping mechanism allows
141             * newlines and double quotes to be included in parameter values.
142             * @param enable true to use circumflex accent decoding, false not to
143             * @see VCardRawReader#setCaretDecodingEnabled(boolean)
144             */
145            public void setCaretDecodingEnabled(boolean enable) {
146                    reader.setCaretDecodingEnabled(enable);
147            }
148    
149            /**
150             * <p>
151             * Registers a property scribe. This is the same as calling:
152             * </p>
153             * <p>
154             * {@code getScribeIndex().register(scribe)}
155             * </p>
156             * @param scribe the scribe to register
157             */
158            public void registerScribe(VCardPropertyScribe<? extends VCardProperty> scribe) {
159                    index.register(scribe);
160            }
161    
162            /**
163             * Gets the scribe index.
164             * @return the scribe index
165             */
166            public ScribeIndex getScribeIndex() {
167                    return index;
168            }
169    
170            /**
171             * Sets the scribe index.
172             * @param index the scribe index
173             */
174            public void setScribeIndex(ScribeIndex index) {
175                    this.index = index;
176            }
177    
178            /**
179             * Gets the warnings from the last vCard that was unmarshalled. This list is
180             * reset every time a new vCard is read.
181             * @return the warnings or empty list if there were no warnings
182             */
183            public List<String> getWarnings() {
184                    return new ArrayList<String>(warnings);
185            }
186    
187            /**
188             * Reads the next vCard from the data stream.
189             * @return the next vCard or null if there are no more
190             * @throws IOException if there's a problem reading from the stream
191             */
192            public VCard readNext() throws IOException {
193                    if (reader.eof()) {
194                            return null;
195                    }
196    
197                    warnings.clear();
198    
199                    VCardDataStreamListenerImpl listener = new VCardDataStreamListenerImpl();
200                    reader.start(listener);
201    
202                    return listener.root;
203            }
204    
205            /**
206             * Assigns names to all nameless parameters. v3.0 and v4.0 requires all
207             * parameters to have names, but v2.1 does not.
208             * @param parameters the parameters
209             */
210            private void handleNamelessParameters(VCardParameters parameters) {
211                    List<String> namelessParamValues = parameters.get(null);
212                    for (String paramValue : namelessParamValues) {
213                            String paramName;
214                            if (VCardDataType.find(paramValue) != null) {
215                                    paramName = VCardParameters.VALUE;
216                            } else if (Encoding.find(paramValue) != null) {
217                                    paramName = VCardParameters.ENCODING;
218                            } else {
219                                    //otherwise, assume it's a TYPE
220                                    paramName = VCardParameters.TYPE;
221                            }
222                            parameters.put(paramName, paramValue);
223                    }
224                    parameters.removeAll(null);
225            }
226    
227            /**
228             * <p>
229             * Accounts for multi-valued TYPE parameters being enclosed entirely in
230             * double quotes (for example: ADR;TYPE="home,work").
231             * </p>
232             * <p>
233             * Many examples throughout the 4.0 specs show TYPE parameters being encoded
234             * in this way. This conflicts with the ABNF and is noted in the errata.
235             * This method will split the value by comma incase the vendor implemented
236             * it this way.
237             * </p>
238             * @param parameters the parameters
239             */
240            private void handleQuotedMultivaluedTypeParams(VCardParameters parameters) {
241                    //account for multi-valued TYPE parameters being enclosed entirely in double quotes
242                    //e.g. ADR;TYPE="home,work"
243                    for (String typeParam : parameters.getTypes()) {
244                            if (!typeParam.contains(",")) {
245                                    continue;
246                            }
247    
248                            parameters.removeTypes();
249                            for (String splitValue : typeParam.split(",")) {
250                                    parameters.addType(splitValue);
251                            }
252                    }
253            }
254    
255            /**
256             * Decodes the property value if it's encoded in quoted-printable encoding.
257             * Quoted-printable encoding is only supported in v2.1.
258             * @param name the property name
259             * @param parameters the parameters
260             * @param value the property value
261             * @return the decoded property value
262             */
263            private String decodeQuotedPrintable(String name, VCardParameters parameters, String value) {
264                    if (parameters.getEncoding() != Encoding.QUOTED_PRINTABLE) {
265                            return value;
266                    }
267    
268                    //remove encoding parameter
269                    parameters.setEncoding(null);
270    
271                    //determine the character set
272                    Charset charset = null;
273                    String charsetStr = parameters.getCharset();
274                    if (charsetStr != null) {
275                            try {
276                                    charset = Charset.forName(charsetStr);
277                            } catch (IllegalCharsetNameException e) {
278                                    charset = null;
279                            } catch (UnsupportedCharsetException e) {
280                                    charset = null;
281                            }
282                    }
283                    if (charset == null) {
284                            charset = reader.getEncoding();
285                            if (charset == null) {
286                                    charset = Charset.defaultCharset();
287                            }
288                            if (charsetStr != null) {
289                                    //the given charset was invalid, so add a warning
290                                    addWarning(name, 23, charsetStr, charset);
291                            }
292                    }
293    
294                    QuotedPrintableCodec codec = new QuotedPrintableCodec(charset.name());
295                    try {
296                            return codec.decode(value);
297                    } catch (DecoderException e) {
298                            //only thrown if the charset is invalid, which we know will never happen because we're using a Charset object
299                            throw new RuntimeException(e);
300                    }
301            }
302    
303            /**
304             * Closes the underlying {@link Reader} object.
305             */
306            public void close() throws IOException {
307                    reader.close();
308            }
309    
310            private void addWarning(String propertyName, int code, Object... args) {
311                    String message = Messages.INSTANCE.getParseMessage(code, args);
312                    addWarning(propertyName, message);
313            }
314    
315            private void addWarning(String propertyName, String message) {
316                    int code = (propertyName == null) ? 37 : 36;
317                    int line = reader.getLineNum();
318    
319                    String warning = Messages.INSTANCE.getParseMessage(code, line, propertyName, message);
320                    warnings.add(warning);
321            }
322    
323            private class VCardDataStreamListenerImpl implements VCardRawReader.VCardDataStreamListener {
324                    private VCard root;
325                    private final List<Label> labels = new ArrayList<Label>();
326                    private final LinkedList<VCard> vcardStack = new LinkedList<VCard>();
327                    private EmbeddedVCardException embeddedVCardException;
328    
329                    public void beginComponent(String name) {
330                            if (!"VCARD".equalsIgnoreCase(name)) {
331                                    return;
332                            }
333    
334                            VCard vcard = new VCard();
335    
336                            //initialize version to 2.1, since the VERSION property can exist anywhere in a 2.1 vCard
337                            vcard.setVersion(VCardVersion.V2_1);
338    
339                            vcardStack.add(vcard);
340    
341                            if (root == null) {
342                                    root = vcard;
343                            }
344    
345                            if (embeddedVCardException != null) {
346                                    embeddedVCardException.injectVCard(vcard);
347                                    embeddedVCardException = null;
348                            }
349                    }
350    
351                    public void readVersion(VCardVersion version) {
352                            if (vcardStack.isEmpty()) {
353                                    //not in a "VCARD" component
354                                    return;
355                            }
356    
357                            vcardStack.getLast().setVersion(version);
358                    }
359    
360                    public void readProperty(String group, String name, VCardParameters parameters, String value) {
361                            if (vcardStack.isEmpty()) {
362                                    //not in a "VCARD" component
363                                    return;
364                            }
365    
366                            if (embeddedVCardException != null) {
367                                    //the next property was supposed to be the start of a nested vCard, but it wasn't
368                                    embeddedVCardException.injectVCard(null);
369                                    embeddedVCardException = null;
370                            }
371    
372                            VCard curVCard = vcardStack.getLast();
373                            VCardVersion version = curVCard.getVersion();
374    
375                            //massage the parameters
376                            handleNamelessParameters(parameters);
377                            handleQuotedMultivaluedTypeParams(parameters);
378    
379                            //decode property value from quoted-printable
380                            value = decodeQuotedPrintable(name, parameters, value);
381    
382                            //get the scribe
383                            VCardPropertyScribe<? extends VCardProperty> scribe = index.getPropertyScribe(name);
384                            if (scribe == null) {
385                                    scribe = new RawPropertyScribe(name);
386                            }
387    
388                            //get the data type
389                            VCardDataType dataType = parameters.getValue();
390                            if (dataType == null) {
391                                    //use the default data type if there is no VALUE parameter
392                                    dataType = scribe.defaultDataType(version);
393                            } else {
394                                    //remove VALUE parameter if it is set
395                                    parameters.setValue(null);
396                            }
397    
398                            VCardProperty property;
399                            try {
400                                    Result<? extends VCardProperty> result = scribe.parseText(value, dataType, version, parameters);
401    
402                                    for (String warning : result.getWarnings()) {
403                                            addWarning(name, warning);
404                                    }
405    
406                                    property = result.getProperty();
407                                    property.setGroup(group);
408    
409                                    if (property instanceof Label) {
410                                            //LABELs must be treated specially so they can be matched up with their ADRs
411                                            labels.add((Label) property);
412                                            return;
413                                    }
414                            } catch (SkipMeException e) {
415                                    addWarning(name, 22, e.getMessage());
416                                    return;
417                            } catch (CannotParseException e) {
418                                    addWarning(name, 25, value, e.getMessage());
419                                    property = new RawProperty(name, value);
420                                    property.setGroup(group);
421                            } catch (EmbeddedVCardException e) {
422                                    //parse an embedded vCard (i.e. the AGENT type)
423                                    property = e.getProperty();
424    
425                                    if (value.length() == 0 || version == VCardVersion.V2_1) {
426                                            //a nested vCard is expected to be next (2.1 style)
427                                            embeddedVCardException = e;
428                                    } else {
429                                            //the property value should be an embedded vCard (3.0 style)
430                                            value = VCardPropertyScribe.unescape(value);
431    
432                                            VCardReader agentReader = new VCardReader(value);
433                                            try {
434                                                    VCard nestedVCard = agentReader.readNext();
435                                                    if (nestedVCard != null) {
436                                                            e.injectVCard(nestedVCard);
437                                                    }
438                                            } catch (IOException e2) {
439                                                    //shouldn't be thrown because we're reading from a string
440                                            } finally {
441                                                    for (String w : agentReader.getWarnings()) {
442                                                            addWarning(name, 26, w);
443                                                    }
444                                                    IOUtils.closeQuietly(agentReader);
445                                            }
446                                    }
447                            }
448    
449                            curVCard.addProperty(property);
450                    }
451    
452                    public void endComponent(String name) {
453                            if (vcardStack.isEmpty()) {
454                                    //not in a "VCARD" component
455                                    return;
456                            }
457    
458                            if (!"VCARD".equalsIgnoreCase(name)) {
459                                    //not a "VCARD" component
460                                    return;
461                            }
462    
463                            VCard curVCard = vcardStack.removeLast();
464    
465                            //assign labels to their addresses
466                            for (Label label : labels) {
467                                    boolean orphaned = true;
468                                    for (Address adr : curVCard.getAddresses()) {
469                                            if (adr.getLabel() == null && adr.getTypes().equals(label.getTypes())) {
470                                                    adr.setLabel(label.getValue());
471                                                    orphaned = false;
472                                                    break;
473                                            }
474                                    }
475                                    if (orphaned) {
476                                            curVCard.addOrphanedLabel(label);
477                                    }
478                            }
479    
480                            if (vcardStack.isEmpty()) {
481                                    throw new VCardRawReader.StopReadingException();
482                            }
483                    }
484    
485                    public void invalidLine(String line) {
486                            if (vcardStack.isEmpty()) {
487                                    //not in a "VCARD" component
488                                    return;
489                            }
490    
491                            addWarning(null, 27, line);
492                    }
493    
494                    public void invalidVersion(String version) {
495                            if (vcardStack.isEmpty()) {
496                                    //not in a "VCARD" component
497                                    return;
498                            }
499    
500                            addWarning("VERSION", 28, version);
501                    }
502            }
503    }