001    package ezvcard.io.text;
002    
003    import static ezvcard.util.StringUtils.NEWLINE;
004    
005    import java.io.Closeable;
006    import java.io.IOException;
007    import java.io.Reader;
008    import java.nio.charset.Charset;
009    
010    import ezvcard.VCardException;
011    import ezvcard.VCardVersion;
012    import ezvcard.parameter.VCardParameters;
013    import ezvcard.util.StringUtils;
014    
015    /*
016     Copyright (c) 2013, Michael Angstadt
017     All rights reserved.
018    
019     Redistribution and use in source and binary forms, with or without
020     modification, are permitted provided that the following conditions are met: 
021    
022     1. Redistributions of source code must retain the above copyright notice, this
023     list of conditions and the following disclaimer. 
024     2. Redistributions in binary form must reproduce the above copyright notice,
025     this list of conditions and the following disclaimer in the documentation
026     and/or other materials provided with the distribution. 
027    
028     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
029     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
030     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
031     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
032     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
033     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
034     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
035     ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
036     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
037     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
038    
039     The views and conclusions contained in the software and documentation are those
040     of the authors and should not be interpreted as representing official policies, 
041     either expressed or implied, of the FreeBSD Project.
042     */
043    
044    /**
045     * Parses a vCard data stream.
046     * @author Michael Angstadt
047     */
048    public class VCardRawReader implements Closeable {
049            private final FoldedLineReader reader;
050            private boolean caretDecodingEnabled = true;
051            private boolean eof = false;
052            private VCardVersion version = VCardVersion.V2_1;
053    
054            /**
055             * Creates a new reader.
056             * @param reader the reader to the data stream
057             */
058            public VCardRawReader(Reader reader) {
059                    this.reader = new FoldedLineReader(reader);
060            }
061    
062            /**
063             * Gets the line number of the last line that was read.
064             * @return the line number
065             */
066            public int getLineNum() {
067                    return reader.getLineNum();
068            }
069    
070            /**
071             * Starts or continues reading from the vCard data stream.
072             * @param listener handles the vCard data as it is read off the wire
073             * @throws IOException if there is an I/O problem
074             */
075            public void start(VCardDataStreamListener listener) throws IOException {
076                    String line;
077                    while ((line = reader.readLine()) != null) {
078                            try {
079                                    parseLine(line, listener);
080                            } catch (StopReadingException e) {
081                                    return;
082                            }
083                    }
084                    eof = true;
085            }
086    
087            private void parseLine(String line, VCardDataStreamListener listener) {
088                    String group = null;
089                    String propertyName = null;
090                    VCardParameters parameters = new VCardParameters();
091                    String value = null;
092    
093                    char escapeChar = 0; //is the next char escaped?
094                    boolean inQuotes = false; //are we inside of double quotes?
095                    StringBuilder buffer = new StringBuilder();
096                    String curParamName = null;
097                    for (int i = 0; i < line.length(); i++) {
098                            char ch = line.charAt(i);
099                            if (escapeChar != 0) {
100                                    if (escapeChar == '\\') {
101                                            if (ch == '\\') {
102                                                    buffer.append(ch);
103                                            } else if (ch == 'n' || ch == 'N') {
104                                                    //newlines appear as "\n" or "\N" (see RFC 2426 p.7)
105                                                    buffer.append(NEWLINE);
106                                            } else if (ch == '"' && version != VCardVersion.V2_1) {
107                                                    //double quotes don't need to be escaped in 2.1 parameter values because they have no special meaning
108                                                    buffer.append(ch);
109                                            } else if (ch == ';' && version == VCardVersion.V2_1) {
110                                                    //semi-colons can only be escaped in 2.1 parameter values (see section 2 of specs)
111                                                    //if a 3.0/4.0 param value has semi-colons, the value should be surrounded in double quotes
112                                                    buffer.append(ch);
113                                            } else {
114                                                    //treat the escape character as a normal character because it's not a valid escape sequence
115                                                    buffer.append(escapeChar).append(ch);
116                                            }
117                                    } else if (escapeChar == '^') {
118                                            if (ch == '^') {
119                                                    buffer.append(ch);
120                                            } else if (ch == 'n') {
121                                                    buffer.append(NEWLINE);
122                                            } else if (ch == '\'') {
123                                                    buffer.append('"');
124                                            } else {
125                                                    //treat the escape character as a normal character because it's not a valid escape sequence
126                                                    buffer.append(escapeChar).append(ch);
127                                            }
128                                    }
129                                    escapeChar = 0;
130                            } else if (ch == '\\' || (ch == '^' && version != VCardVersion.V2_1 && caretDecodingEnabled)) {
131                                    escapeChar = ch;
132                            } else if (ch == '.' && group == null && propertyName == null) {
133                                    group = buffer.toString();
134                                    buffer.setLength(0);
135                            } else if ((ch == ';' || ch == ':') && !inQuotes) {
136                                    if (propertyName == null) {
137                                            propertyName = buffer.toString();
138                                    } else {
139                                            //parameter value
140                                            String paramValue = buffer.toString();
141                                            if (version == VCardVersion.V2_1) {
142                                                    //2.1 allows whitespace to surround the "=", so remove it
143                                                    paramValue = StringUtils.ltrim(paramValue);
144                                            }
145                                            parameters.put(curParamName, paramValue);
146                                            curParamName = null;
147                                    }
148                                    buffer.setLength(0);
149    
150                                    if (ch == ':') {
151                                            if (i < line.length() - 1) {
152                                                    value = line.substring(i + 1);
153                                            } else {
154                                                    value = "";
155                                            }
156                                            break;
157                                    }
158                            } else if (ch == ',' && !inQuotes && version != VCardVersion.V2_1) {
159                                    //multi-valued parameter
160                                    parameters.put(curParamName, buffer.toString());
161                                    buffer.setLength(0);
162                            } else if (ch == '=' && curParamName == null) {
163                                    //parameter name
164                                    String paramName = buffer.toString();
165                                    if (version == VCardVersion.V2_1) {
166                                            //2.1 allows whitespace to surround the "=", so remove it
167                                            paramName = StringUtils.rtrim(paramName);
168                                    }
169                                    curParamName = paramName;
170                                    buffer.setLength(0);
171                            } else if (ch == '"' && version != VCardVersion.V2_1) {
172                                    //2.1 doesn't use the quoting mechanism
173                                    inQuotes = !inQuotes;
174                            } else {
175                                    buffer.append(ch);
176                            }
177                    }
178    
179                    if (propertyName == null || value == null) {
180                            listener.invalidLine(line);
181                            return;
182                    }
183                    if ("VERSION".equalsIgnoreCase(propertyName)) {
184                            VCardVersion version = VCardVersion.valueOfByStr(value.trim());
185                            if (version == null) {
186                                    listener.invalidVersion(value);
187                            } else {
188                                    this.version = version;
189                                    listener.readVersion(version);
190                            }
191                            return;
192                    }
193                    if ("BEGIN".equalsIgnoreCase(propertyName)) {
194                            listener.beginComponent(value.trim());
195                            return;
196                    }
197                    if ("END".equalsIgnoreCase(propertyName)) {
198                            listener.endComponent(value.trim());
199                            return;
200                    }
201                    listener.readProperty(group, propertyName, parameters, value);
202            }
203    
204            /**
205             * <p>
206             * Gets whether the reader will decode parameter values that use circumflex
207             * accent encoding (enabled by default). This escaping mechanism allows
208             * newlines and double quotes to be included in parameter values.
209             * </p>
210             * 
211             * <table border="1">
212             * <tr>
213             * <th>Raw Character</th>
214             * <th>Encoded Character</th>
215             * </tr>
216             * <tr>
217             * <td>{@code "}</td>
218             * <td>{@code ^'}</td>
219             * </tr>
220             * <tr>
221             * <td><i>newline</i></td>
222             * <td>{@code ^n}</td>
223             * </tr>
224             * <tr>
225             * <td>{@code ^}</td>
226             * <td>{@code ^^}</td>
227             * </tr>
228             * </table>
229             * 
230             * <p>
231             * Example:
232             * </p>
233             * 
234             * <pre>
235             * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
236             *  sburgh, PA 15212":40.446816;80.00566
237             * </pre>
238             * 
239             * @return true if circumflex accent decoding is enabled, false if not
240             * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
241             */
242            public boolean isCaretDecodingEnabled() {
243                    return caretDecodingEnabled;
244            }
245    
246            /**
247             * <p>
248             * Sets whether the reader will decode parameter values that use circumflex
249             * accent encoding (enabled by default). This escaping mechanism allows
250             * newlines and double quotes to be included in parameter values.
251             * </p>
252             * 
253             * <table border="1">
254             * <tr>
255             * <th>Raw Character</th>
256             * <th>Encoded Character</th>
257             * </tr>
258             * <tr>
259             * <td>{@code "}</td>
260             * <td>{@code ^'}</td>
261             * </tr>
262             * <tr>
263             * <td><i>newline</i></td>
264             * <td>{@code ^n}</td>
265             * </tr>
266             * <tr>
267             * <td>{@code ^}</td>
268             * <td>{@code ^^}</td>
269             * </tr>
270             * </table>
271             * 
272             * <p>
273             * Example:
274             * </p>
275             * 
276             * <pre>
277             * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
278             *  sburgh, PA 15212":geo:40.446816,-80.00566
279             * </pre>
280             * 
281             * @param enable true to use circumflex accent decoding, false not to
282             * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
283             */
284            public void setCaretDecodingEnabled(boolean enable) {
285                    caretDecodingEnabled = enable;
286            }
287    
288            /**
289             * Determines whether the end of the data stream has been reached.
290             * @return true if the end has been reached, false if not
291             */
292            public boolean eof() {
293                    return eof;
294            }
295    
296            /**
297             * Gets the character encoding of the reader.
298             * @return the character encoding or null if none is defined
299             */
300            public Charset getEncoding() {
301                    return reader.getEncoding();
302            }
303    
304            /**
305             * Handles the vCard data as it is read off the data stream. Each one of
306             * this interface's methods may throw a {@link StopReadingException} at any
307             * time to force the parser to stop reading from the data stream. This will
308             * cause the reader to return from the {@link VCardRawReader#start} method.
309             * To continue reading from the data stream, simply call the
310             * {@link VCardRawReader#start} method again.
311             * @author Michael Angstadt
312             */
313            public static interface VCardDataStreamListener {
314                    /**
315                     * Called when a component begins (when a "BEGIN:NAME" property is
316                     * reached).
317                     * @param name the component name (e.g. "VCARD")
318                     * @throws StopReadingException to force the reader to stop reading from
319                     * the data stream
320                     */
321                    void beginComponent(String name);
322    
323                    /**
324                     * Called when a property is read.
325                     * @param group the group name or null if no group was defined
326                     * @param name the property name (e.g. "VERSION")
327                     * @param parameters the parameters
328                     * @param value the property value
329                     * @throws StopReadingException to force the reader to stop reading from
330                     * the data stream
331                     */
332                    void readProperty(String group, String name, VCardParameters parameters, String value);
333    
334                    /**
335                     * Called when the vCard's VERSION property is read.
336                     * @param version the version that was read
337                     */
338                    void readVersion(VCardVersion version);
339    
340                    /**
341                     * Called when a component ends (when a "END:NAME" property is reached).
342                     * @param name the component name (e.g. "VCARD")
343                     * @throws StopReadingException to force the reader to stop reading from
344                     * the data stream
345                     */
346                    void endComponent(String name);
347    
348                    /**
349                     * Called when a line cannot be parsed.
350                     * @param line the unparseable line
351                     * @throws StopReadingException to force the reader to stop reading from
352                     * the data stream
353                     */
354                    void invalidLine(String line);
355    
356                    /**
357                     * Called when an invalid VERSION property is encountered.
358                     * @param version the invalid version
359                     */
360                    void invalidVersion(String version);
361            }
362    
363            /**
364             * Instructs a {@link VCardRawReader} to stop reading from the data stream
365             * when thrown from a {@link VCardDataStreamListener} implementation.
366             * @author Michael Angstadt
367             */
368            @SuppressWarnings("serial")
369            public static class StopReadingException extends VCardException {
370                    //empty
371            }
372    
373            /**
374             * Closes the underlying {@link Reader} object.
375             */
376            public void close() throws IOException {
377                    reader.close();
378            }
379    }