001 package ezvcard.io.text;
002
003 import static ezvcard.util.StringUtils.NEWLINE;
004
005 import java.io.Closeable;
006 import java.io.IOException;
007 import java.io.Reader;
008 import java.nio.charset.Charset;
009
010 import ezvcard.VCardException;
011 import ezvcard.VCardVersion;
012 import ezvcard.parameter.VCardParameters;
013 import ezvcard.util.StringUtils;
014
015 /*
016 Copyright (c) 2013, Michael Angstadt
017 All rights reserved.
018
019 Redistribution and use in source and binary forms, with or without
020 modification, are permitted provided that the following conditions are met:
021
022 1. Redistributions of source code must retain the above copyright notice, this
023 list of conditions and the following disclaimer.
024 2. Redistributions in binary form must reproduce the above copyright notice,
025 this list of conditions and the following disclaimer in the documentation
026 and/or other materials provided with the distribution.
027
028 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
029 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
030 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
031 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
032 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
033 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
034 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
035 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
036 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
037 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
038
039 The views and conclusions contained in the software and documentation are those
040 of the authors and should not be interpreted as representing official policies,
041 either expressed or implied, of the FreeBSD Project.
042 */
043
044 /**
045 * Parses a vCard data stream.
046 * @author Michael Angstadt
047 */
048 public class VCardRawReader implements Closeable {
049 private final FoldedLineReader reader;
050 private boolean caretDecodingEnabled = true;
051 private boolean eof = false;
052 private VCardVersion version = VCardVersion.V2_1;
053
054 /**
055 * Creates a new reader.
056 * @param reader the reader to the data stream
057 */
058 public VCardRawReader(Reader reader) {
059 this.reader = new FoldedLineReader(reader);
060 }
061
062 /**
063 * Gets the line number of the last line that was read.
064 * @return the line number
065 */
066 public int getLineNum() {
067 return reader.getLineNum();
068 }
069
070 /**
071 * Starts or continues reading from the vCard data stream.
072 * @param listener handles the vCard data as it is read off the wire
073 * @throws IOException if there is an I/O problem
074 */
075 public void start(VCardDataStreamListener listener) throws IOException {
076 String line;
077 while ((line = reader.readLine()) != null) {
078 try {
079 parseLine(line, listener);
080 } catch (StopReadingException e) {
081 return;
082 }
083 }
084 eof = true;
085 }
086
087 private void parseLine(String line, VCardDataStreamListener listener) {
088 String group = null;
089 String propertyName = null;
090 VCardParameters parameters = new VCardParameters();
091 String value = null;
092
093 char escapeChar = 0; //is the next char escaped?
094 boolean inQuotes = false; //are we inside of double quotes?
095 StringBuilder buffer = new StringBuilder();
096 String curParamName = null;
097 for (int i = 0; i < line.length(); i++) {
098 char ch = line.charAt(i);
099 if (escapeChar != 0) {
100 if (escapeChar == '\\') {
101 if (ch == '\\') {
102 buffer.append(ch);
103 } else if (ch == 'n' || ch == 'N') {
104 //newlines appear as "\n" or "\N" (see RFC 2426 p.7)
105 buffer.append(NEWLINE);
106 } else if (ch == '"' && version != VCardVersion.V2_1) {
107 //double quotes don't need to be escaped in 2.1 parameter values because they have no special meaning
108 buffer.append(ch);
109 } else if (ch == ';' && version == VCardVersion.V2_1) {
110 //semi-colons can only be escaped in 2.1 parameter values (see section 2 of specs)
111 //if a 3.0/4.0 param value has semi-colons, the value should be surrounded in double quotes
112 buffer.append(ch);
113 } else {
114 //treat the escape character as a normal character because it's not a valid escape sequence
115 buffer.append(escapeChar).append(ch);
116 }
117 } else if (escapeChar == '^') {
118 if (ch == '^') {
119 buffer.append(ch);
120 } else if (ch == 'n') {
121 buffer.append(NEWLINE);
122 } else if (ch == '\'') {
123 buffer.append('"');
124 } else {
125 //treat the escape character as a normal character because it's not a valid escape sequence
126 buffer.append(escapeChar).append(ch);
127 }
128 }
129 escapeChar = 0;
130 } else if (ch == '\\' || (ch == '^' && version != VCardVersion.V2_1 && caretDecodingEnabled)) {
131 escapeChar = ch;
132 } else if (ch == '.' && group == null && propertyName == null) {
133 group = buffer.toString();
134 buffer.setLength(0);
135 } else if ((ch == ';' || ch == ':') && !inQuotes) {
136 if (propertyName == null) {
137 propertyName = buffer.toString();
138 } else {
139 //parameter value
140 String paramValue = buffer.toString();
141 if (version == VCardVersion.V2_1) {
142 //2.1 allows whitespace to surround the "=", so remove it
143 paramValue = StringUtils.ltrim(paramValue);
144 }
145 parameters.put(curParamName, paramValue);
146 curParamName = null;
147 }
148 buffer.setLength(0);
149
150 if (ch == ':') {
151 if (i < line.length() - 1) {
152 value = line.substring(i + 1);
153 } else {
154 value = "";
155 }
156 break;
157 }
158 } else if (ch == ',' && !inQuotes && version != VCardVersion.V2_1) {
159 //multi-valued parameter
160 parameters.put(curParamName, buffer.toString());
161 buffer.setLength(0);
162 } else if (ch == '=' && curParamName == null) {
163 //parameter name
164 String paramName = buffer.toString();
165 if (version == VCardVersion.V2_1) {
166 //2.1 allows whitespace to surround the "=", so remove it
167 paramName = StringUtils.rtrim(paramName);
168 }
169 curParamName = paramName;
170 buffer.setLength(0);
171 } else if (ch == '"' && version != VCardVersion.V2_1) {
172 //2.1 doesn't use the quoting mechanism
173 inQuotes = !inQuotes;
174 } else {
175 buffer.append(ch);
176 }
177 }
178
179 if (propertyName == null || value == null) {
180 listener.invalidLine(line);
181 return;
182 }
183 if ("VERSION".equalsIgnoreCase(propertyName)) {
184 VCardVersion version = VCardVersion.valueOfByStr(value.trim());
185 if (version == null) {
186 listener.invalidVersion(value);
187 } else {
188 this.version = version;
189 listener.readVersion(version);
190 }
191 return;
192 }
193 if ("BEGIN".equalsIgnoreCase(propertyName)) {
194 listener.beginComponent(value.trim());
195 return;
196 }
197 if ("END".equalsIgnoreCase(propertyName)) {
198 listener.endComponent(value.trim());
199 return;
200 }
201 listener.readProperty(group, propertyName, parameters, value);
202 }
203
204 /**
205 * <p>
206 * Gets whether the reader will decode parameter values that use circumflex
207 * accent encoding (enabled by default). This escaping mechanism allows
208 * newlines and double quotes to be included in parameter values.
209 * </p>
210 *
211 * <table border="1">
212 * <tr>
213 * <th>Raw Character</th>
214 * <th>Encoded Character</th>
215 * </tr>
216 * <tr>
217 * <td>{@code "}</td>
218 * <td>{@code ^'}</td>
219 * </tr>
220 * <tr>
221 * <td><i>newline</i></td>
222 * <td>{@code ^n}</td>
223 * </tr>
224 * <tr>
225 * <td>{@code ^}</td>
226 * <td>{@code ^^}</td>
227 * </tr>
228 * </table>
229 *
230 * <p>
231 * Example:
232 * </p>
233 *
234 * <pre>
235 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
236 * sburgh, PA 15212":40.446816;80.00566
237 * </pre>
238 *
239 * @return true if circumflex accent decoding is enabled, false if not
240 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
241 */
242 public boolean isCaretDecodingEnabled() {
243 return caretDecodingEnabled;
244 }
245
246 /**
247 * <p>
248 * Sets whether the reader will decode parameter values that use circumflex
249 * accent encoding (enabled by default). This escaping mechanism allows
250 * newlines and double quotes to be included in parameter values.
251 * </p>
252 *
253 * <table border="1">
254 * <tr>
255 * <th>Raw Character</th>
256 * <th>Encoded Character</th>
257 * </tr>
258 * <tr>
259 * <td>{@code "}</td>
260 * <td>{@code ^'}</td>
261 * </tr>
262 * <tr>
263 * <td><i>newline</i></td>
264 * <td>{@code ^n}</td>
265 * </tr>
266 * <tr>
267 * <td>{@code ^}</td>
268 * <td>{@code ^^}</td>
269 * </tr>
270 * </table>
271 *
272 * <p>
273 * Example:
274 * </p>
275 *
276 * <pre>
277 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
278 * sburgh, PA 15212":geo:40.446816,-80.00566
279 * </pre>
280 *
281 * @param enable true to use circumflex accent decoding, false not to
282 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
283 */
284 public void setCaretDecodingEnabled(boolean enable) {
285 caretDecodingEnabled = enable;
286 }
287
288 /**
289 * Determines whether the end of the data stream has been reached.
290 * @return true if the end has been reached, false if not
291 */
292 public boolean eof() {
293 return eof;
294 }
295
296 /**
297 * Gets the character encoding of the reader.
298 * @return the character encoding or null if none is defined
299 */
300 public Charset getEncoding() {
301 return reader.getEncoding();
302 }
303
304 /**
305 * Handles the vCard data as it is read off the data stream. Each one of
306 * this interface's methods may throw a {@link StopReadingException} at any
307 * time to force the parser to stop reading from the data stream. This will
308 * cause the reader to return from the {@link VCardRawReader#start} method.
309 * To continue reading from the data stream, simply call the
310 * {@link VCardRawReader#start} method again.
311 * @author Michael Angstadt
312 */
313 public static interface VCardDataStreamListener {
314 /**
315 * Called when a component begins (when a "BEGIN:NAME" property is
316 * reached).
317 * @param name the component name (e.g. "VCARD")
318 * @throws StopReadingException to force the reader to stop reading from
319 * the data stream
320 */
321 void beginComponent(String name);
322
323 /**
324 * Called when a property is read.
325 * @param group the group name or null if no group was defined
326 * @param name the property name (e.g. "VERSION")
327 * @param parameters the parameters
328 * @param value the property value
329 * @throws StopReadingException to force the reader to stop reading from
330 * the data stream
331 */
332 void readProperty(String group, String name, VCardParameters parameters, String value);
333
334 /**
335 * Called when the vCard's VERSION property is read.
336 * @param version the version that was read
337 */
338 void readVersion(VCardVersion version);
339
340 /**
341 * Called when a component ends (when a "END:NAME" property is reached).
342 * @param name the component name (e.g. "VCARD")
343 * @throws StopReadingException to force the reader to stop reading from
344 * the data stream
345 */
346 void endComponent(String name);
347
348 /**
349 * Called when a line cannot be parsed.
350 * @param line the unparseable line
351 * @throws StopReadingException to force the reader to stop reading from
352 * the data stream
353 */
354 void invalidLine(String line);
355
356 /**
357 * Called when an invalid VERSION property is encountered.
358 * @param version the invalid version
359 */
360 void invalidVersion(String version);
361 }
362
363 /**
364 * Instructs a {@link VCardRawReader} to stop reading from the data stream
365 * when thrown from a {@link VCardDataStreamListener} implementation.
366 * @author Michael Angstadt
367 */
368 @SuppressWarnings("serial")
369 public static class StopReadingException extends VCardException {
370 //empty
371 }
372
373 /**
374 * Closes the underlying {@link Reader} object.
375 */
376 public void close() throws IOException {
377 reader.close();
378 }
379 }