001 package ezvcard.io.text;
002
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import ezvcard.VCardVersion;
import ezvcard.parameter.Encoding;
import ezvcard.parameter.VCardParameters;
019
020 /*
021 Copyright (c) 2013, Michael Angstadt
022 All rights reserved.
023
024 Redistribution and use in source and binary forms, with or without
025 modification, are permitted provided that the following conditions are met:
026
027 1. Redistributions of source code must retain the above copyright notice, this
028 list of conditions and the following disclaimer.
029 2. Redistributions in binary form must reproduce the above copyright notice,
030 this list of conditions and the following disclaimer in the documentation
031 and/or other materials provided with the distribution.
032
033 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
034 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
035 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
036 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
037 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
038 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
039 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
040 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
041 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
042 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
043
044 The views and conclusions contained in the software and documentation are those
045 of the authors and should not be interpreted as representing official policies,
046 either expressed or implied, of the FreeBSD Project.
047 */
048
049 /**
 * Writes data to a vCard data stream.
051 * @author Michael Angstadt
052 */
053 public class VCardRawWriter implements Closeable, Flushable {
054 /**
055 * Regular expression used to determine if a parameter value needs to be
056 * quoted.
057 */
058 private static final Pattern quoteMeRegex = Pattern.compile(".*?[,:;].*");
059
/**
 * Regular expression used to detect newline character sequences. Matches
 * "\r\n", a lone "\r", or a lone "\n". The "\r\n" alternative is listed
 * first so that a carriage return followed by a line feed is treated as a
 * single newline instead of two.
 */
private static final Pattern newlineRegex = Pattern.compile("\\r\\n|\\r|\\n");
064
/**
 * Regular expression used to determine if a property name contains any
 * invalid characters. A name is accepted when it consists of one or more
 * letters (a-z, matched case-insensitively), digits, or hyphens.
 */
private static final Pattern propertyNameRegex = Pattern.compile("(?i)[-a-z0-9]+");
070
/**
 * The characters that are not valid in parameter values and that should be
 * removed, keyed by vCard version. Each bit index corresponds to a
 * character code; characters outside the range of a bit set are treated as
 * valid.
 */
private static final Map<VCardVersion, BitSet> invalidParamValueChars = new HashMap<VCardVersion, BitSet>();
static {
    //ASCII control characters: 0x00-0x1F and 0x7F
    //note: the upper bound of BitSet.set(from, to) is exclusive, so 32 is
    //needed in order to include 0x1F
    BitSet controlChars = new BitSet(128);
    controlChars.set(0, 32);
    controlChars.set(127);
    controlChars.set('\t', false); //allow
    controlChars.set('\n', false); //allow (newlines are dealt with separately)
    controlChars.set('\r', false); //allow (newlines are dealt with separately)

    //2.1
    {
        BitSet bitSet = new BitSet(128);
        bitSet.or(controlChars);

        bitSet.set(',');
        bitSet.set('.');
        bitSet.set(':');
        bitSet.set('=');
        bitSet.set('[');
        bitSet.set(']');

        invalidParamValueChars.put(VCardVersion.V2_1, bitSet);
    }

    //3.0, 4.0 (both versions share the same bit set instance)
    {
        BitSet bitSet = new BitSet(128);
        bitSet.or(controlChars);

        invalidParamValueChars.put(VCardVersion.V3_0, bitSet);
        invalidParamValueChars.put(VCardVersion.V4_0, bitSet);
    }
}
108
//the newline sequence that is appended after every property
private final String newline;
//whether circumflex accent encoding is applied to 3.0 and 4.0 parameter values
private boolean caretEncodingEnabled = false;
//the rules for folding lines, or null if lines are not folded
private final FoldingScheme foldingScheme;
//the writer that all output is sent through (created in the constructor from the folding scheme and newline sequence)
private final FoldedLineWriter writer;
//notified when a parameter value is changed in a lossy way (null if no listener is set)
private ProblemsListener problemsListener;
//the vCard version whose syntax rules are applied when writing
private VCardVersion version;
115
/**
 * Creates a vCard raw writer that folds lines using the MIME-DIR folding
 * scheme and that uses the standard newline sequence.
 * @param writer the writer that the vCard data is sent to
 * @param version the vCard version whose syntax rules should be followed
 */
public VCardRawWriter(Writer writer, VCardVersion version) {
    this(writer, version, FoldingScheme.MIME_DIR);
}
125
/**
 * Creates a vCard raw writer that terminates each line with the standard
 * newline sequence (a carriage return followed by a line feed).
 * @param writer the writer that the vCard data is sent to
 * @param version the vCard version whose syntax rules should be followed
 * @param foldingScheme the rules for folding lines, or null to disable line
 * folding
 */
public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme) {
    this(writer, version, foldingScheme, "\r\n");
}
135
/**
 * Creates a vCard raw writer.
 * @param writer the writer that the vCard data is sent to
 * @param version the vCard version whose syntax rules should be followed
 * @param foldingScheme the rules for folding lines, or null to disable line
 * folding
 * @param newline the character sequence that terminates each line
 */
public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme, String newline) {
    this.newline = newline;
    this.foldingScheme = foldingScheme;
    this.version = version;

    if (foldingScheme != null) {
        this.writer = new FoldedLineWriter(writer, foldingScheme.getLineLength(), foldingScheme.getIndent(), newline);
    } else {
        //no folding scheme: pass a null line length and an empty indent string
        this.writer = new FoldedLineWriter(writer, null, "", newline);
    }
}
153
154 /**
155 * <p>
156 * Gets whether the writer will apply circumflex accent encoding on
157 * parameter values (disabled by default, only applies to 3.0 and 4.0
158 * vCards). This escaping mechanism allows for newlines and double quotes to
159 * be included in parameter values.
160 * </p>
161 *
162 * <p>
163 * When disabled, the writer will replace newlines with spaces and double
164 * quotes with single quotes.
165 * </p>
166 *
167 * <table border="1">
168 * <tr>
169 * <th>Character</th>
170 * <th>Replacement<br>
171 * (when disabled)</th>
172 * <th>Replacement<br>
173 * (when enabled)</th>
174 * </tr>
175 * <tr>
176 * <td>{@code "}</td>
177 * <td>{@code '}</td>
178 * <td>{@code ^'}</td>
179 * </tr>
180 * <tr>
181 * <td><i>newline</i></td>
182 * <td><code><i>space</i></code></td>
183 * <td>{@code ^n}</td>
184 * </tr>
185 * <tr>
186 * <td>{@code ^}</td>
187 * <td>{@code ^}</td>
188 * <td>{@code ^^}</td>
189 * </tr>
190 * </table>
191 *
192 * <p>
193 * Example:
194 * </p>
195 *
196 * <pre>
197 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
198 * sburgh, PA 15212":40.446816;80.00566
199 * </pre>
200 *
201 * @return true if circumflex accent encoding is enabled, false if not
202 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
203 */
204 public boolean isCaretEncodingEnabled() {
205 return caretEncodingEnabled;
206 }
207
208 /**
209 * <p>
210 * Sets whether the writer will apply circumflex accent encoding on
211 * parameter values (disabled by default, only applies to 3.0 and 4.0
212 * vCards). This escaping mechanism allows for newlines and double quotes to
213 * be included in parameter values.
214 * </p>
215 *
216 * <p>
217 * When disabled, the writer will replace newlines with spaces and double
218 * quotes with single quotes.
219 * </p>
220 *
221 * <table border="1">
222 * <tr>
223 * <th>Character</th>
224 * <th>Replacement<br>
225 * (when disabled)</th>
226 * <th>Replacement<br>
227 * (when enabled)</th>
228 * </tr>
229 * <tr>
230 * <td>{@code "}</td>
231 * <td>{@code '}</td>
232 * <td>{@code ^'}</td>
233 * </tr>
234 * <tr>
235 * <td><i>newline</i></td>
236 * <td><code><i>space</i></code></td>
237 * <td>{@code ^n}</td>
238 * </tr>
239 * <tr>
240 * <td>{@code ^}</td>
241 * <td>{@code ^}</td>
242 * <td>{@code ^^}</td>
243 * </tr>
244 * </table>
245 *
246 * <p>
247 * Example:
248 * </p>
249 *
250 * <pre>
251 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
252 * sburgh, PA 15212":40.446816;80.00566
253 * </pre>
254 *
255 * @param enable true to use circumflex accent encoding, false not to
256 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
257 */
258 public void setCaretEncodingEnabled(boolean enable) {
259 caretEncodingEnabled = enable;
260 }
261
262 /**
263 * Gets the vCard version that the writer is adhering to.
264 * @return the version
265 */
266 public VCardVersion getVersion() {
267 return version;
268 }
269
270 /**
271 * Sets the vCard version that the writer should adhere to.
272 * @param version the version
273 */
274 public void setVersion(VCardVersion version) {
275 this.version = version;
276 }
277
278 /**
279 * Gets the newline sequence that is used to separate lines.
280 * @return the newline sequence
281 */
282 public String getNewline() {
283 return newline;
284 }
285
286 /**
287 * Gets the problems listener.
288 * @return the listener or null if not set
289 */
290 public ProblemsListener getProblemsListener() {
291 return problemsListener;
292 }
293
294 /**
295 * Sets the problems listener.
296 * @param problemsListener the listener or null to remove
297 */
298 public void setProblemsListener(ProblemsListener problemsListener) {
299 this.problemsListener = problemsListener;
300 }
301
302 /**
303 * Gets the rules for how each line is folded.
304 * @return the folding scheme or null if the lines are not folded
305 */
306 public FoldingScheme getFoldingScheme() {
307 return foldingScheme;
308 }
309
310 /**
311 * Writes a property marking the beginning of a component (in other words,
312 * writes a "BEGIN:NAME" property).
313 * @param componentName the component name (e.g. "VCARD")
314 * @throws IOException if there's an I/O problem
315 */
316 public void writeBeginComponent(String componentName) throws IOException {
317 writeProperty("BEGIN", componentName);
318 }
319
320 /**
321 * Writes a property marking the end of a component (in other words, writes
322 * a "END:NAME" property).
323 * @param componentName the component name (e.g. "VCARD")
324 * @throws IOException if there's an I/O problem
325 */
326 public void writeEndComponent(String componentName) throws IOException {
327 writeProperty("END", componentName);
328 }
329
330 /**
331 * Writes a "VERSION" property, based on the vCard version that the writer
332 * is adhering to.
333 * @throws IOException if there's an I/O problem
334 */
335 public void writeVersion() throws IOException {
336 writeProperty("VERSION", version.getVersion());
337 }
338
339 /**
340 * Writes a property to the vCard data stream.
341 * @param propertyName the property name (e.g. "FN")
342 * @param value the property value
343 * @throws IllegalArgumentException if the property name contains invalid
344 * characters
345 * @throws IOException if there's an I/O problem
346 */
347 public void writeProperty(String propertyName, String value) throws IOException {
348 writeProperty(null, propertyName, new VCardParameters(), value);
349 }
350
351 /**
352 * Writes a property to the vCard data stream.
353 * @param group the group or null if there is no group
354 * @param propertyName the property name (e.g. "FN")
355 * @param parameters the property parameters
356 * @param value the property value (will be converted to "quoted-printable"
357 * encoding if the {@link Encoding#QUOTED_PRINTABLE} parameter is set)
358 * @throws IllegalArgumentException if the group or property name contains
359 * invalid characters
360 * @throws IOException if there's an I/O problem
361 */
362 public void writeProperty(String group, String propertyName, VCardParameters parameters, String value) throws IOException {
363 //validate the group name
364 if (group != null && !propertyNameRegex.matcher(group).matches()) {
365 throw new IllegalArgumentException("Group contains invalid characters. Valid characters are letters, numbers, and hyphens: " + group);
366 }
367
368 //validate the property name
369 if (!propertyNameRegex.matcher(propertyName).matches()) {
370 throw new IllegalArgumentException("Property name contains invalid characters. Valid characters are letters, numbers, and hyphens: " + propertyName);
371 }
372
373 value = sanitizeValue(parameters, value);
374
375 //determine if the property value must be encoded in quoted printable
376 //and determine the charset to use when encoding to quoted-printable
377 boolean quotedPrintable = (parameters.getEncoding() == Encoding.QUOTED_PRINTABLE);
378 Charset charset = null;
379 if (quotedPrintable) {
380 String charsetParam = parameters.getCharset();
381 if (charsetParam != null) {
382 try {
383 charset = Charset.forName(charsetParam);
384 } catch (IllegalCharsetNameException e) {
385 charset = null;
386 } catch (UnsupportedCharsetException e) {
387 charset = null;
388 }
389 }
390 if (charset == null) {
391 charset = writer.getEncoding();
392 if (charset == null) {
393 charset = Charset.defaultCharset();
394 }
395 }
396 parameters.setCharset(charset.name());
397 }
398
399 //write the group
400 if (group != null) {
401 writer.append(group);
402 writer.append('.');
403 }
404
405 //write the property name
406 writer.append(propertyName);
407
408 //write the parameters
409 for (Map.Entry<String, List<String>> subType : parameters) {
410 String parameterName = subType.getKey();
411 List<String> parameterValues = subType.getValue();
412 if (parameterValues.isEmpty()) {
413 continue;
414 }
415
416 if (version == VCardVersion.V2_1) {
417 boolean isTypeParameter = VCardParameters.TYPE.equalsIgnoreCase(parameterName);
418 for (String parameterValue : parameterValues) {
419 parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);
420
421 if (isTypeParameter) {
422 //e.g. ADR;HOME;WORK:
423 writer.append(';').append(parameterValue.toUpperCase());
424 } else {
425 //e.g. ADR;FOO=bar;FOO=car:
426 writer.append(';').append(parameterName).append('=').append(parameterValue);
427 }
428 }
429 } else {
430 //e.g. ADR;TYPE=home,work,"another,value":
431
432 boolean first = true;
433 writer.append(';').append(parameterName).append('=');
434 for (String parameterValue : parameterValues) {
435 if (!first) {
436 writer.append(',');
437 }
438
439 parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);
440
441 //surround with double quotes if contains special chars
442 if (quoteMeRegex.matcher(parameterValue).matches()) {
443 writer.append('"');
444 writer.append(parameterValue);
445 writer.append('"');
446 } else {
447 writer.append(parameterValue);
448 }
449
450 first = false;
451 }
452 }
453 }
454
455 writer.append(':');
456 writer.append(value, quotedPrintable, charset);
457 writer.append(newline);
458 }
459
460 /**
461 * Sanitizes a property value for safe inclusion in a vCard.
462 * @param parameters the parameters
463 * @param value the value to sanitize
464 * @return the sanitized value
465 */
466 private String sanitizeValue(VCardParameters parameters, String value) {
467 if (value == null) {
468 return "";
469 }
470
471 if (version == VCardVersion.V2_1 && containsNewlines(value)) {
472 //2.1 does not support the "\n" escape sequence (see "Delimiters" sub-section in section 2 of the specs)
473 parameters.setEncoding(Encoding.QUOTED_PRINTABLE);
474 return value;
475 }
476
477 return escapeNewlines(value);
478 }
479
480 /**
481 * Removes or escapes all invalid characters in a parameter value.
482 * @param parameterValue the parameter value
483 * @param parameterName the parameter name
484 * @param propertyName the name of the property to which the parameter
485 * belongs
486 * @return the sanitized parameter value
487 */
488 private String sanitizeParameterValue(String parameterValue, String parameterName, String propertyName) {
489 String modifiedValue = null;
490 boolean valueChanged = false;
491
492 //Note: String reference comparisons ("==") are used because the Pattern class returns the same instance if the String wasn't changed
493
494 switch (version) {
495 case V2_1:
496 //remove invalid characters
497 modifiedValue = removeInvalidParameterValueChars(parameterValue);
498
499 //replace newlines with spaces
500 modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" ");
501
502 //check to see if value was changed
503 valueChanged = (parameterValue != modifiedValue);
504
505 //escape backslashes
506 modifiedValue = modifiedValue.replace("\\", "\\\\");
507
508 //escape semi-colons (see section 2)
509 modifiedValue = modifiedValue.replace(";", "\\;");
510
511 break;
512
513 case V3_0:
514 //remove invalid characters
515 modifiedValue = removeInvalidParameterValueChars(parameterValue);
516
517 if (caretEncodingEnabled) {
518 valueChanged = (modifiedValue != parameterValue);
519
520 //apply caret encoding
521 modifiedValue = applyCaretEncoding(modifiedValue);
522 } else {
523 //replace double quotes with single quotes
524 modifiedValue = modifiedValue.replace('"', '\'');
525
526 //replace newlines with spaces
527 modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" ");
528
529 valueChanged = (modifiedValue != parameterValue);
530 }
531
532 break;
533
534 case V4_0:
535 //remove invalid characters
536 modifiedValue = removeInvalidParameterValueChars(parameterValue);
537
538 if (caretEncodingEnabled) {
539 valueChanged = (modifiedValue != parameterValue);
540
541 //apply caret encoding
542 modifiedValue = applyCaretEncoding(modifiedValue);
543 } else {
544 //replace double quotes with single quotes
545 modifiedValue = modifiedValue.replace('"', '\'');
546
547 valueChanged = (modifiedValue != parameterValue);
548
549 //backslash-escape newlines (for the "LABEL" parameter)
550 modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll("\\\\\\n");
551 }
552
553 break;
554 }
555
556 if (valueChanged && problemsListener != null) {
557 problemsListener.onParameterValueChanged(propertyName, parameterName, parameterValue, modifiedValue);
558 }
559
560 return modifiedValue;
561 }
562
563 /**
564 * Removes invalid characters from a parameter value.
565 * @param value the parameter value
566 * @return the sanitized parameter value
567 */
568 private String removeInvalidParameterValueChars(String value) {
569 BitSet invalidChars = invalidParamValueChars.get(version);
570 StringBuilder sb = new StringBuilder(value.length());
571
572 for (int i = 0; i < value.length(); i++) {
573 char ch = value.charAt(i);
574 if (!invalidChars.get(ch)) {
575 sb.append(ch);
576 }
577 }
578
579 return (sb.length() == value.length()) ? value : sb.toString();
580 }
581
582 /**
583 * Applies circumflex accent encoding to a string.
584 * @param value the string
585 * @return the encoded string
586 */
587 private String applyCaretEncoding(String value) {
588 value = value.replace("^", "^^");
589 value = newlineRegex.matcher(value).replaceAll("^n");
590 value = value.replace("\"", "^'");
591 return value;
592 }
593
594 /**
595 * <p>
596 * Escapes all newline character sequences. The newline character sequences
597 * are:
598 * </p>
599 * <ul>
600 * <li>{@code \r\n}</li>
601 * <li>{@code \r}</li>
602 * <li>{@code \n}</li>
603 * </ul>
604 * @param text the text to escape
605 * @return the escaped text
606 */
607 private String escapeNewlines(String text) {
608 return newlineRegex.matcher(text).replaceAll("\\\\n");
609 }
610
611 /**
612 * <p>
613 * Determines if a string has at least one newline character sequence. The
614 * newline character sequences are:
615 * </p>
616 * <ul>
617 * <li>{@code \r\n}</li>
618 * <li>{@code \r}</li>
619 * <li>{@code \n}</li>
620 * </ul>
621 * @param text the text to escape
622 * @return the escaped text
623 */
624 private boolean containsNewlines(String text) {
625 return newlineRegex.matcher(text).find();
626 }
627
628 /**
629 * Flushes the underlying {@link Writer} object.
630 * @throws IOException if there's a problem flushing the writer
631 */
632 public void flush() throws IOException {
633 writer.flush();
634 }
635
636 /**
637 * Closes the underlying {@link Writer} object.
638 * @throws IOException if there's a problem closing the writer
639 */
640 public void close() throws IOException {
641 writer.close();
642 }
643
644 /**
645 * A listener whose methods are invoked when non-critical issues occur
646 * during the writing process.
647 * @author Michael Angstadt
648 */
649 public static interface ProblemsListener {
650 /**
651 * Called when a parameter value is changed in a lossy way, due to it
652 * containing invalid characters. If a character can be escaped (such as
653 * the "^" character when caret encoding is enabled), then this does not
654 * count as the parameter being modified because it can be decoded
655 * without losing any information.
656 * @param propertyName the name of the property to which the parameter
657 * belongs
658 * @param parameterName the parameter name
659 * @param originalValue the original parameter value
660 * @param modifiedValue the modified parameter value
661 */
662 void onParameterValueChanged(String propertyName, String parameterName, String originalValue, String modifiedValue);
663 }
664 }