001 package ezvcard.io.text; 002 003 import java.io.Closeable; 004 import java.io.Flushable; 005 import java.io.IOException; 006 import java.io.Writer; 007 import java.nio.charset.Charset; 008 import java.util.BitSet; 009 import java.util.HashMap; 010 import java.util.List; 011 import java.util.Map; 012 import java.util.regex.Pattern; 013 014 import ezvcard.VCardVersion; 015 import ezvcard.parameter.Encoding; 016 import ezvcard.parameter.VCardParameters; 017 018 /* 019 Copyright (c) 2013, Michael Angstadt 020 All rights reserved. 021 022 Redistribution and use in source and binary forms, with or without 023 modification, are permitted provided that the following conditions are met: 024 025 1. Redistributions of source code must retain the above copyright notice, this 026 list of conditions and the following disclaimer. 027 2. Redistributions in binary form must reproduce the above copyright notice, 028 this list of conditions and the following disclaimer in the documentation 029 and/or other materials provided with the distribution. 030 031 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 032 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 033 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 034 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 035 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 036 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 037 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 038 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 039 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 040 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 041 042 The views and conclusions contained in the software and documentation are those 043 of the authors and should not be interpreted as representing official policies, 044 either expressed or implied, of the FreeBSD Project. 045 */ 046 047 /** 048 * Writes data to an vCard data stream. 049 * @author Michael Angstadt 050 */ 051 public class VCardRawWriter implements Closeable, Flushable { 052 /** 053 * Regular expression used to determine if a parameter value needs to be 054 * quoted. 055 */ 056 private static final Pattern quoteMeRegex = Pattern.compile(".*?[,:;].*"); 057 058 /** 059 * Regular expression used to detect newline character sequences. 060 */ 061 private static final Pattern newlineRegex = Pattern.compile("\\r\\n|\\r|\\n"); 062 063 /** 064 * Regular expression used to determine if a property name contains any 065 * invalid characters. 066 */ 067 private static final Pattern propertyNameRegex = Pattern.compile("(?i)[-a-z0-9]+"); 068 069 /** 070 * The characters that are not valid in parameter values and that should be 071 * removed. 072 */ 073 private static final Map<VCardVersion, BitSet> invalidParamValueChars = new HashMap<VCardVersion, BitSet>(); 074 static { 075 BitSet controlChars = new BitSet(128); 076 controlChars.set(0, 31); 077 controlChars.set(127); 078 controlChars.set('\t', false); //allow 079 controlChars.set('\n', false); //allow 080 controlChars.set('\r', false); //allow 081 082 //2.1 083 { 084 BitSet bitSet = new BitSet(128); 085 bitSet.or(controlChars); 086 087 bitSet.set(','); 088 bitSet.set('.'); 089 bitSet.set(':'); 090 bitSet.set('='); 091 bitSet.set('['); 092 bitSet.set(']'); 093 094 invalidParamValueChars.put(VCardVersion.V2_1, bitSet); 095 } 096 097 //3.0, 4.0 098 { 099 BitSet bitSet = new BitSet(128); 100 bitSet.or(controlChars); 101 102 invalidParamValueChars.put(VCardVersion.V3_0, bitSet); 103 invalidParamValueChars.put(VCardVersion.V4_0, bitSet); 104 } 105 } 106 107 private final String newline; 108 private boolean caretEncodingEnabled = false; 109 private final FoldingScheme foldingScheme; 110 private final FoldedLineWriter writer; 111 private ProblemsListener problemsListener; 112 private VCardVersion version; 113 114 /** 115 * Creates a vCard raw writer using the standard folding scheme and newline 116 * sequence. 117 * @param writer the writer to the data stream 118 * @param version the vCard version to adhere to 119 */ 120 public VCardRawWriter(Writer writer, VCardVersion version) { 121 this(writer, version, FoldingScheme.MIME_DIR); 122 } 123 124 /** 125 * Creates a vCard raw writer using the standard newline sequence. 126 * @param writer the writer to the data stream 127 * @param version the vCard version to adhere to 128 * @param foldingScheme the folding scheme to use or null not to fold at all 129 */ 130 public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme) { 131 this(writer, version, foldingScheme, "\r\n"); 132 } 133 134 /** 135 * Creates a vCard raw writer. 136 * @param writer the writer to the data stream 137 * @param version the vCard version to adhere to 138 * @param foldingScheme the folding scheme to use or null not to fold at all 139 * @param newline the newline sequence to use 140 */ 141 public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme, String newline) { 142 if (foldingScheme == null) { 143 this.writer = new FoldedLineWriter(writer, null, "", newline); 144 } else { 145 this.writer = new FoldedLineWriter(writer, foldingScheme.getLineLength(), foldingScheme.getIndent(), newline); 146 } 147 this.version = version; 148 this.foldingScheme = foldingScheme; 149 this.newline = newline; 150 } 151 152 /** 153 * <p> 154 * Gets whether the writer will apply circumflex accent encoding on 155 * parameter values (disabled by default, only applies to 3.0 and 4.0 156 * vCards). This escaping mechanism allows for newlines and double quotes to 157 * be included in parameter values. 158 * </p> 159 * 160 * <p> 161 * When disabled, the writer will replace newlines with spaces and double 162 * quotes with single quotes. 163 * </p> 164 * 165 * <table border="1"> 166 * <tr> 167 * <th>Character</th> 168 * <th>Replacement<br> 169 * (when disabled)</th> 170 * <th>Replacement<br> 171 * (when enabled)</th> 172 * </tr> 173 * <tr> 174 * <td>{@code "}</td> 175 * <td>{@code '}</td> 176 * <td>{@code ^'}</td> 177 * </tr> 178 * <tr> 179 * <td><i>newline</i></td> 180 * <td><code><i>space</i></code></td> 181 * <td>{@code ^n}</td> 182 * </tr> 183 * <tr> 184 * <td>{@code ^}</td> 185 * <td>{@code ^}</td> 186 * <td>{@code ^^}</td> 187 * </tr> 188 * </table> 189 * 190 * <p> 191 * Example: 192 * </p> 193 * 194 * <pre> 195 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt 196 * sburgh, PA 15212":40.446816;80.00566 197 * </pre> 198 * 199 * @return true if circumflex accent encoding is enabled, false if not 200 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a> 201 */ 202 public boolean isCaretEncodingEnabled() { 203 return caretEncodingEnabled; 204 } 205 206 /** 207 * <p> 208 * Sets whether the writer will apply circumflex accent encoding on 209 * parameter values (disabled by default, only applies to 3.0 and 4.0 210 * vCards). This escaping mechanism allows for newlines and double quotes to 211 * be included in parameter values. 212 * </p> 213 * 214 * <p> 215 * When disabled, the writer will replace newlines with spaces and double 216 * quotes with single quotes. 217 * </p> 218 * 219 * <table border="1"> 220 * <tr> 221 * <th>Character</th> 222 * <th>Replacement<br> 223 * (when disabled)</th> 224 * <th>Replacement<br> 225 * (when enabled)</th> 226 * </tr> 227 * <tr> 228 * <td>{@code "}</td> 229 * <td>{@code '}</td> 230 * <td>{@code ^'}</td> 231 * </tr> 232 * <tr> 233 * <td><i>newline</i></td> 234 * <td><code><i>space</i></code></td> 235 * <td>{@code ^n}</td> 236 * </tr> 237 * <tr> 238 * <td>{@code ^}</td> 239 * <td>{@code ^}</td> 240 * <td>{@code ^^}</td> 241 * </tr> 242 * </table> 243 * 244 * <p> 245 * Example: 246 * </p> 247 * 248 * <pre> 249 * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt 250 * sburgh, PA 15212":40.446816;80.00566 251 * </pre> 252 * 253 * @param enable true to use circumflex accent encoding, false not to 254 * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a> 255 */ 256 public void setCaretEncodingEnabled(boolean enable) { 257 caretEncodingEnabled = enable; 258 } 259 260 /** 261 * Gets the vCard version that the writer is adhering to. 262 * @return the version 263 */ 264 public VCardVersion getVersion() { 265 return version; 266 } 267 268 /** 269 * Sets the vCard version that the writer should adhere to. 270 * @param version the version 271 */ 272 public void setVersion(VCardVersion version) { 273 this.version = version; 274 } 275 276 /** 277 * Gets the newline sequence that is used to separate lines. 278 * @return the newline sequence 279 */ 280 public String getNewline() { 281 return newline; 282 } 283 284 /** 285 * Gets the problems listener. 286 * @return the listener or null if not set 287 */ 288 public ProblemsListener getProblemsListener() { 289 return problemsListener; 290 } 291 292 /** 293 * Sets the problems listener. 294 * @param problemsListener the listener or null to remove 295 */ 296 public void setProblemsListener(ProblemsListener problemsListener) { 297 this.problemsListener = problemsListener; 298 } 299 300 /** 301 * Gets the rules for how each line is folded. 302 * @return the folding scheme or null if the lines are not folded 303 */ 304 public FoldingScheme getFoldingScheme() { 305 return foldingScheme; 306 } 307 308 /** 309 * Writes a property marking the beginning of a component (in other words, 310 * writes a "BEGIN:NAME" property). 311 * @param componentName the component name (e.g. "VCARD") 312 * @throws IOException if there's an I/O problem 313 */ 314 public void writeBeginComponent(String componentName) throws IOException { 315 writeProperty("BEGIN", componentName); 316 } 317 318 /** 319 * Writes a property marking the end of a component (in other words, writes 320 * a "END:NAME" property). 321 * @param componentName the component name (e.g. "VCARD") 322 * @throws IOException if there's an I/O problem 323 */ 324 public void writeEndComponent(String componentName) throws IOException { 325 writeProperty("END", componentName); 326 } 327 328 /** 329 * Writes a "VERSION" property, based on the vCard version that the writer 330 * is adhering to. 331 * @throws IOException if there's an I/O problem 332 */ 333 public void writeVersion() throws IOException { 334 writeProperty("VERSION", version.getVersion()); 335 } 336 337 /** 338 * Writes a property to the vCard data stream. 339 * @param propertyName the property name (e.g. "FN") 340 * @param value the property value 341 * @throws IllegalArgumentException if the property name contains invalid 342 * characters 343 * @throws IOException if there's an I/O problem 344 */ 345 public void writeProperty(String propertyName, String value) throws IOException { 346 writeProperty(null, propertyName, new VCardParameters(), value); 347 } 348 349 /** 350 * Writes a property to the vCard data stream. 351 * @param group the group or null if there is no group 352 * @param propertyName the property name (e.g. "FN") 353 * @param parameters the property parameters 354 * @param value the property value (will be converted to "quoted-printable" 355 * encoding if the {@link Encoding#QUOTED_PRINTABLE} parameter is set) 356 * @throws IllegalArgumentException if the group or property name contains 357 * invalid characters 358 * @throws IOException if there's an I/O problem 359 */ 360 public void writeProperty(String group, String propertyName, VCardParameters parameters, String value) throws IOException { 361 //validate the group name 362 if (group != null && !propertyNameRegex.matcher(group).matches()) { 363 throw new IllegalArgumentException("Group contains invalid characters. Valid characters are letters, numbers, and hyphens: " + group); 364 } 365 366 //validate the property name 367 if (!propertyNameRegex.matcher(propertyName).matches()) { 368 throw new IllegalArgumentException("Property name contains invalid characters. Valid characters are letters, numbers, and hyphens: " + propertyName); 369 } 370 371 value = sanitizeValue(parameters, value); 372 373 //determine if the property value must be encoded in quoted printable 374 //and determine the charset to use when encoding to quoted-printable 375 boolean quotedPrintable = (parameters.getEncoding() == Encoding.QUOTED_PRINTABLE); 376 Charset charset = null; 377 if (quotedPrintable) { 378 String charsetParam = parameters.getCharset(); 379 if (charsetParam == null) { 380 charset = Charset.forName("UTF-8"); 381 } else { 382 try { 383 charset = Charset.forName(charsetParam); 384 } catch (Throwable e) { 385 charset = Charset.forName("UTF-8"); 386 } 387 } 388 parameters.setCharset(charset.name()); 389 } 390 391 //write the group 392 if (group != null) { 393 writer.append(group); 394 writer.append('.'); 395 } 396 397 //write the property name 398 writer.append(propertyName); 399 400 //write the parameters 401 for (Map.Entry<String, List<String>> subType : parameters) { 402 String parameterName = subType.getKey(); 403 List<String> parameterValues = subType.getValue(); 404 if (parameterValues.isEmpty()) { 405 continue; 406 } 407 408 if (version == VCardVersion.V2_1) { 409 boolean isTypeParameter = VCardParameters.TYPE.equalsIgnoreCase(parameterName); 410 for (String parameterValue : parameterValues) { 411 parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName); 412 413 if (isTypeParameter) { 414 //e.g. ADR;HOME;WORK: 415 writer.append(';').append(parameterValue.toUpperCase()); 416 } else { 417 //e.g. ADR;FOO=bar;FOO=car: 418 writer.append(';').append(parameterName).append('=').append(parameterValue); 419 } 420 } 421 } else { 422 //e.g. ADR;TYPE=home,work,"another,value": 423 424 boolean first = true; 425 writer.append(';').append(parameterName).append('='); 426 for (String parameterValue : parameterValues) { 427 if (!first) { 428 writer.append(','); 429 } 430 431 parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName); 432 433 //surround with double quotes if contains special chars 434 if (quoteMeRegex.matcher(parameterValue).matches()) { 435 writer.append('"'); 436 writer.append(parameterValue); 437 writer.append('"'); 438 } else { 439 writer.append(parameterValue); 440 } 441 442 first = false; 443 } 444 } 445 } 446 447 writer.append(':'); 448 writer.append(value, quotedPrintable, charset); 449 writer.append(newline); 450 } 451 452 /** 453 * Sanitizes a property value for safe inclusion in a vCard. 454 * @param parameters the parameters 455 * @param value the value to sanitize 456 * @return the sanitized value 457 */ 458 private String sanitizeValue(VCardParameters parameters, String value) { 459 if (value == null) { 460 return ""; 461 } 462 463 if (version == VCardVersion.V2_1 && containsNewlines(value)) { 464 //2.1 does not support the "\n" escape sequence (see "Delimiters" sub-section in section 2 of the specs) 465 parameters.setEncoding(Encoding.QUOTED_PRINTABLE); 466 return value; 467 } 468 469 return escapeNewlines(value); 470 } 471 472 /** 473 * Removes or escapes all invalid characters in a parameter value. 474 * @param parameterValue the parameter value 475 * @param parameterName the parameter name 476 * @param propertyName the name of the property to which the parameter 477 * belongs 478 * @return the sanitized parameter value 479 */ 480 private String sanitizeParameterValue(String parameterValue, String parameterName, String propertyName) { 481 String modifiedValue = null; 482 boolean valueChanged = false; 483 484 //Note: String reference comparisons ("==") are used because the Pattern class returns the same instance if the String wasn't changed 485 486 switch (version) { 487 case V2_1: 488 //remove invalid characters 489 modifiedValue = removeInvalidParameterValueChars(parameterValue); 490 491 //replace newlines with spaces 492 modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" "); 493 494 //check to see if value was changed 495 valueChanged = (parameterValue != modifiedValue); 496 497 //escape backslashes 498 modifiedValue = modifiedValue.replace("\\", "\\\\"); 499 500 //escape semi-colons (see section 2) 501 modifiedValue = modifiedValue.replace(";", "\\;"); 502 503 break; 504 505 case V3_0: 506 //remove invalid characters 507 modifiedValue = removeInvalidParameterValueChars(parameterValue); 508 509 if (caretEncodingEnabled) { 510 valueChanged = (modifiedValue != parameterValue); 511 512 //apply caret encoding 513 modifiedValue = applyCaretEncoding(modifiedValue); 514 } else { 515 //replace double quotes with single quotes 516 modifiedValue = modifiedValue.replace('"', '\''); 517 518 //replace newlines with spaces 519 modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" "); 520 521 valueChanged = (modifiedValue != parameterValue); 522 } 523 524 break; 525 526 case V4_0: 527 //remove invalid characters 528 modifiedValue = removeInvalidParameterValueChars(parameterValue); 529 530 if (caretEncodingEnabled) { 531 valueChanged = (modifiedValue != parameterValue); 532 533 //apply caret encoding 534 modifiedValue = applyCaretEncoding(modifiedValue); 535 } else { 536 //replace double quotes with single quotes 537 modifiedValue = modifiedValue.replace('"', '\''); 538 539 valueChanged = (modifiedValue != parameterValue); 540 541 //backslash-escape newlines (for the "LABEL" parameter) 542 modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll("\\\\\\n"); 543 } 544 545 break; 546 } 547 548 if (valueChanged && problemsListener != null) { 549 problemsListener.onParameterValueChanged(propertyName, parameterName, parameterValue, modifiedValue); 550 } 551 552 return modifiedValue; 553 } 554 555 /** 556 * Removes invalid characters from a parameter value. 557 * @param value the parameter value 558 * @return the sanitized parameter value 559 */ 560 private String removeInvalidParameterValueChars(String value) { 561 BitSet invalidChars = invalidParamValueChars.get(version); 562 StringBuilder sb = new StringBuilder(value.length()); 563 564 for (int i = 0; i < value.length(); i++) { 565 char ch = value.charAt(i); 566 if (!invalidChars.get(ch)) { 567 sb.append(ch); 568 } 569 } 570 571 return (sb.length() == value.length()) ? value : sb.toString(); 572 } 573 574 /** 575 * Applies circumflex accent encoding to a string. 576 * @param value the string 577 * @return the encoded string 578 */ 579 private String applyCaretEncoding(String value) { 580 value = value.replace("^", "^^"); 581 value = newlineRegex.matcher(value).replaceAll("^n"); 582 value = value.replace("\"", "^'"); 583 return value; 584 } 585 586 /** 587 * <p> 588 * Escapes all newline character sequences. The newline character sequences 589 * are: 590 * </p> 591 * <ul> 592 * <li>{@code \r\n}</li> 593 * <li>{@code \r}</li> 594 * <li>{@code \n}</li> 595 * </ul> 596 * @param text the text to escape 597 * @return the escaped text 598 */ 599 private String escapeNewlines(String text) { 600 return newlineRegex.matcher(text).replaceAll("\\\\n"); 601 } 602 603 /** 604 * <p> 605 * Determines if a string has at least one newline character sequence. The 606 * newline character sequences are: 607 * </p> 608 * <ul> 609 * <li>{@code \r\n}</li> 610 * <li>{@code \r}</li> 611 * <li>{@code \n}</li> 612 * </ul> 613 * @param text the text to escape 614 * @return the escaped text 615 */ 616 private boolean containsNewlines(String text) { 617 return newlineRegex.matcher(text).find(); 618 } 619 620 /** 621 * Flushes the underlying {@link Writer} object. 622 * @throws IOException if there's a problem flushing the writer 623 */ 624 public void flush() throws IOException { 625 writer.flush(); 626 } 627 628 /** 629 * Closes the underlying {@link Writer} object. 630 * @throws IOException if there's a problem closing the writer 631 */ 632 public void close() throws IOException { 633 writer.close(); 634 } 635 636 /** 637 * A listener whose methods are invoked when non-critical issues occur 638 * during the writing process. 639 * @author Michael Angstadt 640 */ 641 public static interface ProblemsListener { 642 /** 643 * Called when a parameter value is changed in a lossy way, due to it 644 * containing invalid characters. If a character can be escaped (such as 645 * the "^" character when caret encoding is enabled), then this does not 646 * count as the parameter being modified because it can be decoded 647 * without losing any information. 648 * @param propertyName the name of the property to which the parameter 649 * belongs 650 * @param parameterName the parameter name 651 * @param originalValue the original parameter value 652 * @param modifiedValue the modified parameter value 653 */ 654 void onParameterValueChanged(String propertyName, String parameterName, String originalValue, String modifiedValue); 655 } 656 }