package ezvcard.io.text;

import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import ezvcard.VCardVersion;
import ezvcard.parameter.Encoding;
import ezvcard.parameter.VCardParameters;

/*
 Copyright (c) 2013, Michael Angstadt
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice, this
 list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
 this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 The views and conclusions contained in the software and documentation are those
 of the authors and should not be interpreted as representing official policies,
 either expressed or implied, of the FreeBSD Project.
 */

/**
 * Writes data to a vCard data stream.
 * @author Michael Angstadt
 */
public class VCardRawWriter implements Closeable, Flushable {
    /**
     * Regular expression used to determine if a parameter value needs to be
     * quoted. It is applied with {@code find()}, so a single occurrence of a
     * comma, colon, or semi-colon anywhere in the value triggers quoting,
     * regardless of any line terminator characters the value may contain.
     */
    private static final Pattern quoteMeRegex = Pattern.compile("[,:;]");

    /**
     * Regular expression used to detect newline character sequences.
     */
    private static final Pattern newlineRegex = Pattern.compile("\\r\\n|\\r|\\n");

    /**
     * Regular expression used to determine if a property name contains any
     * invalid characters.
     */
    private static final Pattern propertyNameRegex = Pattern.compile("(?i)[-a-z0-9]+");

    /**
     * The characters that are not valid in parameter values and that should be
     * removed.
     */
    private static final Map<VCardVersion, BitSet> invalidParamValueChars = new HashMap<VCardVersion, BitSet>();
    static {
        BitSet controlChars = new BitSet(128);
        //the end index of BitSet.set(from, to) is exclusive, so "32" is
        //required in order to cover the full control range (0x00-0x1F)
        controlChars.set(0, 32);
        controlChars.set(127);
        controlChars.set('\t', false); //allow
        controlChars.set('\n', false); //allow
        controlChars.set('\r', false); //allow

        //2.1
        {
            BitSet bitSet = new BitSet(128);
            bitSet.or(controlChars);

            bitSet.set(',');
            bitSet.set('.');
            bitSet.set(':');
            bitSet.set('=');
            bitSet.set('[');
            bitSet.set(']');

            invalidParamValueChars.put(VCardVersion.V2_1, bitSet);
        }

        //3.0, 4.0
        {
            BitSet bitSet = new BitSet(128);
            bitSet.or(controlChars);

            invalidParamValueChars.put(VCardVersion.V3_0, bitSet);
            invalidParamValueChars.put(VCardVersion.V4_0, bitSet);
        }
    }

    private final String newline;
    private boolean caretEncodingEnabled = false;
    private final FoldingScheme foldingScheme;
    private final FoldedLineWriter writer;
    private ProblemsListener problemsListener;
    private VCardVersion version;

    /**
     * Creates a vCard raw writer using the standard folding scheme and newline
     * sequence.
     * @param writer the writer to the data stream
     * @param version the vCard version to adhere to
     */
    public VCardRawWriter(Writer writer, VCardVersion version) {
        this(writer, version, FoldingScheme.MIME_DIR);
    }

    /**
     * Creates a vCard raw writer using the standard newline sequence.
     * @param writer the writer to the data stream
     * @param version the vCard version to adhere to
     * @param foldingScheme the folding scheme to use or null not to fold at all
     */
    public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme) {
        this(writer, version, foldingScheme, "\r\n");
    }

    /**
     * Creates a vCard raw writer.
     * @param writer the writer to the data stream
     * @param version the vCard version to adhere to
     * @param foldingScheme the folding scheme to use or null not to fold at all
     * @param newline the newline sequence to use
     */
    public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme, String newline) {
        if (foldingScheme == null) {
            //no folding: pass a null line length and an empty indent
            this.writer = new FoldedLineWriter(writer, null, "", newline);
        } else {
            this.writer = new FoldedLineWriter(writer, foldingScheme.getLineLength(), foldingScheme.getIndent(), newline);
        }
        this.version = version;
        this.foldingScheme = foldingScheme;
        this.newline = newline;
    }

    /**
     * <p>
     * Gets whether the writer will apply circumflex accent encoding on
     * parameter values (disabled by default, only applies to 3.0 and 4.0
     * vCards). This escaping mechanism allows for newlines and double quotes to
     * be included in parameter values.
     * </p>
     *
     * <p>
     * When disabled, the writer will replace newlines with spaces and double
     * quotes with single quotes.
     * </p>
     *
     * <table border="1">
     * <tr>
     * <th>Character</th>
     * <th>Replacement<br>
     * (when disabled)</th>
     * <th>Replacement<br>
     * (when enabled)</th>
     * </tr>
     * <tr>
     * <td>{@code "}</td>
     * <td>{@code '}</td>
     * <td>{@code ^'}</td>
     * </tr>
     * <tr>
     * <td><i>newline</i></td>
     * <td><code><i>space</i></code></td>
     * <td>{@code ^n}</td>
     * </tr>
     * <tr>
     * <td>{@code ^}</td>
     * <td>{@code ^}</td>
     * <td>{@code ^^}</td>
     * </tr>
     * </table>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
     *  sburgh, PA 15212":40.446816;80.00566
     * </pre>
     *
     * @return true if circumflex accent encoding is enabled, false if not
     * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
     */
    public boolean isCaretEncodingEnabled() {
        return caretEncodingEnabled;
    }

    /**
     * <p>
     * Sets whether the writer will apply circumflex accent encoding on
     * parameter values (disabled by default, only applies to 3.0 and 4.0
     * vCards). This escaping mechanism allows for newlines and double quotes to
     * be included in parameter values.
     * </p>
     *
     * <p>
     * When disabled, the writer will replace newlines with spaces and double
     * quotes with single quotes.
     * </p>
     *
     * <table border="1">
     * <tr>
     * <th>Character</th>
     * <th>Replacement<br>
     * (when disabled)</th>
     * <th>Replacement<br>
     * (when enabled)</th>
     * </tr>
     * <tr>
     * <td>{@code "}</td>
     * <td>{@code '}</td>
     * <td>{@code ^'}</td>
     * </tr>
     * <tr>
     * <td><i>newline</i></td>
     * <td><code><i>space</i></code></td>
     * <td>{@code ^n}</td>
     * </tr>
     * <tr>
     * <td>{@code ^}</td>
     * <td>{@code ^}</td>
     * <td>{@code ^^}</td>
     * </tr>
     * </table>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
     *  sburgh, PA 15212":40.446816;80.00566
     * </pre>
     *
     * @param enable true to use circumflex accent encoding, false not to
     * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
     */
    public void setCaretEncodingEnabled(boolean enable) {
        caretEncodingEnabled = enable;
    }

    /**
     * Gets the vCard version that the writer is adhering to.
     * @return the version
     */
    public VCardVersion getVersion() {
        return version;
    }

    /**
     * Sets the vCard version that the writer should adhere to.
     * @param version the version
     */
    public void setVersion(VCardVersion version) {
        this.version = version;
    }

    /**
     * Gets the newline sequence that is used to separate lines.
     * @return the newline sequence
     */
    public String getNewline() {
        return newline;
    }

    /**
     * Gets the problems listener.
     * @return the listener or null if not set
     */
    public ProblemsListener getProblemsListener() {
        return problemsListener;
    }

    /**
     * Sets the problems listener.
     * @param problemsListener the listener or null to remove
     */
    public void setProblemsListener(ProblemsListener problemsListener) {
        this.problemsListener = problemsListener;
    }

    /**
     * Gets the rules for how each line is folded.
     * @return the folding scheme or null if the lines are not folded
     */
    public FoldingScheme getFoldingScheme() {
        return foldingScheme;
    }

    /**
     * Writes a property marking the beginning of a component (in other words,
     * writes a "BEGIN:NAME" property).
     * @param componentName the component name (e.g. "VCARD")
     * @throws IOException if there's an I/O problem
     */
    public void writeBeginComponent(String componentName) throws IOException {
        writeProperty("BEGIN", componentName);
    }

    /**
     * Writes a property marking the end of a component (in other words, writes
     * a "END:NAME" property).
     * @param componentName the component name (e.g. "VCARD")
     * @throws IOException if there's an I/O problem
     */
    public void writeEndComponent(String componentName) throws IOException {
        writeProperty("END", componentName);
    }

    /**
     * Writes a "VERSION" property, based on the vCard version that the writer
     * is adhering to.
     * @throws IOException if there's an I/O problem
     */
    public void writeVersion() throws IOException {
        writeProperty("VERSION", version.getVersion());
    }

    /**
     * Writes a property to the vCard data stream.
     * @param propertyName the property name (e.g. "FN")
     * @param value the property value
     * @throws IllegalArgumentException if the property name contains invalid
     * characters
     * @throws IOException if there's an I/O problem
     */
    public void writeProperty(String propertyName, String value) throws IOException {
        writeProperty(null, propertyName, new VCardParameters(), value);
    }

    /**
     * <p>
     * Writes a property to the vCard data stream.
     * </p>
     * <p>
     * Note that the given parameters object may be modified by this method:
     * when writing a 2.1 vCard whose value contains newlines, its encoding is
     * set to {@link Encoding#QUOTED_PRINTABLE}, and whenever the value is
     * written as quoted-printable, its charset is set to the name of the
     * charset that was used for the encoding.
     * </p>
     * @param group the group or null if there is no group
     * @param propertyName the property name (e.g. "FN")
     * @param parameters the property parameters
     * @param value the property value (will be converted to "quoted-printable"
     * encoding if the {@link Encoding#QUOTED_PRINTABLE} parameter is set). A
     * null value is written as an empty string.
     * @throws IllegalArgumentException if the group or property name contains
     * invalid characters
     * @throws IOException if there's an I/O problem
     */
    public void writeProperty(String group, String propertyName, VCardParameters parameters, String value) throws IOException {
        //validate the group name
        if (group != null && !propertyNameRegex.matcher(group).matches()) {
            throw new IllegalArgumentException("Group contains invalid characters.  Valid characters are letters, numbers, and hyphens: " + group);
        }

        //validate the property name
        if (!propertyNameRegex.matcher(propertyName).matches()) {
            throw new IllegalArgumentException("Property name contains invalid characters.  Valid characters are letters, numbers, and hyphens: " + propertyName);
        }

        //may switch the ENCODING parameter to quoted-printable (2.1 only)
        value = sanitizeValue(parameters, value);

        //determine if the property value must be encoded in quoted printable
        //and determine the charset to use when encoding to quoted-printable
        boolean quotedPrintable = (parameters.getEncoding() == Encoding.QUOTED_PRINTABLE);
        Charset charset = null;
        if (quotedPrintable) {
            String charsetParam = parameters.getCharset();
            if (charsetParam != null) {
                try {
                    charset = Charset.forName(charsetParam);
                } catch (IllegalCharsetNameException ignored) {
                    //fall back to the writer's/default charset below
                    charset = null;
                } catch (UnsupportedCharsetException ignored) {
                    //fall back to the writer's/default charset below
                    charset = null;
                }
            }
            if (charset == null) {
                charset = writer.getEncoding();
                if (charset == null) {
                    charset = Charset.defaultCharset();
                }
            }

            //make the CHARSET parameter reflect the charset actually used
            parameters.setCharset(charset.name());
        }

        //write the group
        if (group != null) {
            writer.append(group);
            writer.append('.');
        }

        //write the property name
        writer.append(propertyName);

        //write the parameters
        for (Map.Entry<String, List<String>> subType : parameters) {
            String parameterName = subType.getKey();
            List<String> parameterValues = subType.getValue();
            if (parameterValues.isEmpty()) {
                continue;
            }

            if (version == VCardVersion.V2_1) {
                boolean isTypeParameter = VCardParameters.TYPE.equalsIgnoreCase(parameterName);
                for (String parameterValue : parameterValues) {
                    parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);

                    if (isTypeParameter) {
                        //e.g. ADR;HOME;WORK:
                        //use a fixed locale so that the result does not depend on the
                        //JVM's default locale (e.g. "i" becomes a dotted capital I in Turkish)
                        writer.append(';').append(parameterValue.toUpperCase(Locale.ENGLISH));
                    } else {
                        //e.g. ADR;FOO=bar;FOO=car:
                        writer.append(';').append(parameterName).append('=').append(parameterValue);
                    }
                }
            } else {
                //e.g. ADR;TYPE=home,work,"another,value":

                boolean first = true;
                writer.append(';').append(parameterName).append('=');
                for (String parameterValue : parameterValues) {
                    if (!first) {
                        writer.append(',');
                    }

                    parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);

                    //surround with double quotes if contains special chars
                    if (quoteMeRegex.matcher(parameterValue).find()) {
                        writer.append('"');
                        writer.append(parameterValue);
                        writer.append('"');
                    } else {
                        writer.append(parameterValue);
                    }

                    first = false;
                }
            }
        }

        writer.append(':');
        writer.append(value, quotedPrintable, charset);
        writer.append(newline);
    }

    /**
     * Sanitizes a property value for safe inclusion in a vCard. For 2.1
     * vCards, a value that contains newlines is returned as-is and the
     * parameters are modified so that the value gets written in
     * quoted-printable encoding. Otherwise, newlines are escaped.
     * @param parameters the parameters (may be modified)
     * @param value the value to sanitize
     * @return the sanitized value (empty string if the value was null)
     */
    private String sanitizeValue(VCardParameters parameters, String value) {
        if (value == null) {
            return "";
        }

        if (version == VCardVersion.V2_1 && containsNewlines(value)) {
            //2.1 does not support the "\n" escape sequence (see "Delimiters" sub-section in section 2 of the specs)
            parameters.setEncoding(Encoding.QUOTED_PRINTABLE);
            return value;
        }

        return escapeNewlines(value);
    }

    /**
     * Removes or escapes all invalid characters in a parameter value. The
     * problems listener (if set) is notified when the value was changed in a
     * lossy way.
     * @param parameterValue the parameter value
     * @param parameterName the parameter name
     * @param propertyName the name of the property to which the parameter
     * belongs
     * @return the sanitized parameter value
     */
    private String sanitizeParameterValue(String parameterValue, String parameterName, String propertyName) {
        String modifiedValue = null;
        boolean valueChanged = false;

        //Note: String reference comparisons ("==") are used because the Pattern class returns the same instance if the String wasn't changed

        switch (version) {
        case V2_1:
            //remove invalid characters
            modifiedValue = removeInvalidParameterValueChars(parameterValue);

            //replace newlines with spaces
            modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" ");

            //check to see if value was changed
            //(done before escaping, because escaping is not a lossy change)
            valueChanged = (parameterValue != modifiedValue);

            //escape backslashes
            modifiedValue = modifiedValue.replace("\\", "\\\\");

            //escape semi-colons (see section 2)
            modifiedValue = modifiedValue.replace(";", "\\;");

            break;

        case V3_0:
            //remove invalid characters
            modifiedValue = removeInvalidParameterValueChars(parameterValue);

            if (caretEncodingEnabled) {
                //(checked before encoding, because caret encoding is not a lossy change)
                valueChanged = (modifiedValue != parameterValue);

                //apply caret encoding
                modifiedValue = applyCaretEncoding(modifiedValue);
            } else {
                //replace double quotes with single quotes
                modifiedValue = modifiedValue.replace('"', '\'');

                //replace newlines with spaces
                modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" ");

                valueChanged = (modifiedValue != parameterValue);
            }

            break;

        case V4_0:
            //remove invalid characters
            modifiedValue = removeInvalidParameterValueChars(parameterValue);

            if (caretEncodingEnabled) {
                //(checked before encoding, because caret encoding is not a lossy change)
                valueChanged = (modifiedValue != parameterValue);

                //apply caret encoding
                modifiedValue = applyCaretEncoding(modifiedValue);
            } else {
                //replace double quotes with single quotes
                modifiedValue = modifiedValue.replace('"', '\'');

                //(checked before escaping, because escaping is not a lossy change)
                valueChanged = (modifiedValue != parameterValue);

                //backslash-escape newlines (for the "LABEL" parameter)
                modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll("\\\\\\n");
            }

            break;
        }

        if (valueChanged && problemsListener != null) {
            problemsListener.onParameterValueChanged(propertyName, parameterName, parameterValue, modifiedValue);
        }

        return modifiedValue;
    }

    /**
     * Removes invalid characters from a parameter value, according to the
     * vCard version that the writer is adhering to.
     * @param value the parameter value
     * @return the sanitized parameter value (the same String instance is
     * returned if no characters were removed)
     */
    private String removeInvalidParameterValueChars(String value) {
        BitSet invalidChars = invalidParamValueChars.get(version);
        StringBuilder sb = new StringBuilder(value.length());

        for (int i = 0; i < value.length(); i++) {
            char ch = value.charAt(i);
            if (!invalidChars.get(ch)) {
                sb.append(ch);
            }
        }

        //return the original instance if nothing was removed, so that callers
        //can detect changes using a reference comparison
        return (sb.length() == value.length()) ? value : sb.toString();
    }

    /**
     * Applies circumflex accent encoding to a string.
     * @param value the string
     * @return the encoded string
     */
    private String applyCaretEncoding(String value) {
        //carets must be escaped first so that the carets added by the
        //subsequent replacements are not escaped again
        value = value.replace("^", "^^");
        value = newlineRegex.matcher(value).replaceAll("^n");
        value = value.replace("\"", "^'");
        return value;
    }

    /**
     * <p>
     * Escapes all newline character sequences. The newline character sequences
     * are:
     * </p>
     * <ul>
     * <li>{@code \r\n}</li>
     * <li>{@code \r}</li>
     * <li>{@code \n}</li>
     * </ul>
     * @param text the text to escape
     * @return the escaped text
     */
    private String escapeNewlines(String text) {
        return newlineRegex.matcher(text).replaceAll("\\\\n");
    }

    /**
     * <p>
     * Determines if a string has at least one newline character sequence. The
     * newline character sequences are:
     * </p>
     * <ul>
     * <li>{@code \r\n}</li>
     * <li>{@code \r}</li>
     * <li>{@code \n}</li>
     * </ul>
     * @param text the text to check
     * @return true if the text contains at least one newline character
     * sequence, false if not
     */
    private boolean containsNewlines(String text) {
        return newlineRegex.matcher(text).find();
    }

    /**
     * Flushes the underlying {@link Writer} object.
     * @throws IOException if there's a problem flushing the writer
     */
    public void flush() throws IOException {
        writer.flush();
    }

    /**
     * Closes the underlying {@link Writer} object.
     * @throws IOException if there's a problem closing the writer
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * A listener whose methods are invoked when non-critical issues occur
     * during the writing process.
     * @author Michael Angstadt
     */
    public static interface ProblemsListener {
        /**
         * Called when a parameter value is changed in a lossy way, due to it
         * containing invalid characters. If a character can be escaped (such as
         * the "^" character when caret encoding is enabled), then this does not
         * count as the parameter being modified because it can be decoded
         * without losing any information.
         * @param propertyName the name of the property to which the parameter
         * belongs
         * @param parameterName the parameter name
         * @param originalValue the original parameter value
         * @param modifiedValue the modified parameter value
         */
        void onParameterValueChanged(String propertyName, String parameterName, String originalValue, String modifiedValue);
    }
}