001    package ezvcard.io.text;
002    
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import ezvcard.VCardVersion;
import ezvcard.parameter.Encoding;
import ezvcard.parameter.VCardParameters;
017    
018    /*
019     Copyright (c) 2013, Michael Angstadt
020     All rights reserved.
021    
022     Redistribution and use in source and binary forms, with or without
023     modification, are permitted provided that the following conditions are met: 
024    
025     1. Redistributions of source code must retain the above copyright notice, this
026     list of conditions and the following disclaimer. 
027     2. Redistributions in binary form must reproduce the above copyright notice,
028     this list of conditions and the following disclaimer in the documentation
029     and/or other materials provided with the distribution. 
030    
031     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
032     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
033     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
034     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
035     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
036     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
037     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
038     ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
039     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
040     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
041    
042     The views and conclusions contained in the software and documentation are those
043     of the authors and should not be interpreted as representing official policies, 
044     either expressed or implied, of the FreeBSD Project.
045     */
046    
047    /**
048     * Writes data to an vCard data stream.
049     * @author Michael Angstadt
050     */
051    public class VCardRawWriter implements Closeable, Flushable {
052            /**
053             * Regular expression used to determine if a parameter value needs to be
054             * quoted.
055             */
056            private static final Pattern quoteMeRegex = Pattern.compile(".*?[,:;].*");
057    
058            /**
059             * Regular expression used to detect newline character sequences.
060             */
061            private static final Pattern newlineRegex = Pattern.compile("\\r\\n|\\r|\\n");
062    
063            /**
064             * Regular expression used to determine if a property name contains any
065             * invalid characters.
066             */
067            private static final Pattern propertyNameRegex = Pattern.compile("(?i)[-a-z0-9]+");
068    
069            /**
070             * The characters that are not valid in parameter values and that should be
071             * removed.
072             */
073            private static final Map<VCardVersion, BitSet> invalidParamValueChars = new HashMap<VCardVersion, BitSet>();
074            static {
075                    BitSet controlChars = new BitSet(128);
076                    controlChars.set(0, 31);
077                    controlChars.set(127);
078                    controlChars.set('\t', false); //allow
079                    controlChars.set('\n', false); //allow
080                    controlChars.set('\r', false); //allow
081    
082                    //2.1
083                    {
084                            BitSet bitSet = new BitSet(128);
085                            bitSet.or(controlChars);
086    
087                            bitSet.set(',');
088                            bitSet.set('.');
089                            bitSet.set(':');
090                            bitSet.set('=');
091                            bitSet.set('[');
092                            bitSet.set(']');
093    
094                            invalidParamValueChars.put(VCardVersion.V2_1, bitSet);
095                    }
096    
097                    //3.0, 4.0
098                    {
099                            BitSet bitSet = new BitSet(128);
100                            bitSet.or(controlChars);
101    
102                            invalidParamValueChars.put(VCardVersion.V3_0, bitSet);
103                            invalidParamValueChars.put(VCardVersion.V4_0, bitSet);
104                    }
105            }
106    
107            private final String newline;
108            private boolean caretEncodingEnabled = false;
109            private final FoldingScheme foldingScheme;
110            private final FoldedLineWriter writer;
111            private ProblemsListener problemsListener;
112            private VCardVersion version;
113    
114            /**
115             * Creates a vCard raw writer using the standard folding scheme and newline
116             * sequence.
117             * @param writer the writer to the data stream
118             * @param version the vCard version to adhere to
119             */
120            public VCardRawWriter(Writer writer, VCardVersion version) {
121                    this(writer, version, FoldingScheme.MIME_DIR);
122            }
123    
124            /**
125             * Creates a vCard raw writer using the standard newline sequence.
126             * @param writer the writer to the data stream
127             * @param version the vCard version to adhere to
128             * @param foldingScheme the folding scheme to use or null not to fold at all
129             */
130            public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme) {
131                    this(writer, version, foldingScheme, "\r\n");
132            }
133    
134            /**
135             * Creates a vCard raw writer.
136             * @param writer the writer to the data stream
137             * @param version the vCard version to adhere to
138             * @param foldingScheme the folding scheme to use or null not to fold at all
139             * @param newline the newline sequence to use
140             */
141            public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme, String newline) {
142                    if (foldingScheme == null) {
143                            this.writer = new FoldedLineWriter(writer, null, "", newline);
144                    } else {
145                            this.writer = new FoldedLineWriter(writer, foldingScheme.getLineLength(), foldingScheme.getIndent(), newline);
146                    }
147                    this.version = version;
148                    this.foldingScheme = foldingScheme;
149                    this.newline = newline;
150            }
151    
152            /**
153             * <p>
154             * Gets whether the writer will apply circumflex accent encoding on
155             * parameter values (disabled by default, only applies to 3.0 and 4.0
156             * vCards). This escaping mechanism allows for newlines and double quotes to
157             * be included in parameter values.
158             * </p>
159             * 
160             * <p>
161             * When disabled, the writer will replace newlines with spaces and double
162             * quotes with single quotes.
163             * </p>
164             * 
165             * <table border="1">
166             * <tr>
167             * <th>Character</th>
168             * <th>Replacement<br>
169             * (when disabled)</th>
170             * <th>Replacement<br>
171             * (when enabled)</th>
172             * </tr>
173             * <tr>
174             * <td>{@code "}</td>
175             * <td>{@code '}</td>
176             * <td>{@code ^'}</td>
177             * </tr>
178             * <tr>
179             * <td><i>newline</i></td>
180             * <td><code><i>space</i></code></td>
181             * <td>{@code ^n}</td>
182             * </tr>
183             * <tr>
184             * <td>{@code ^}</td>
185             * <td>{@code ^}</td>
186             * <td>{@code ^^}</td>
187             * </tr>
188             * </table>
189             * 
190             * <p>
191             * Example:
192             * </p>
193             * 
194             * <pre>
195             * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
196             *  sburgh, PA 15212":40.446816;80.00566
197             * </pre>
198             * 
199             * @return true if circumflex accent encoding is enabled, false if not
200             * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
201             */
202            public boolean isCaretEncodingEnabled() {
203                    return caretEncodingEnabled;
204            }
205    
206            /**
207             * <p>
208             * Sets whether the writer will apply circumflex accent encoding on
209             * parameter values (disabled by default, only applies to 3.0 and 4.0
210             * vCards). This escaping mechanism allows for newlines and double quotes to
211             * be included in parameter values.
212             * </p>
213             * 
214             * <p>
215             * When disabled, the writer will replace newlines with spaces and double
216             * quotes with single quotes.
217             * </p>
218             * 
219             * <table border="1">
220             * <tr>
221             * <th>Character</th>
222             * <th>Replacement<br>
223             * (when disabled)</th>
224             * <th>Replacement<br>
225             * (when enabled)</th>
226             * </tr>
227             * <tr>
228             * <td>{@code "}</td>
229             * <td>{@code '}</td>
230             * <td>{@code ^'}</td>
231             * </tr>
232             * <tr>
233             * <td><i>newline</i></td>
234             * <td><code><i>space</i></code></td>
235             * <td>{@code ^n}</td>
236             * </tr>
237             * <tr>
238             * <td>{@code ^}</td>
239             * <td>{@code ^}</td>
240             * <td>{@code ^^}</td>
241             * </tr>
242             * </table>
243             * 
244             * <p>
245             * Example:
246             * </p>
247             * 
248             * <pre>
249             * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
250             *  sburgh, PA 15212":40.446816;80.00566
251             * </pre>
252             * 
253             * @param enable true to use circumflex accent encoding, false not to
254             * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
255             */
256            public void setCaretEncodingEnabled(boolean enable) {
257                    caretEncodingEnabled = enable;
258            }
259    
260            /**
261             * Gets the vCard version that the writer is adhering to.
262             * @return the version
263             */
264            public VCardVersion getVersion() {
265                    return version;
266            }
267    
268            /**
269             * Sets the vCard version that the writer should adhere to.
270             * @param version the version
271             */
272            public void setVersion(VCardVersion version) {
273                    this.version = version;
274            }
275    
276            /**
277             * Gets the newline sequence that is used to separate lines.
278             * @return the newline sequence
279             */
280            public String getNewline() {
281                    return newline;
282            }
283    
284            /**
285             * Gets the problems listener.
286             * @return the listener or null if not set
287             */
288            public ProblemsListener getProblemsListener() {
289                    return problemsListener;
290            }
291    
292            /**
293             * Sets the problems listener.
294             * @param problemsListener the listener or null to remove
295             */
296            public void setProblemsListener(ProblemsListener problemsListener) {
297                    this.problemsListener = problemsListener;
298            }
299    
300            /**
301             * Gets the rules for how each line is folded.
302             * @return the folding scheme or null if the lines are not folded
303             */
304            public FoldingScheme getFoldingScheme() {
305                    return foldingScheme;
306            }
307    
308            /**
309             * Writes a property marking the beginning of a component (in other words,
310             * writes a "BEGIN:NAME" property).
311             * @param componentName the component name (e.g. "VCARD")
312             * @throws IOException if there's an I/O problem
313             */
314            public void writeBeginComponent(String componentName) throws IOException {
315                    writeProperty("BEGIN", componentName);
316            }
317    
318            /**
319             * Writes a property marking the end of a component (in other words, writes
320             * a "END:NAME" property).
321             * @param componentName the component name (e.g. "VCARD")
322             * @throws IOException if there's an I/O problem
323             */
324            public void writeEndComponent(String componentName) throws IOException {
325                    writeProperty("END", componentName);
326            }
327    
328            /**
329             * Writes a "VERSION" property, based on the vCard version that the writer
330             * is adhering to.
331             * @throws IOException if there's an I/O problem
332             */
333            public void writeVersion() throws IOException {
334                    writeProperty("VERSION", version.getVersion());
335            }
336    
337            /**
338             * Writes a property to the vCard data stream.
339             * @param propertyName the property name (e.g. "FN")
340             * @param value the property value
341             * @throws IllegalArgumentException if the property name contains invalid
342             * characters
343             * @throws IOException if there's an I/O problem
344             */
345            public void writeProperty(String propertyName, String value) throws IOException {
346                    writeProperty(null, propertyName, new VCardParameters(), value);
347            }
348    
349            /**
350             * Writes a property to the vCard data stream.
351             * @param group the group or null if there is no group
352             * @param propertyName the property name (e.g. "FN")
353             * @param parameters the property parameters
354             * @param value the property value (will be converted to "quoted-printable"
355             * encoding if the {@link Encoding#QUOTED_PRINTABLE} parameter is set)
356             * @throws IllegalArgumentException if the group or property name contains
357             * invalid characters
358             * @throws IOException if there's an I/O problem
359             */
360            public void writeProperty(String group, String propertyName, VCardParameters parameters, String value) throws IOException {
361                    //validate the group name
362                    if (group != null && !propertyNameRegex.matcher(group).matches()) {
363                            throw new IllegalArgumentException("Group contains invalid characters.  Valid characters are letters, numbers, and hyphens: " + group);
364                    }
365    
366                    //validate the property name
367                    if (!propertyNameRegex.matcher(propertyName).matches()) {
368                            throw new IllegalArgumentException("Property name contains invalid characters.  Valid characters are letters, numbers, and hyphens: " + propertyName);
369                    }
370    
371                    value = sanitizeValue(parameters, value);
372    
373                    //determine if the property value must be encoded in quoted printable
374                    //and determine the charset to use when encoding to quoted-printable
375                    boolean quotedPrintable = (parameters.getEncoding() == Encoding.QUOTED_PRINTABLE);
376                    Charset charset = null;
377                    if (quotedPrintable) {
378                            String charsetParam = parameters.getCharset();
379                            if (charsetParam == null) {
380                                    charset = Charset.forName("UTF-8");
381                            } else {
382                                    try {
383                                            charset = Charset.forName(charsetParam);
384                                    } catch (Throwable e) {
385                                            charset = Charset.forName("UTF-8");
386                                    }
387                            }
388                            parameters.setCharset(charset.name());
389                    }
390    
391                    //write the group
392                    if (group != null) {
393                            writer.append(group);
394                            writer.append('.');
395                    }
396    
397                    //write the property name
398                    writer.append(propertyName);
399    
400                    //write the parameters
401                    for (Map.Entry<String, List<String>> subType : parameters) {
402                            String parameterName = subType.getKey();
403                            List<String> parameterValues = subType.getValue();
404                            if (parameterValues.isEmpty()) {
405                                    continue;
406                            }
407    
408                            if (version == VCardVersion.V2_1) {
409                                    boolean isTypeParameter = VCardParameters.TYPE.equalsIgnoreCase(parameterName);
410                                    for (String parameterValue : parameterValues) {
411                                            parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);
412    
413                                            if (isTypeParameter) {
414                                                    //e.g. ADR;HOME;WORK:
415                                                    writer.append(';').append(parameterValue.toUpperCase());
416                                            } else {
417                                                    //e.g. ADR;FOO=bar;FOO=car:
418                                                    writer.append(';').append(parameterName).append('=').append(parameterValue);
419                                            }
420                                    }
421                            } else {
422                                    //e.g. ADR;TYPE=home,work,"another,value":
423    
424                                    boolean first = true;
425                                    writer.append(';').append(parameterName).append('=');
426                                    for (String parameterValue : parameterValues) {
427                                            if (!first) {
428                                                    writer.append(',');
429                                            }
430    
431                                            parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);
432    
433                                            //surround with double quotes if contains special chars
434                                            if (quoteMeRegex.matcher(parameterValue).matches()) {
435                                                    writer.append('"');
436                                                    writer.append(parameterValue);
437                                                    writer.append('"');
438                                            } else {
439                                                    writer.append(parameterValue);
440                                            }
441    
442                                            first = false;
443                                    }
444                            }
445                    }
446    
447                    writer.append(':');
448                    writer.append(value, quotedPrintable, charset);
449                    writer.append(newline);
450            }
451    
452            /**
453             * Sanitizes a property value for safe inclusion in a vCard.
454             * @param parameters the parameters
455             * @param value the value to sanitize
456             * @return the sanitized value
457             */
458            private String sanitizeValue(VCardParameters parameters, String value) {
459                    if (value == null) {
460                            return "";
461                    }
462    
463                    if (version == VCardVersion.V2_1 && containsNewlines(value)) {
464                            //2.1 does not support the "\n" escape sequence (see "Delimiters" sub-section in section 2 of the specs)
465                            parameters.setEncoding(Encoding.QUOTED_PRINTABLE);
466                            return value;
467                    }
468    
469                    return escapeNewlines(value);
470            }
471    
472            /**
473             * Removes or escapes all invalid characters in a parameter value.
474             * @param parameterValue the parameter value
475             * @param parameterName the parameter name
476             * @param propertyName the name of the property to which the parameter
477             * belongs
478             * @return the sanitized parameter value
479             */
480            private String sanitizeParameterValue(String parameterValue, String parameterName, String propertyName) {
481                    String modifiedValue = null;
482                    boolean valueChanged = false;
483    
484                    //Note: String reference comparisons ("==") are used because the Pattern class returns the same instance if the String wasn't changed
485    
486                    switch (version) {
487                    case V2_1:
488                            //remove invalid characters
489                            modifiedValue = removeInvalidParameterValueChars(parameterValue);
490    
491                            //replace newlines with spaces
492                            modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" ");
493    
494                            //check to see if value was changed
495                            valueChanged = (parameterValue != modifiedValue);
496    
497                            //escape backslashes
498                            modifiedValue = modifiedValue.replace("\\", "\\\\");
499    
500                            //escape semi-colons (see section 2)
501                            modifiedValue = modifiedValue.replace(";", "\\;");
502    
503                            break;
504    
505                    case V3_0:
506                            //remove invalid characters
507                            modifiedValue = removeInvalidParameterValueChars(parameterValue);
508    
509                            if (caretEncodingEnabled) {
510                                    valueChanged = (modifiedValue != parameterValue);
511    
512                                    //apply caret encoding
513                                    modifiedValue = applyCaretEncoding(modifiedValue);
514                            } else {
515                                    //replace double quotes with single quotes
516                                    modifiedValue = modifiedValue.replace('"', '\'');
517    
518                                    //replace newlines with spaces
519                                    modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" ");
520    
521                                    valueChanged = (modifiedValue != parameterValue);
522                            }
523    
524                            break;
525    
526                    case V4_0:
527                            //remove invalid characters
528                            modifiedValue = removeInvalidParameterValueChars(parameterValue);
529    
530                            if (caretEncodingEnabled) {
531                                    valueChanged = (modifiedValue != parameterValue);
532    
533                                    //apply caret encoding
534                                    modifiedValue = applyCaretEncoding(modifiedValue);
535                            } else {
536                                    //replace double quotes with single quotes
537                                    modifiedValue = modifiedValue.replace('"', '\'');
538    
539                                    valueChanged = (modifiedValue != parameterValue);
540    
541                                    //backslash-escape newlines (for the "LABEL" parameter)
542                                    modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll("\\\\\\n");
543                            }
544    
545                            break;
546                    }
547    
548                    if (valueChanged && problemsListener != null) {
549                            problemsListener.onParameterValueChanged(propertyName, parameterName, parameterValue, modifiedValue);
550                    }
551    
552                    return modifiedValue;
553            }
554    
555            /**
556             * Removes invalid characters from a parameter value.
557             * @param value the parameter value
558             * @return the sanitized parameter value
559             */
560            private String removeInvalidParameterValueChars(String value) {
561                    BitSet invalidChars = invalidParamValueChars.get(version);
562                    StringBuilder sb = new StringBuilder(value.length());
563    
564                    for (int i = 0; i < value.length(); i++) {
565                            char ch = value.charAt(i);
566                            if (!invalidChars.get(ch)) {
567                                    sb.append(ch);
568                            }
569                    }
570    
571                    return (sb.length() == value.length()) ? value : sb.toString();
572            }
573    
574            /**
575             * Applies circumflex accent encoding to a string.
576             * @param value the string
577             * @return the encoded string
578             */
579            private String applyCaretEncoding(String value) {
580                    value = value.replace("^", "^^");
581                    value = newlineRegex.matcher(value).replaceAll("^n");
582                    value = value.replace("\"", "^'");
583                    return value;
584            }
585    
586            /**
587             * <p>
588             * Escapes all newline character sequences. The newline character sequences
589             * are:
590             * </p>
591             * <ul>
592             * <li>{@code \r\n}</li>
593             * <li>{@code \r}</li>
594             * <li>{@code \n}</li>
595             * </ul>
596             * @param text the text to escape
597             * @return the escaped text
598             */
599            private String escapeNewlines(String text) {
600                    return newlineRegex.matcher(text).replaceAll("\\\\n");
601            }
602    
603            /**
604             * <p>
605             * Determines if a string has at least one newline character sequence. The
606             * newline character sequences are:
607             * </p>
608             * <ul>
609             * <li>{@code \r\n}</li>
610             * <li>{@code \r}</li>
611             * <li>{@code \n}</li>
612             * </ul>
613             * @param text the text to escape
614             * @return the escaped text
615             */
616            private boolean containsNewlines(String text) {
617                    return newlineRegex.matcher(text).find();
618            }
619    
620            /**
621             * Flushes the underlying {@link Writer} object.
622             * @throws IOException if there's a problem flushing the writer
623             */
624            public void flush() throws IOException {
625                    writer.flush();
626            }
627    
628            /**
629             * Closes the underlying {@link Writer} object.
630             * @throws IOException if there's a problem closing the writer
631             */
632            public void close() throws IOException {
633                    writer.close();
634            }
635    
636            /**
637             * A listener whose methods are invoked when non-critical issues occur
638             * during the writing process.
639             * @author Michael Angstadt
640             */
641            public static interface ProblemsListener {
642                    /**
643                     * Called when a parameter value is changed in a lossy way, due to it
644                     * containing invalid characters. If a character can be escaped (such as
645                     * the "^" character when caret encoding is enabled), then this does not
646                     * count as the parameter being modified because it can be decoded
647                     * without losing any information.
648                     * @param propertyName the name of the property to which the parameter
649                     * belongs
650                     * @param parameterName the parameter name
651                     * @param originalValue the original parameter value
652                     * @param modifiedValue the modified parameter value
653                     */
654                    void onParameterValueChanged(String propertyName, String parameterName, String originalValue, String modifiedValue);
655            }
656    }