001    package ezvcard.io.text;
002    
003    import java.io.Closeable;
004    import java.io.Flushable;
005    import java.io.IOException;
006    import java.io.Writer;
007    import java.nio.charset.Charset;
008    import java.nio.charset.IllegalCharsetNameException;
009    import java.nio.charset.UnsupportedCharsetException;
010    import java.util.BitSet;
011    import java.util.HashMap;
012    import java.util.List;
013    import java.util.Map;
014    import java.util.regex.Pattern;
015    
016    import ezvcard.VCardVersion;
017    import ezvcard.parameter.Encoding;
018    import ezvcard.parameter.VCardParameters;
019    
020    /*
021     Copyright (c) 2013, Michael Angstadt
022     All rights reserved.
023    
024     Redistribution and use in source and binary forms, with or without
025     modification, are permitted provided that the following conditions are met: 
026    
027     1. Redistributions of source code must retain the above copyright notice, this
028     list of conditions and the following disclaimer. 
029     2. Redistributions in binary form must reproduce the above copyright notice,
030     this list of conditions and the following disclaimer in the documentation
031     and/or other materials provided with the distribution. 
032    
033     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
034     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
035     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
036     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
037     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
038     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
039     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
040     ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
041     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
042     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
043    
044     The views and conclusions contained in the software and documentation are those
045     of the authors and should not be interpreted as representing official policies, 
046     either expressed or implied, of the FreeBSD Project.
047     */
048    
049    /**
050     * Writes data to an vCard data stream.
051     * @author Michael Angstadt
052     */
053    public class VCardRawWriter implements Closeable, Flushable {
054            /**
055             * Regular expression used to determine if a parameter value needs to be
056             * quoted.
057             */
058            private static final Pattern quoteMeRegex = Pattern.compile(".*?[,:;].*");
059    
060            /**
061             * Regular expression used to detect newline character sequences.
062             */
063            private static final Pattern newlineRegex = Pattern.compile("\\r\\n|\\r|\\n");
064    
065            /**
066             * Regular expression used to determine if a property name contains any
067             * invalid characters.
068             */
069            private static final Pattern propertyNameRegex = Pattern.compile("(?i)[-a-z0-9]+");
070    
071            /**
072             * The characters that are not valid in parameter values and that should be
073             * removed.
074             */
075            private static final Map<VCardVersion, BitSet> invalidParamValueChars = new HashMap<VCardVersion, BitSet>();
076            static {
077                    BitSet controlChars = new BitSet(128);
078                    controlChars.set(0, 31);
079                    controlChars.set(127);
080                    controlChars.set('\t', false); //allow
081                    controlChars.set('\n', false); //allow
082                    controlChars.set('\r', false); //allow
083    
084                    //2.1
085                    {
086                            BitSet bitSet = new BitSet(128);
087                            bitSet.or(controlChars);
088    
089                            bitSet.set(',');
090                            bitSet.set('.');
091                            bitSet.set(':');
092                            bitSet.set('=');
093                            bitSet.set('[');
094                            bitSet.set(']');
095    
096                            invalidParamValueChars.put(VCardVersion.V2_1, bitSet);
097                    }
098    
099                    //3.0, 4.0
100                    {
101                            BitSet bitSet = new BitSet(128);
102                            bitSet.or(controlChars);
103    
104                            invalidParamValueChars.put(VCardVersion.V3_0, bitSet);
105                            invalidParamValueChars.put(VCardVersion.V4_0, bitSet);
106                    }
107            }
108    
109            private final String newline;
110            private boolean caretEncodingEnabled = false;
111            private final FoldingScheme foldingScheme;
112            private final FoldedLineWriter writer;
113            private ProblemsListener problemsListener;
114            private VCardVersion version;
115    
116            /**
117             * Creates a vCard raw writer using the standard folding scheme and newline
118             * sequence.
119             * @param writer the writer to the data stream
120             * @param version the vCard version to adhere to
121             */
122            public VCardRawWriter(Writer writer, VCardVersion version) {
123                    this(writer, version, FoldingScheme.MIME_DIR);
124            }
125    
126            /**
127             * Creates a vCard raw writer using the standard newline sequence.
128             * @param writer the writer to the data stream
129             * @param version the vCard version to adhere to
130             * @param foldingScheme the folding scheme to use or null not to fold at all
131             */
132            public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme) {
133                    this(writer, version, foldingScheme, "\r\n");
134            }
135    
136            /**
137             * Creates a vCard raw writer.
138             * @param writer the writer to the data stream
139             * @param version the vCard version to adhere to
140             * @param foldingScheme the folding scheme to use or null not to fold at all
141             * @param newline the newline sequence to use
142             */
143            public VCardRawWriter(Writer writer, VCardVersion version, FoldingScheme foldingScheme, String newline) {
144                    if (foldingScheme == null) {
145                            this.writer = new FoldedLineWriter(writer, null, "", newline);
146                    } else {
147                            this.writer = new FoldedLineWriter(writer, foldingScheme.getLineLength(), foldingScheme.getIndent(), newline);
148                    }
149                    this.version = version;
150                    this.foldingScheme = foldingScheme;
151                    this.newline = newline;
152            }
153    
154            /**
155             * <p>
156             * Gets whether the writer will apply circumflex accent encoding on
157             * parameter values (disabled by default, only applies to 3.0 and 4.0
158             * vCards). This escaping mechanism allows for newlines and double quotes to
159             * be included in parameter values.
160             * </p>
161             * 
162             * <p>
163             * When disabled, the writer will replace newlines with spaces and double
164             * quotes with single quotes.
165             * </p>
166             * 
167             * <table border="1">
168             * <tr>
169             * <th>Character</th>
170             * <th>Replacement<br>
171             * (when disabled)</th>
172             * <th>Replacement<br>
173             * (when enabled)</th>
174             * </tr>
175             * <tr>
176             * <td>{@code "}</td>
177             * <td>{@code '}</td>
178             * <td>{@code ^'}</td>
179             * </tr>
180             * <tr>
181             * <td><i>newline</i></td>
182             * <td><code><i>space</i></code></td>
183             * <td>{@code ^n}</td>
184             * </tr>
185             * <tr>
186             * <td>{@code ^}</td>
187             * <td>{@code ^}</td>
188             * <td>{@code ^^}</td>
189             * </tr>
190             * </table>
191             * 
192             * <p>
193             * Example:
194             * </p>
195             * 
196             * <pre>
197             * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
198             *  sburgh, PA 15212":40.446816;80.00566
199             * </pre>
200             * 
201             * @return true if circumflex accent encoding is enabled, false if not
202             * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
203             */
204            public boolean isCaretEncodingEnabled() {
205                    return caretEncodingEnabled;
206            }
207    
208            /**
209             * <p>
210             * Sets whether the writer will apply circumflex accent encoding on
211             * parameter values (disabled by default, only applies to 3.0 and 4.0
212             * vCards). This escaping mechanism allows for newlines and double quotes to
213             * be included in parameter values.
214             * </p>
215             * 
216             * <p>
217             * When disabled, the writer will replace newlines with spaces and double
218             * quotes with single quotes.
219             * </p>
220             * 
221             * <table border="1">
222             * <tr>
223             * <th>Character</th>
224             * <th>Replacement<br>
225             * (when disabled)</th>
226             * <th>Replacement<br>
227             * (when enabled)</th>
228             * </tr>
229             * <tr>
230             * <td>{@code "}</td>
231             * <td>{@code '}</td>
232             * <td>{@code ^'}</td>
233             * </tr>
234             * <tr>
235             * <td><i>newline</i></td>
236             * <td><code><i>space</i></code></td>
237             * <td>{@code ^n}</td>
238             * </tr>
239             * <tr>
240             * <td>{@code ^}</td>
241             * <td>{@code ^}</td>
242             * <td>{@code ^^}</td>
243             * </tr>
244             * </table>
245             * 
246             * <p>
247             * Example:
248             * </p>
249             * 
250             * <pre>
251             * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPitt
252             *  sburgh, PA 15212":40.446816;80.00566
253             * </pre>
254             * 
255             * @param enable true to use circumflex accent encoding, false not to
256             * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
257             */
258            public void setCaretEncodingEnabled(boolean enable) {
259                    caretEncodingEnabled = enable;
260            }
261    
262            /**
263             * Gets the vCard version that the writer is adhering to.
264             * @return the version
265             */
266            public VCardVersion getVersion() {
267                    return version;
268            }
269    
270            /**
271             * Sets the vCard version that the writer should adhere to.
272             * @param version the version
273             */
274            public void setVersion(VCardVersion version) {
275                    this.version = version;
276            }
277    
278            /**
279             * Gets the newline sequence that is used to separate lines.
280             * @return the newline sequence
281             */
282            public String getNewline() {
283                    return newline;
284            }
285    
286            /**
287             * Gets the problems listener.
288             * @return the listener or null if not set
289             */
290            public ProblemsListener getProblemsListener() {
291                    return problemsListener;
292            }
293    
294            /**
295             * Sets the problems listener.
296             * @param problemsListener the listener or null to remove
297             */
298            public void setProblemsListener(ProblemsListener problemsListener) {
299                    this.problemsListener = problemsListener;
300            }
301    
302            /**
303             * Gets the rules for how each line is folded.
304             * @return the folding scheme or null if the lines are not folded
305             */
306            public FoldingScheme getFoldingScheme() {
307                    return foldingScheme;
308            }
309    
310            /**
311             * Writes a property marking the beginning of a component (in other words,
312             * writes a "BEGIN:NAME" property).
313             * @param componentName the component name (e.g. "VCARD")
314             * @throws IOException if there's an I/O problem
315             */
316            public void writeBeginComponent(String componentName) throws IOException {
317                    writeProperty("BEGIN", componentName);
318            }
319    
320            /**
321             * Writes a property marking the end of a component (in other words, writes
322             * a "END:NAME" property).
323             * @param componentName the component name (e.g. "VCARD")
324             * @throws IOException if there's an I/O problem
325             */
326            public void writeEndComponent(String componentName) throws IOException {
327                    writeProperty("END", componentName);
328            }
329    
330            /**
331             * Writes a "VERSION" property, based on the vCard version that the writer
332             * is adhering to.
333             * @throws IOException if there's an I/O problem
334             */
335            public void writeVersion() throws IOException {
336                    writeProperty("VERSION", version.getVersion());
337            }
338    
339            /**
340             * Writes a property to the vCard data stream.
341             * @param propertyName the property name (e.g. "FN")
342             * @param value the property value
343             * @throws IllegalArgumentException if the property name contains invalid
344             * characters
345             * @throws IOException if there's an I/O problem
346             */
347            public void writeProperty(String propertyName, String value) throws IOException {
348                    writeProperty(null, propertyName, new VCardParameters(), value);
349            }
350    
351            /**
352             * Writes a property to the vCard data stream.
353             * @param group the group or null if there is no group
354             * @param propertyName the property name (e.g. "FN")
355             * @param parameters the property parameters
356             * @param value the property value (will be converted to "quoted-printable"
357             * encoding if the {@link Encoding#QUOTED_PRINTABLE} parameter is set)
358             * @throws IllegalArgumentException if the group or property name contains
359             * invalid characters
360             * @throws IOException if there's an I/O problem
361             */
362            public void writeProperty(String group, String propertyName, VCardParameters parameters, String value) throws IOException {
363                    //validate the group name
364                    if (group != null && !propertyNameRegex.matcher(group).matches()) {
365                            throw new IllegalArgumentException("Group contains invalid characters.  Valid characters are letters, numbers, and hyphens: " + group);
366                    }
367    
368                    //validate the property name
369                    if (!propertyNameRegex.matcher(propertyName).matches()) {
370                            throw new IllegalArgumentException("Property name contains invalid characters.  Valid characters are letters, numbers, and hyphens: " + propertyName);
371                    }
372    
373                    value = sanitizeValue(parameters, value);
374    
375                    //determine if the property value must be encoded in quoted printable
376                    //and determine the charset to use when encoding to quoted-printable
377                    boolean quotedPrintable = (parameters.getEncoding() == Encoding.QUOTED_PRINTABLE);
378                    Charset charset = null;
379                    if (quotedPrintable) {
380                            String charsetParam = parameters.getCharset();
381                            if (charsetParam != null) {
382                                    try {
383                                            charset = Charset.forName(charsetParam);
384                                    } catch (IllegalCharsetNameException e) {
385                                            charset = null;
386                                    } catch (UnsupportedCharsetException e) {
387                                            charset = null;
388                                    }
389                            }
390                            if (charset == null) {
391                                    charset = writer.getEncoding();
392                                    if (charset == null) {
393                                            charset = Charset.defaultCharset();
394                                    }
395                            }
396                            parameters.setCharset(charset.name());
397                    }
398    
399                    //write the group
400                    if (group != null) {
401                            writer.append(group);
402                            writer.append('.');
403                    }
404    
405                    //write the property name
406                    writer.append(propertyName);
407    
408                    //write the parameters
409                    for (Map.Entry<String, List<String>> subType : parameters) {
410                            String parameterName = subType.getKey();
411                            List<String> parameterValues = subType.getValue();
412                            if (parameterValues.isEmpty()) {
413                                    continue;
414                            }
415    
416                            if (version == VCardVersion.V2_1) {
417                                    boolean isTypeParameter = VCardParameters.TYPE.equalsIgnoreCase(parameterName);
418                                    for (String parameterValue : parameterValues) {
419                                            parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);
420    
421                                            if (isTypeParameter) {
422                                                    //e.g. ADR;HOME;WORK:
423                                                    writer.append(';').append(parameterValue.toUpperCase());
424                                            } else {
425                                                    //e.g. ADR;FOO=bar;FOO=car:
426                                                    writer.append(';').append(parameterName).append('=').append(parameterValue);
427                                            }
428                                    }
429                            } else {
430                                    //e.g. ADR;TYPE=home,work,"another,value":
431    
432                                    boolean first = true;
433                                    writer.append(';').append(parameterName).append('=');
434                                    for (String parameterValue : parameterValues) {
435                                            if (!first) {
436                                                    writer.append(',');
437                                            }
438    
439                                            parameterValue = sanitizeParameterValue(parameterValue, parameterName, propertyName);
440    
441                                            //surround with double quotes if contains special chars
442                                            if (quoteMeRegex.matcher(parameterValue).matches()) {
443                                                    writer.append('"');
444                                                    writer.append(parameterValue);
445                                                    writer.append('"');
446                                            } else {
447                                                    writer.append(parameterValue);
448                                            }
449    
450                                            first = false;
451                                    }
452                            }
453                    }
454    
455                    writer.append(':');
456                    writer.append(value, quotedPrintable, charset);
457                    writer.append(newline);
458            }
459    
460            /**
461             * Sanitizes a property value for safe inclusion in a vCard.
462             * @param parameters the parameters
463             * @param value the value to sanitize
464             * @return the sanitized value
465             */
466            private String sanitizeValue(VCardParameters parameters, String value) {
467                    if (value == null) {
468                            return "";
469                    }
470    
471                    if (version == VCardVersion.V2_1 && containsNewlines(value)) {
472                            //2.1 does not support the "\n" escape sequence (see "Delimiters" sub-section in section 2 of the specs)
473                            parameters.setEncoding(Encoding.QUOTED_PRINTABLE);
474                            return value;
475                    }
476    
477                    return escapeNewlines(value);
478            }
479    
480            /**
481             * Removes or escapes all invalid characters in a parameter value.
482             * @param parameterValue the parameter value
483             * @param parameterName the parameter name
484             * @param propertyName the name of the property to which the parameter
485             * belongs
486             * @return the sanitized parameter value
487             */
488            private String sanitizeParameterValue(String parameterValue, String parameterName, String propertyName) {
489                    String modifiedValue = null;
490                    boolean valueChanged = false;
491    
492                    //Note: String reference comparisons ("==") are used because the Pattern class returns the same instance if the String wasn't changed
493    
494                    switch (version) {
495                    case V2_1:
496                            //remove invalid characters
497                            modifiedValue = removeInvalidParameterValueChars(parameterValue);
498    
499                            //replace newlines with spaces
500                            modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" ");
501    
502                            //check to see if value was changed
503                            valueChanged = (parameterValue != modifiedValue);
504    
505                            //escape backslashes
506                            modifiedValue = modifiedValue.replace("\\", "\\\\");
507    
508                            //escape semi-colons (see section 2)
509                            modifiedValue = modifiedValue.replace(";", "\\;");
510    
511                            break;
512    
513                    case V3_0:
514                            //remove invalid characters
515                            modifiedValue = removeInvalidParameterValueChars(parameterValue);
516    
517                            if (caretEncodingEnabled) {
518                                    valueChanged = (modifiedValue != parameterValue);
519    
520                                    //apply caret encoding
521                                    modifiedValue = applyCaretEncoding(modifiedValue);
522                            } else {
523                                    //replace double quotes with single quotes
524                                    modifiedValue = modifiedValue.replace('"', '\'');
525    
526                                    //replace newlines with spaces
527                                    modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll(" ");
528    
529                                    valueChanged = (modifiedValue != parameterValue);
530                            }
531    
532                            break;
533    
534                    case V4_0:
535                            //remove invalid characters
536                            modifiedValue = removeInvalidParameterValueChars(parameterValue);
537    
538                            if (caretEncodingEnabled) {
539                                    valueChanged = (modifiedValue != parameterValue);
540    
541                                    //apply caret encoding
542                                    modifiedValue = applyCaretEncoding(modifiedValue);
543                            } else {
544                                    //replace double quotes with single quotes
545                                    modifiedValue = modifiedValue.replace('"', '\'');
546    
547                                    valueChanged = (modifiedValue != parameterValue);
548    
549                                    //backslash-escape newlines (for the "LABEL" parameter)
550                                    modifiedValue = newlineRegex.matcher(modifiedValue).replaceAll("\\\\\\n");
551                            }
552    
553                            break;
554                    }
555    
556                    if (valueChanged && problemsListener != null) {
557                            problemsListener.onParameterValueChanged(propertyName, parameterName, parameterValue, modifiedValue);
558                    }
559    
560                    return modifiedValue;
561            }
562    
563            /**
564             * Removes invalid characters from a parameter value.
565             * @param value the parameter value
566             * @return the sanitized parameter value
567             */
568            private String removeInvalidParameterValueChars(String value) {
569                    BitSet invalidChars = invalidParamValueChars.get(version);
570                    StringBuilder sb = new StringBuilder(value.length());
571    
572                    for (int i = 0; i < value.length(); i++) {
573                            char ch = value.charAt(i);
574                            if (!invalidChars.get(ch)) {
575                                    sb.append(ch);
576                            }
577                    }
578    
579                    return (sb.length() == value.length()) ? value : sb.toString();
580            }
581    
582            /**
583             * Applies circumflex accent encoding to a string.
584             * @param value the string
585             * @return the encoded string
586             */
587            private String applyCaretEncoding(String value) {
588                    value = value.replace("^", "^^");
589                    value = newlineRegex.matcher(value).replaceAll("^n");
590                    value = value.replace("\"", "^'");
591                    return value;
592            }
593    
594            /**
595             * <p>
596             * Escapes all newline character sequences. The newline character sequences
597             * are:
598             * </p>
599             * <ul>
600             * <li>{@code \r\n}</li>
601             * <li>{@code \r}</li>
602             * <li>{@code \n}</li>
603             * </ul>
604             * @param text the text to escape
605             * @return the escaped text
606             */
607            private String escapeNewlines(String text) {
608                    return newlineRegex.matcher(text).replaceAll("\\\\n");
609            }
610    
611            /**
612             * <p>
613             * Determines if a string has at least one newline character sequence. The
614             * newline character sequences are:
615             * </p>
616             * <ul>
617             * <li>{@code \r\n}</li>
618             * <li>{@code \r}</li>
619             * <li>{@code \n}</li>
620             * </ul>
621             * @param text the text to escape
622             * @return the escaped text
623             */
624            private boolean containsNewlines(String text) {
625                    return newlineRegex.matcher(text).find();
626            }
627    
628            /**
629             * Flushes the underlying {@link Writer} object.
630             * @throws IOException if there's a problem flushing the writer
631             */
632            public void flush() throws IOException {
633                    writer.flush();
634            }
635    
636            /**
637             * Closes the underlying {@link Writer} object.
638             * @throws IOException if there's a problem closing the writer
639             */
640            public void close() throws IOException {
641                    writer.close();
642            }
643    
644            /**
645             * A listener whose methods are invoked when non-critical issues occur
646             * during the writing process.
647             * @author Michael Angstadt
648             */
649            public static interface ProblemsListener {
650                    /**
651                     * Called when a parameter value is changed in a lossy way, due to it
652                     * containing invalid characters. If a character can be escaped (such as
653                     * the "^" character when caret encoding is enabled), then this does not
654                     * count as the parameter being modified because it can be decoded
655                     * without losing any information.
656                     * @param propertyName the name of the property to which the parameter
657                     * belongs
658                     * @param parameterName the parameter name
659                     * @param originalValue the original parameter value
660                     * @param modifiedValue the modified parameter value
661                     */
662                    void onParameterValueChanged(String propertyName, String parameterName, String originalValue, String modifiedValue);
663            }
664    }