001 package ezvcard.io;
002
003 import java.io.BufferedReader;
004 import java.io.IOException;
005 import java.io.Reader;
006 import java.io.StringReader;
007 import java.util.regex.Pattern;
008
009 import ezvcard.util.VCardStringUtils;
010
011 /*
012 Copyright (c) 2012, Michael Angstadt
013 All rights reserved.
014
015 Redistribution and use in source and binary forms, with or without
016 modification, are permitted provided that the following conditions are met:
017
018 1. Redistributions of source code must retain the above copyright notice, this
019 list of conditions and the following disclaimer.
020 2. Redistributions in binary form must reproduce the above copyright notice,
021 this list of conditions and the following disclaimer in the documentation
022 and/or other materials provided with the distribution.
023
024 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
025 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
026 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
027 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
028 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
029 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
030 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
031 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
032 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
033 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
034
035 The views and conclusions contained in the software and documentation are those
036 of the authors and should not be interpreted as representing official policies,
037 either expressed or implied, of the FreeBSD Project.
038 */
039
040 /**
041 * Automatically unfolds lines of text as they are read.
042 * @author Michael Angstadt
043 */
044 public class FoldedLineReader extends BufferedReader {
045 /**
046 * Regular expression used for the incorrectly folded lines that Outlook can
047 * generate.
048 */
049 private static final Pattern outlookQuirk = Pattern.compile("[^:]*?QUOTED-PRINTABLE.*?:.*?=", Pattern.CASE_INSENSITIVE);
050
051 private String lastLine;
052
053 /**
054 * @param reader the reader object to wrap
055 */
056 public FoldedLineReader(Reader reader) {
057 super(reader);
058 }
059
060 /**
061 * @param text the text to read
062 */
063 public FoldedLineReader(String text) {
064 this(new StringReader(text));
065 }
066
067 /**
068 * Reads the next non-empty line. Empty lines must be ignored because some
069 * vCards (i.e. iPhone) contain empty lines. These empty lines appear in
070 * between folded lines, which, if not ignored, will cause the parser to
071 * incorrectly parse the vCard.
072 * @return the next non-empty line or null of EOF
073 * @throws IOException
074 */
075 private String readNonEmptyLine() throws IOException {
076 String line;
077 do {
078 line = super.readLine();
079 } while (line != null && line.length() == 0);
080 return line;
081 }
082
083 /**
084 * Reads the next line, unfolding it if necessary.
085 * @return the next line or null if EOF
086 * @throws IOException if there's a problem reading from the reader
087 */
088 @Override
089 public String readLine() throws IOException {
090 String wholeLine = (lastLine == null) ? readNonEmptyLine() : lastLine;
091 lastLine = null;
092 if (wholeLine == null) {
093 return null;
094 }
095
096 //@formatter:off
097 /*
098 * Outlook incorrectly folds lines that are QUOTED-PRINTABLE. It puts a
099 * "=" at the end of a line to signal that the line's newline characters
100 * should be ignored and that the vCard parser should continue to read
101 * the next line as if it were part of the current line. It does not
102 * prepend each additional line with whitespace.
103 *
104 * For example:
105 *
106 * ------------
107 * BEGIN:VCARD
108 * NOTE;QUOTED-PRINTABLE: This is an=0D=0A=
109 * annoyingly formatted=0D=0A=
110 * note=
111 *
112 * END:VCARD
113 * ------------
114 *
115 * In the example above, note how there is an empty line directly above
116 * END. This is still part of the NOTE property value because the 3rd
117 * line of NOTE ends with a "=".
118 */
119 //@formatter:on
120
121 boolean foldedQuotedPrintableLine = false;
122 if (outlookQuirk.matcher(wholeLine).matches()) {
123 foldedQuotedPrintableLine = true;
124 wholeLine = wholeLine.substring(0, wholeLine.length() - 1); //chop off the ending "="
125 }
126
127 //long lines are folded
128 StringBuilder wholeLineSb = new StringBuilder(wholeLine);
129 while (true) {
130 String line = foldedQuotedPrintableLine ? super.readLine() : readNonEmptyLine();
131 if (line == null) {
132 break;
133 } else if (foldedQuotedPrintableLine) {
134 line = VCardStringUtils.ltrim(line);
135
136 boolean endsInEquals = line.endsWith("=");
137 if (endsInEquals) {
138 line = line.substring(0, line.length() - 1);
139 }
140
141 wholeLineSb.append(line);
142
143 if (!endsInEquals) {
144 break;
145 }
146 } else if (line.length() > 0 && Character.isWhitespace(line.charAt(0))) {
147 //the line was folded
148
149 int lastWhitespace = 1;
150 //Evolution will include real whitespace chars alongside the folding char
151 while (lastWhitespace < line.length() && Character.isWhitespace(line.charAt(lastWhitespace))) {
152 lastWhitespace++;
153 }
154 wholeLineSb.append(line.substring(lastWhitespace));
155 } else {
156 lastLine = line;
157 break;
158 }
159 }
160 return wholeLineSb.toString();
161 }
162 }