001 package ezvcard.io.text;
002
003 import java.io.BufferedReader;
004 import java.io.IOException;
005 import java.io.InputStreamReader;
006 import java.io.Reader;
007 import java.io.StringReader;
008 import java.nio.charset.Charset;
009 import java.util.regex.Pattern;
010
011 import ezvcard.util.StringUtils;
012
013 /*
014 Copyright (c) 2013, Michael Angstadt
015 All rights reserved.
016
017 Redistribution and use in source and binary forms, with or without
018 modification, are permitted provided that the following conditions are met:
019
020 1. Redistributions of source code must retain the above copyright notice, this
021 list of conditions and the following disclaimer.
022 2. Redistributions in binary form must reproduce the above copyright notice,
023 this list of conditions and the following disclaimer in the documentation
024 and/or other materials provided with the distribution.
025
026 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
027 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
028 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
029 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
030 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
031 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
032 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
033 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
034 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
035 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
036
037 The views and conclusions contained in the software and documentation are those
038 of the authors and should not be interpreted as representing official policies,
039 either expressed or implied, of the FreeBSD Project.
040 */
041
/**
 * Automatically unfolds lines of text as they are read.
 *
 * <p>
 * Two folding styles are handled: continuation lines that begin with
 * whitespace, and "quoted-printable" property values whose folded lines end
 * with a "=" character. Empty lines are skipped, except while a folded
 * quoted-printable value is being read.
 * </p>
 * @author Michael Angstadt
 */
public class FoldedLineReader extends BufferedReader {
    /**
     * Regular expression used to detect "quoted-printable" property values
     * whose line ends with the "=" folding marker.
     */
    private static final Pattern FOLDED_QUOTED_PRINTABLE_VALUE_REGEX = Pattern.compile("[^:]*?QUOTED-PRINTABLE.*?:.*?=", Pattern.CASE_INSENSITIVE);

    /**
     * Look-ahead line that was read while unfolding, but which starts the
     * next unfolded line. Null if there is no pending line.
     */
    private String lastLine;

    /**
     * Physical line number on which the most recently returned unfolded line
     * started.
     */
    private int lastLineNum = 0;

    /**
     * Number of physical lines that have been read from the wrapped reader.
     */
    private int lineCount = 0;

    private final Charset charset;

    /**
     * Creates a folded line reader.
     * @param reader the reader object to wrap
     */
    public FoldedLineReader(Reader reader) {
        super(reader);

        //only an InputStreamReader is able to report its character encoding
        Charset detected = null;
        if (reader instanceof InputStreamReader) {
            String charsetStr = ((InputStreamReader) reader).getEncoding();
            if (charsetStr != null) {
                detected = Charset.forName(charsetStr);
            }
        }
        charset = detected;
    }

    /**
     * Creates a folded line reader.
     * @param text the text to read
     */
    public FoldedLineReader(String text) {
        this(new StringReader(text));
    }

    /**
     * Gets the starting line number of the last unfolded line that was read.
     * @return the line number (the first line is 1), or 0 if nothing has been
     * read yet
     */
    public int getLineNum() {
        return lastLineNum;
    }

    /**
     * Gets the character encoding of the reader.
     * @return the character encoding or null if none is defined
     */
    public Charset getEncoding() {
        return charset;
    }

    /**
     * Reads the next unfolded line.
     * @return the next unfolded line or null if EOF
     * @throws IOException if there's a problem reading from the reader
     */
    @Override
    public String readLine() throws IOException {
        String wholeLine = (lastLine == null) ? readNonEmptyLine() : lastLine;
        lastLine = null;
        if (wholeLine == null) {
            //end of stream
            return null;
        }

        //@formatter:off
        /*
         * Lines that are QUOTED-PRINTABLE are folded in a strange way. A "=" is
         * appended to the end of a line to signal that the next line is folded.
         * Also, each folded line is not prepended with whitespace.
         *
         * For example:
         *
         * ------------
         * BEGIN:VCARD
         * NOTE;QUOTED-PRINTABLE: This is an=0D=0A=
         * annoyingly formatted=0D=0A=
         * note=
         *
         * END:VCARD
         * ------------
         *
         * In the example above, note how there is an empty line directly above
         * END. This is still part of the NOTE property value because the 3rd
         * line of NOTE ends with a "=".
         *
         * This behavior has only been observed in Outlook vCards.
         */
        //@formatter:on

        boolean foldedQuotedPrintableLine = FOLDED_QUOTED_PRINTABLE_VALUE_REGEX.matcher(wholeLine).matches();
        if (foldedQuotedPrintableLine) {
            wholeLine = chopLastChar(wholeLine);
        }

        //"wholeLine" is always the most recently read physical line at this
        //point, so the running count is the line number it started on
        lastLineNum = lineCount;

        StringBuilder unfoldedLine = new StringBuilder(wholeLine);
        if (foldedQuotedPrintableLine) {
            appendQuotedPrintableLines(unfoldedLine);
        } else {
            appendWhitespaceFoldedLines(unfoldedLine);
        }
        return unfoldedLine.toString();
    }

    /**
     * Appends the continuation lines of a folded quoted-printable value. A
     * continuation line that ends with "=" signals that another one follows.
     * Empty lines are NOT skipped here, because an empty line can be the
     * final line of the value.
     * @param unfoldedLine the buffer to append the continuation lines to
     * @throws IOException if there's a problem reading from the reader
     */
    private void appendQuotedPrintableLines(StringBuilder unfoldedLine) throws IOException {
        String line;
        while ((line = readPhysicalLine()) != null) {
            line = stripLeadingWhitespace(line);

            boolean endsInEquals = line.endsWith("=");
            if (endsInEquals) {
                line = chopLastChar(line);
            }
            unfoldedLine.append(line);

            if (!endsInEquals) {
                //end of the folded line
                return;
            }
        }
    }

    /**
     * Appends every following line that starts with whitespace. The first
     * line that does not start with whitespace is kept in {@link #lastLine}
     * so that the next call to {@link #readLine()} starts with it.
     * @param unfoldedLine the buffer to append the continuation lines to
     * @throws IOException if there's a problem reading from the reader
     */
    private void appendWhitespaceFoldedLines(StringBuilder unfoldedLine) throws IOException {
        String line;
        while ((line = readNonEmptyLine()) != null) {
            //charAt(0) is safe: readNonEmptyLine() never returns an empty string
            if (!Character.isWhitespace(line.charAt(0))) {
                lastLine = line;
                return;
            }

            //Note: Evolution will include real whitespace chars alongside the folding char
            unfoldedLine.append(stripLeadingWhitespace(line));
        }
    }

    /**
     * Reads the next non-empty line. Empty lines must be ignored because some
     * vCards (i.e. iPhone) contain empty lines. These empty lines appear in
     * between folded lines, which, if not ignored, will cause the parser to
     * incorrectly parse the vCard.
     * @return the next non-empty line or null if EOF
     * @throws IOException if there's a problem reading from the reader
     */
    private String readNonEmptyLine() throws IOException {
        String line;
        do {
            line = readPhysicalLine();
        } while (line != null && line.length() == 0);
        return line;
    }

    /**
     * Reads one physical line from the wrapped reader and counts it. Every
     * read goes through this method so that the line numbers stay accurate
     * no matter which folding style consumed the line.
     * @return the line or null if EOF
     * @throws IOException if there's a problem reading from the reader
     */
    private String readPhysicalLine() throws IOException {
        String line = super.readLine();
        if (line != null) {
            lineCount++;
        }
        return line;
    }

    /**
     * Removes all leading whitespace characters (as defined by
     * {@link Character#isWhitespace(char)}) from a string.
     * @param line the string
     * @return the string without its leading whitespace
     */
    private static String stripLeadingWhitespace(String line) {
        int start = 0;
        while (start < line.length() && Character.isWhitespace(line.charAt(start))) {
            start++;
        }
        return line.substring(start);
    }

    /**
     * Removes the last character of a string (used to drop the trailing "="
     * folding marker).
     * @param line the string (must not be empty)
     * @return the string without its last character
     */
    private static String chopLastChar(String line) {
        return line.substring(0, line.length() - 1);
    }
}