package ezvcard.io.text;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.util.regex.Pattern;

import ezvcard.util.StringUtils;

/*
 Copyright (c) 2013, Michael Angstadt
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice, this
 list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
 this list of conditions and the following disclaimer in the documentation
 and/or other materials provided with the distribution.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 The views and conclusions contained in the software and documentation are those
 of the authors and should not be interpreted as representing official policies,
 either expressed or implied, of the FreeBSD Project.
 */

/**
 * Automatically unfolds lines of text as they are read.
 * @author Michael Angstadt
 */
public class FoldedLineReader extends BufferedReader {
	/**
	 * Regular expression used to detect "quoted-printable" property values
	 * whose first line ends with a "=" (i.e. the value continues on the next
	 * line).
	 */
	private static final Pattern FOLDED_QUOTED_PRINTABLE_VALUE_REGEX = Pattern.compile("[^:]*?QUOTED-PRINTABLE.*?:.*?=", Pattern.CASE_INSENSITIVE);

	/**
	 * A line that was read while looking for folded continuation lines, but
	 * turned out to be the start of the next unfolded line. It is returned
	 * (as the start of the next line) on the following call to
	 * {@link #readLine()}.
	 */
	private String lastLine;

	/**
	 * lastLineNum: the starting line number of the unfolded line most recently
	 * returned. lineCount: the number of physical lines read from the wrapped
	 * reader so far.
	 */
	private int lastLineNum = 0, lineCount = 0;

	private final Charset charset;

	/**
	 * Creates a folded line reader.
	 * @param reader the reader object to wrap
	 */
	public FoldedLineReader(Reader reader) {
		super(reader);

		//the encoding can only be discovered when wrapping an InputStreamReader
		if (reader instanceof InputStreamReader) {
			InputStreamReader isr = (InputStreamReader) reader;
			String charsetStr = isr.getEncoding();
			charset = (charsetStr == null) ? null : Charset.forName(charsetStr);
		} else {
			charset = null;
		}
	}

	/**
	 * Creates a folded line reader.
	 * @param text the text to read
	 */
	public FoldedLineReader(String text) {
		this(new StringReader(text));
	}

	/**
	 * Gets the starting line number of the last unfolded line that was read.
	 * @return the line number
	 */
	public int getLineNum() {
		return lastLineNum;
	}

	/**
	 * Gets the character encoding of the reader.
	 * @return the character encoding or null if none is defined
	 */
	public Charset getEncoding() {
		return charset;
	}

	/**
	 * Reads the next physical (still folded) line from the wrapped reader and
	 * keeps the physical line counter in sync. Every read of a physical line
	 * must go through this method, otherwise the line numbers reported by
	 * {@link #getLineNum()} drift.
	 * @return the next physical line or null if EOF
	 * @throws IOException if there's a problem reading from the reader
	 */
	private String readPhysicalLine() throws IOException {
		String line = super.readLine();
		if (line != null) {
			lineCount++;
		}
		return line;
	}

	/**
	 * Reads the next non-empty line. Empty lines must be ignored because some
	 * vCards (i.e. iPhone) contain empty lines. These empty lines appear in
	 * between folded lines, which, if not ignored, will cause the parser to
	 * incorrectly parse the vCard.
	 * @return the next non-empty line or null if EOF
	 * @throws IOException if there's a problem reading from the reader
	 */
	private String readNonEmptyLine() throws IOException {
		String line;
		do {
			line = readPhysicalLine();
		} while (line != null && line.length() == 0);
		return line;
	}

	/**
	 * Reads the next unfolded line.
	 * @return the next unfolded line or null if EOF
	 * @throws IOException if there's a problem reading from the reader
	 */
	@Override
	public String readLine() throws IOException {
		//start with the line that was pushed back by the previous call, if any
		String wholeLine = (lastLine == null) ? readNonEmptyLine() : lastLine;
		lastLine = null;
		if (wholeLine == null) {
			//end of stream
			return null;
		}

		//@formatter:off
		/*
		 * Lines that are QUOTED-PRINTABLE are folded in a strange way. A "=" is
		 * appended to the end of a line to signal that the next line is folded.
		 * Also, each folded line is not prepended with whitespace.
		 *
		 * For example:
		 *
		 * ------------
		 * BEGIN:VCARD
		 * NOTE;QUOTED-PRINTABLE: This is an=0D=0A=
		 * annoyingly formatted=0D=0A=
		 * note=
		 *
		 * END:VCARD
		 * ------------
		 *
		 * In the example above, note how there is an empty line directly above
		 * END. This is still part of the NOTE property value because the 3rd
		 * line of NOTE ends with a "=".
		 *
		 * This behavior has only been observed in Outlook vCards.
		 */
		//@formatter:on

		boolean foldedQuotedPrintableLine = false;
		if (FOLDED_QUOTED_PRINTABLE_VALUE_REGEX.matcher(wholeLine).matches()) {
			foldedQuotedPrintableLine = true;

			//chop off the trailing "="
			wholeLine = wholeLine.substring(0, wholeLine.length() - 1);
		}

		/*
		 * No physical line has been read since "wholeLine" was read (even if it
		 * was pushed back by the previous call), so the counter still points at
		 * the line on which this unfolded line starts.
		 */
		lastLineNum = lineCount;

		StringBuilder unfoldedLine = new StringBuilder(wholeLine);
		while (true) {
			/*
			 * Empty lines are significant inside of a folded quoted-printable
			 * value (they terminate it), so they must not be skipped there.
			 * Both branches count the physical lines that they consume.
			 */
			String line = foldedQuotedPrintableLine ? readPhysicalLine() : readNonEmptyLine();
			if (line == null) {
				//end of stream
				break;
			}

			if (foldedQuotedPrintableLine) {
				line = StringUtils.ltrim(line);

				boolean endsInEquals = line.endsWith("=");
				if (endsInEquals) {
					//chop off the trailing "="
					line = line.substring(0, line.length() - 1);
				}

				unfoldedLine.append(line);

				if (endsInEquals) {
					//there are more folded lines
					continue;
				} else {
					//end of the folded line
					break;
				}
			}

			if (line.length() > 0 && Character.isWhitespace(line.charAt(0))) {
				//the line is folded

				int lastWhitespace = 1;
				//Note: Evolution will include real whitespace chars alongside the folding char
				while (lastWhitespace < line.length() && Character.isWhitespace(line.charAt(lastWhitespace))) {
					lastWhitespace++;
				}
				unfoldedLine.append(line.substring(lastWhitespace));

				continue;
			}

			//this line starts a new property; save it for the next call
			lastLine = line;
			break;
		}

		return unfoldedLine.toString();
	}
}