001    package ezvcard.io.text;
002    
003    import java.io.BufferedReader;
004    import java.io.IOException;
005    import java.io.InputStreamReader;
006    import java.io.Reader;
007    import java.io.StringReader;
008    import java.nio.charset.Charset;
009    import java.util.regex.Pattern;
010    
011    import ezvcard.util.StringUtils;
012    
013    /*
014     Copyright (c) 2013, Michael Angstadt
015     All rights reserved.
016    
017     Redistribution and use in source and binary forms, with or without
018     modification, are permitted provided that the following conditions are met: 
019    
020     1. Redistributions of source code must retain the above copyright notice, this
021     list of conditions and the following disclaimer. 
022     2. Redistributions in binary form must reproduce the above copyright notice,
023     this list of conditions and the following disclaimer in the documentation
024     and/or other materials provided with the distribution. 
025    
026     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
027     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
028     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
029     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
030     ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
031     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
032     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
033     ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
034     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
035     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
036    
037     The views and conclusions contained in the software and documentation are those
038     of the authors and should not be interpreted as representing official policies, 
039     either expressed or implied, of the FreeBSD Project.
040     */
041    
042    /**
043     * Automatically unfolds lines of text as they are read.
044     * @author Michael Angstadt
045     */
046    public class FoldedLineReader extends BufferedReader {
047            /**
048             * Regular expression used to detect "quoted-printable" property values.
049             */
050            private static final Pattern foldedQuotedPrintableValueRegex = Pattern.compile("[^:]*?QUOTED-PRINTABLE.*?:.*?=", Pattern.CASE_INSENSITIVE);
051    
052            private String lastLine;
053            private int lastLineNum = 0, lineCount = 0;
054            private final Charset charset;
055    
056            /**
057             * Creates a folded line reader.
058             * @param reader the reader object to wrap
059             */
060            public FoldedLineReader(Reader reader) {
061                    super(reader);
062                    if (reader instanceof InputStreamReader) {
063                            InputStreamReader isr = (InputStreamReader) reader;
064                            String charsetStr = isr.getEncoding();
065                            charset = (charsetStr == null) ? null : Charset.forName(charsetStr);
066                    } else {
067                            charset = null;
068                    }
069            }
070    
071            /**
072             * Creates a folded line reader.
073             * @param text the text to read
074             */
075            public FoldedLineReader(String text) {
076                    this(new StringReader(text));
077            }
078    
079            /**
080             * Gets the starting line number of the last unfolded line that was read.
081             * @return the line number
082             */
083            public int getLineNum() {
084                    return lastLineNum;
085            }
086    
087            /**
088             * Gets the character encoding of the reader.
089             * @return the character encoding or null if none is defined
090             */
091            public Charset getEncoding() {
092                    return charset;
093            }
094    
095            /**
096             * Reads the next non-empty line. Empty lines must be ignored because some
097             * vCards (i.e. iPhone) contain empty lines. These empty lines appear in
098             * between folded lines, which, if not ignored, will cause the parser to
099             * incorrectly parse the vCard.
100             * @return the next non-empty line or null of EOF
101             * @throws IOException if there's a problem reading from the reader
102             */
103            private String readNonEmptyLine() throws IOException {
104                    String line;
105                    do {
106                            line = super.readLine();
107                            if (line != null) {
108                                    lineCount++;
109                            }
110                    } while (line != null && line.length() == 0);
111                    return line;
112            }
113    
114            /**
115             * Reads the next unfolded line.
116             * @return the next unfolded line or null if EOF
117             * @throws IOException if there's a problem reading from the reader
118             */
119            @Override
120            public String readLine() throws IOException {
121                    String wholeLine = (lastLine == null) ? readNonEmptyLine() : lastLine;
122                    lastLine = null;
123                    if (wholeLine == null) {
124                            //end of stream
125                            return null;
126                    }
127    
128                    //@formatter:off
129                    /*
130                     * Lines that are QUOTED-PRINTABLE are folded in a strange way. A "=" is
131                     * appended to the end of a line to signal that the next line is folded.
132                     * Also, each folded line is not prepend with whitespace.
133                     * 
134                     * For example:
135                     * 
136                     * ------------
137                     * BEGIN:VCARD
138                     * NOTE;QUOTED-PRINTABLE: This is an=0D=0A=
139                     * annoyingly formatted=0D=0A=
140                     * note=
141                     * 
142                     * END:VCARD
143                     * ------------
144                     * 
145                     * In the example above, note how there is an empty line directly above
146                     * END. This is still part of the NOTE property value because the 3rd
147                     * line of NOTE ends with a "=".
148                     * 
149                     * This behavior has only been observed in Outlook vCards.
150                     */
151                    //@formatter:on
152    
153                    boolean foldedQuotedPrintableLine = false;
154                    if (foldedQuotedPrintableValueRegex.matcher(wholeLine).matches()) {
155                            foldedQuotedPrintableLine = true;
156    
157                            //chop off the trailing "="
158                            wholeLine = wholeLine.substring(0, wholeLine.length() - 1);
159                    }
160    
161                    lastLineNum = lineCount;
162                    StringBuilder unfoldedLine = new StringBuilder(wholeLine);
163                    while (true) {
164                            String line = foldedQuotedPrintableLine ? super.readLine() : readNonEmptyLine();
165                            if (line == null) {
166                                    //end of stream
167                                    break;
168                            }
169    
170                            if (foldedQuotedPrintableLine) {
171                                    line = StringUtils.ltrim(line);
172    
173                                    boolean endsInEquals = line.endsWith("=");
174                                    if (endsInEquals) {
175                                            //chop off the trailing "="
176                                            line = line.substring(0, line.length() - 1);
177                                    }
178    
179                                    unfoldedLine.append(line);
180    
181                                    if (endsInEquals) {
182                                            //there are more folded lines
183                                            continue;
184                                    } else {
185                                            //end of the folded line
186                                            break;
187                                    }
188                            }
189    
190                            if (line.length() > 0 && Character.isWhitespace(line.charAt(0))) {
191                                    //the line is folded
192    
193                                    int lastWhitespace = 1;
194                                    //Note: Evolution will include real whitespace chars alongside the folding char
195                                    while (lastWhitespace < line.length() && Character.isWhitespace(line.charAt(lastWhitespace))) {
196                                            lastWhitespace++;
197                                    }
198                                    unfoldedLine.append(line.substring(lastWhitespace));
199    
200                                    continue;
201                            }
202    
203                            lastLine = line;
204                            break;
205                    }
206    
207                    return unfoldedLine.toString();
208            }
209    }