001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2017 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.api;
021
022import java.io.BufferedReader;
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.FileNotFoundException;
026import java.io.IOException;
027import java.io.InputStreamReader;
028import java.io.Reader;
029import java.io.StringReader;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.CodingErrorAction;
033import java.nio.charset.UnsupportedCharsetException;
034import java.util.AbstractList;
035import java.util.ArrayList;
036import java.util.Arrays;
037import java.util.List;
038import java.util.regex.Matcher;
039import java.util.regex.Pattern;
040
041import com.google.common.io.Closeables;
042
043/**
044 * Represents the text contents of a file of arbitrary plain text type.
045 * <p>
046 * This class will be passed to instances of class FileSetCheck by
047 * Checker. It implements a string list to ensure backwards
048 * compatibility, but can be extended in the future to allow more
049 * flexible, more powerful or more efficient handling of certain
050 * situations.
051 * </p>
052 *
053 * @author Martin von Gagern
054 */
055public final class FileText extends AbstractList<String> {
056
057    /**
058     * The number of characters to read in one go.
059     */
060    private static final int READ_BUFFER_SIZE = 1024;
061
062    /**
063     * Regular expression pattern matching all line terminators.
064     */
065    private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
066
067    // For now, we always keep both full text and lines array.
068    // In the long run, however, the one passed at initialization might be
069    // enough, while the other could be lazily created when requested.
070    // This would save memory but cost CPU cycles.
071
072    /**
073     * The name of the file.
074     * {@code null} if no file name is available for whatever reason.
075     */
076    private final File file;
077
078    /**
079     * The charset used to read the file.
080     * {@code null} if the file was reconstructed from a list of lines.
081     */
082    private final Charset charset;
083
084    /**
085     * The full text contents of the file.
086     */
087    private final String fullText;
088
089    /**
090     * The lines of the file, without terminators.
091     */
092    private final String[] lines;
093
094    /**
095     * The first position of each line within the full text.
096     */
097    private int[] lineBreaks;
098
099    /**
100     * Creates a new file text representation.
101     *
102     * <p>The file will be read using the specified encoding, replacing
103     * malformed input and unmappable characters with the default
104     * replacement character.
105     *
106     * @param file the name of the file
107     * @param charsetName the encoding to use when reading the file
108     * @throws NullPointerException if the text is null
109     * @throws IOException if the file could not be read
110     */
111    public FileText(File file, String charsetName) throws IOException {
112        this.file = file;
113
114        // We use our own decoder, to be sure we have complete control
115        // about replacements.
116        final CharsetDecoder decoder;
117        try {
118            charset = Charset.forName(charsetName);
119            decoder = charset.newDecoder();
120            decoder.onMalformedInput(CodingErrorAction.REPLACE);
121            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
122        }
123        catch (final UnsupportedCharsetException ex) {
124            final String message = "Unsupported charset: " + charsetName;
125            throw new IllegalStateException(message, ex);
126        }
127
128        fullText = readFile(file, decoder);
129
130        // Use the BufferedReader to break down the lines as this
131        // is about 30% faster than using the
132        // LINE_TERMINATOR.split(fullText, -1) method
133        final ArrayList<String> textLines = new ArrayList<>();
134        final BufferedReader reader =
135            new BufferedReader(new StringReader(fullText));
136        while (true) {
137            final String line = reader.readLine();
138            if (line == null) {
139                break;
140            }
141            textLines.add(line);
142        }
143        lines = textLines.toArray(new String[textLines.size()]);
144    }
145
146    /**
147     * Copy constructor.
148     * @param fileText to make copy of
149     */
150    public FileText(FileText fileText) {
151        file = fileText.file;
152        charset = fileText.charset;
153        fullText = fileText.fullText;
154        lines = fileText.lines.clone();
155        if (fileText.lineBreaks == null) {
156            lineBreaks = null;
157        }
158        else {
159            lineBreaks = fileText.lineBreaks.clone();
160        }
161    }
162
163    /**
164     * Compatibility constructor.
165     *
166     * <p>This constructor reconstructs the text of the file by joining
167     * lines with linefeed characters. This process does not restore
168     * the original line terminators and should therefore be avoided.
169     *
170     * @param file the name of the file
171     * @param lines the lines of the text, without terminators
172     * @throws NullPointerException if the lines array is null
173     */
174    private FileText(File file, List<String> lines) {
175        final StringBuilder buf = new StringBuilder();
176        for (final String line : lines) {
177            buf.append(line).append('\n');
178        }
179        buf.trimToSize();
180
181        this.file = file;
182        charset = null;
183        fullText = buf.toString();
184        this.lines = lines.toArray(new String[lines.size()]);
185    }
186
187    /**
188     * Reads file using specific decoder and returns all its content as a String.
189     * @param inputFile File to read
190     * @param decoder Charset decoder
191     * @return File's text
192     * @throws IOException Unable to open or read the file
193     */
194    private static String readFile(final File inputFile, final CharsetDecoder decoder)
195            throws IOException {
196        if (!inputFile.exists()) {
197            throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
198        }
199        final StringBuilder buf = new StringBuilder();
200        final FileInputStream stream = new FileInputStream(inputFile);
201        final Reader reader = new InputStreamReader(stream, decoder);
202        try {
203            final char[] chars = new char[READ_BUFFER_SIZE];
204            while (true) {
205                final int len = reader.read(chars);
206                if (len < 0) {
207                    break;
208                }
209                buf.append(chars, 0, len);
210            }
211        }
212        finally {
213            Closeables.closeQuietly(reader);
214        }
215        return buf.toString();
216    }
217
218    /**
219     * Compatibility conversion.
220     *
221     * <p>This method can be used to convert the arguments passed to
222     * {@link FileSetCheck#process(File,List)} to a FileText
223     * object. If the list of lines already is a FileText, it is
224     * returned as is. Otherwise, a new FileText is constructed by
225     * joining the lines using line feed characters.
226     *
227     * @param file the name of the file
228     * @param lines the lines of the text, without terminators
229     * @return an object representing the denoted text file
230     */
231    public static FileText fromLines(File file, List<String> lines) {
232        final FileText fileText;
233        if (lines instanceof FileText) {
234            fileText = (FileText) lines;
235        }
236        else {
237            fileText = new FileText(file, lines);
238        }
239        return fileText;
240    }
241
242    /**
243     * Get the name of the file.
244     * @return an object containing the name of the file
245     */
246    public File getFile() {
247        return file;
248    }
249
250    /**
251     * Get the character set which was used to read the file.
252     * Will be {@code null} for a file reconstructed from its lines.
253     * @return the charset used when the file was read
254     */
255    public Charset getCharset() {
256        return charset;
257    }
258
259    /**
260     * Retrieve the full text of the file.
261     * @return the full text of the file
262     */
263    public CharSequence getFullText() {
264        return fullText;
265    }
266
267    /**
268     * Returns an array of all lines.
269     * {@code text.toLinesArray()} is equivalent to
270     * {@code text.toArray(new String[text.size()])}.
271     * @return an array of all lines of the text
272     */
273    public String[] toLinesArray() {
274        return lines.clone();
275    }
276
277    /**
278     * Find positions of line breaks in the full text.
279     * @return an array giving the first positions of each line.
280     */
281    private int[] findLineBreaks() {
282        if (lineBreaks == null) {
283            final int[] lineBreakPositions = new int[size() + 1];
284            lineBreakPositions[0] = 0;
285            int lineNo = 1;
286            final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
287            while (matcher.find()) {
288                lineBreakPositions[lineNo] = matcher.end();
289                lineNo++;
290            }
291            if (lineNo < lineBreakPositions.length) {
292                lineBreakPositions[lineNo] = fullText.length();
293            }
294            lineBreaks = lineBreakPositions;
295        }
296        return lineBreaks;
297    }
298
299    /**
300     * Determine line and column numbers in full text.
301     * @param pos the character position in the full text
302     * @return the line and column numbers of this character
303     */
304    public LineColumn lineColumn(int pos) {
305        final int[] lineBreakPositions = findLineBreaks();
306        int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
307        if (lineNo < 0) {
308            // we have: lineNo = -(insertion point) - 1
309            // we want: lineNo =  (insertion point) - 1
310            lineNo = -lineNo - 2;
311        }
312        final int startOfLine = lineBreakPositions[lineNo];
313        final int columnNo = pos - startOfLine;
314        // now we have lineNo and columnNo, both starting at zero.
315        return new LineColumn(lineNo + 1, columnNo);
316    }
317
318    /**
319     * Retrieves a line of the text by its number.
320     * The returned line will not contain a trailing terminator.
321     * @param lineNo the number of the line to get, starting at zero
322     * @return the line with the given number
323     */
324    @Override
325    public String get(final int lineNo) {
326        return lines[lineNo];
327    }
328
329    /**
330     * Counts the lines of the text.
331     * @return the number of lines in the text
332     */
333    @Override
334    public int size() {
335        return lines.length;
336    }
337
338}