001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.common.collect.Streams; 028import com.google.errorprone.annotations.CanIgnoreReturnValue; 029import com.google.errorprone.annotations.MustBeClosed; 030import java.io.BufferedReader; 031import java.io.IOException; 032import java.io.InputStream; 033import java.io.Reader; 034import java.io.StringReader; 035import java.io.UncheckedIOException; 036import java.io.Writer; 037import java.nio.charset.Charset; 038import java.util.Iterator; 039import java.util.List; 040import java.util.function.Consumer; 041import java.util.stream.Stream; 042import org.checkerframework.checker.nullness.qual.Nullable; 043 044/** 045 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code 046 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead, 047 * it is an immutable <i>supplier</i> of {@code Reader} instances. 048 * 049 * <p>{@code CharSource} provides two kinds of methods: 050 * 051 * <ul> 052 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 053 * instance each time they are called. The caller is responsible for ensuring that the 054 * returned reader is closed. 055 * <li><b>Convenience methods:</b> These are implementations of common operations that are 056 * typically implemented by opening a reader using one of the methods in the first category, 057 * doing something and finally closing the reader that was opened. 058 * </ul> 059 * 060 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 061 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code 062 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to 063 * be an empty line at the end if the contents are terminated with a line separator. 064 * 065 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 066 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 067 * 068 * @since 14.0 069 * @author Colin Decker 070 */ 071@GwtIncompatible 072public abstract class CharSource implements InputSupplier<Reader> { 073 074 /** Constructor for use by subclasses. */ 075 protected CharSource() {} 076 077 /** 078 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 079 * as bytes using the given {@link Charset}. 080 * 081 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 082 * the default implementation of this method will ensure that the original {@code CharSource} is 083 * returned, rather than round-trip encoding. Subclasses that override this method should behave 084 * the same way. 085 * 086 * @since 20.0 087 */ 088 @Beta 089 public ByteSource asByteSource(Charset charset) { 090 return new AsByteSource(charset); 091 } 092 093 /** 094 * Opens a new {@link Reader} for reading from this source. This method returns a new, independent 095 * reader each time it is called. 096 * 097 * <p>The caller is responsible for ensuring that the returned reader is closed. 098 * 099 * @throws IOException if an I/O error occurs while opening the reader 100 */ 101 public abstract Reader openStream() throws IOException; 102 103 /** 104 * This method is a temporary method provided for easing migration from suppliers to sources and 105 * sinks. 106 * 107 * @since 15.0 108 * @deprecated This method is only provided for temporary compatibility with the 109 * {@link InputSupplier} interface and should not be called directly. Use {@link #openStream} 110 * instead. This method is scheduled for removal in Guava 18.0. 111 */ 112 @Override 113 @Deprecated 114 public final Reader getInput() throws IOException { 115 return openStream(); 116 } 117 118 /** 119 * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, 120 * independent reader each time it is called. 121 * 122 * <p>The caller is responsible for ensuring that the returned reader is closed. 123 * 124 * @throws IOException if an I/O error occurs while of opening the reader 125 */ 126 public BufferedReader openBufferedStream() throws IOException { 127 Reader reader = openStream(); 128 return (reader instanceof BufferedReader) 129 ? (BufferedReader) reader 130 : new BufferedReader(reader); 131 } 132 133 /** 134 * Opens a new {@link Stream} for reading text one line at a time from this source. This method 135 * returns a new, independent stream each time it is called. 136 * 137 * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an 138 * I/O error occurs while the stream is reading from the source or when the stream is closed, an 139 * {@link UncheckedIOException} is thrown. 140 * 141 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 142 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 143 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 144 * it does. 145 * 146 * <p>The caller is responsible for ensuring that the returned stream is closed. For example: 147 * 148 * <pre>{@code 149 * try (Stream<String> lines = source.lines()) { 150 * lines.map(...) 151 * .filter(...) 152 * .forEach(...); 153 * } 154 * }</pre> 155 * 156 * @throws IOException if an I/O error occurs while opening the stream 157 * @since 22.0 158 */ 159 @Beta 160 @MustBeClosed 161 public Stream<String> lines() throws IOException { 162 BufferedReader reader = openBufferedStream(); 163 return reader 164 .lines() 165 .onClose( 166 () -> { 167 try { 168 reader.close(); 169 } catch (IOException e) { 170 throw new UncheckedIOException(e); 171 } 172 }); 173 } 174 175 /** 176 * Returns the size of this source in chars, if the size can be easily determined without actually 177 * opening the data stream. 178 * 179 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code 180 * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i> 181 * that this method will return a different number of chars than would be returned by reading all 182 * of the chars. 183 * 184 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 185 * return a different number of chars if the contents are changed. 186 * 187 * @since 19.0 188 */ 189 @Beta 190 public Optional<Long> lengthIfKnown() { 191 return Optional.absent(); 192 } 193 194 /** 195 * Returns the length of this source in chars, even if doing so requires opening and traversing an 196 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 197 * 198 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 199 * absent, it will fall back to a heavyweight operation that will open a stream, {@link 200 * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that 201 * were skipped. 202 * 203 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 204 * implementation, it is <i>possible</i> that this method will return a different number of chars 205 * than would be returned by reading all of the chars. 206 * 207 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 208 * number of chars if the contents are changed. 209 * 210 * @throws IOException if an I/O error occurs while reading the length of this source 211 * @since 19.0 212 */ 213 @Beta 214 public long length() throws IOException { 215 Optional<Long> lengthIfKnown = lengthIfKnown(); 216 if (lengthIfKnown.isPresent()) { 217 return lengthIfKnown.get(); 218 } 219 220 Closer closer = Closer.create(); 221 try { 222 Reader reader = closer.register(openStream()); 223 return countBySkipping(reader); 224 } catch (Throwable e) { 225 throw closer.rethrow(e); 226 } finally { 227 closer.close(); 228 } 229 } 230 231 private long countBySkipping(Reader reader) throws IOException { 232 long count = 0; 233 long read; 234 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 235 count += read; 236 } 237 return count; 238 } 239 240 /** 241 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 242 * Does not close {@code appendable} if it is {@code Closeable}. 243 * 244 * @return the number of characters copied 245 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 246 * appendable} 247 */ 248 @CanIgnoreReturnValue 249 public long copyTo(Appendable appendable) throws IOException { 250 checkNotNull(appendable); 251 252 Closer closer = Closer.create(); 253 try { 254 Reader reader = closer.register(openStream()); 255 return CharStreams.copy(reader, appendable); 256 } catch (Throwable e) { 257 throw closer.rethrow(e); 258 } finally { 259 closer.close(); 260 } 261 } 262 263 /** 264 * Copies the contents of this source to the given sink. 265 * 266 * @return the number of characters copied 267 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 268 * sink} 269 */ 270 @CanIgnoreReturnValue 271 public long copyTo(CharSink sink) throws IOException { 272 checkNotNull(sink); 273 274 Closer closer = Closer.create(); 275 try { 276 Reader reader = closer.register(openStream()); 277 Writer writer = closer.register(sink.openStream()); 278 return CharStreams.copy(reader, writer); 279 } catch (Throwable e) { 280 throw closer.rethrow(e); 281 } finally { 282 closer.close(); 283 } 284 } 285 286 /** 287 * Reads the contents of this source as a string. 288 * 289 * @throws IOException if an I/O error occurs while reading from this source 290 */ 291 public String read() throws IOException { 292 Closer closer = Closer.create(); 293 try { 294 Reader reader = closer.register(openStream()); 295 return CharStreams.toString(reader); 296 } catch (Throwable e) { 297 throw closer.rethrow(e); 298 } finally { 299 closer.close(); 300 } 301 } 302 303 /** 304 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 305 * 306 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 307 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 308 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 309 * it does. 310 * 311 * @throws IOException if an I/O error occurs while reading from this source 312 */ 313 public @Nullable String readFirstLine() throws IOException { 314 Closer closer = Closer.create(); 315 try { 316 BufferedReader reader = closer.register(openBufferedStream()); 317 return reader.readLine(); 318 } catch (Throwable e) { 319 throw closer.rethrow(e); 320 } finally { 321 closer.close(); 322 } 323 } 324 325 /** 326 * Reads all the lines of this source as a list of strings. The returned list will be empty if 327 * this source is empty. 328 * 329 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 330 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 331 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 332 * it does. 333 * 334 * @throws IOException if an I/O error occurs while reading from this source 335 */ 336 public ImmutableList<String> readLines() throws IOException { 337 Closer closer = Closer.create(); 338 try { 339 BufferedReader reader = closer.register(openBufferedStream()); 340 List<String> result = Lists.newArrayList(); 341 String line; 342 while ((line = reader.readLine()) != null) { 343 result.add(line); 344 } 345 return ImmutableList.copyOf(result); 346 } catch (Throwable e) { 347 throw closer.rethrow(e); 348 } finally { 349 closer.close(); 350 } 351 } 352 353 /** 354 * Reads lines of text from this source, processing each line as it is read using the given {@link 355 * LineProcessor processor}. Stops when all lines have been processed or the processor returns 356 * {@code false} and returns the result produced by the processor. 357 * 358 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 359 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 360 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 361 * it does. 362 * 363 * @throws IOException if an I/O error occurs while reading from this source or if {@code 364 * processor} throws an {@code IOException} 365 * @since 16.0 366 */ 367 @Beta 368 @CanIgnoreReturnValue // some processors won't return a useful result 369 public <T> T readLines(LineProcessor<T> processor) throws IOException { 370 checkNotNull(processor); 371 372 Closer closer = Closer.create(); 373 try { 374 Reader reader = closer.register(openStream()); 375 return CharStreams.readLines(reader, processor); 376 } catch (Throwable e) { 377 throw closer.rethrow(e); 378 } finally { 379 closer.close(); 380 } 381 } 382 383 /** 384 * Reads all lines of text from this source, running the given {@code action} for each line as it 385 * is read. 386 * 387 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 388 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 389 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 390 * it does. 391 * 392 * @throws IOException if an I/O error occurs while reading from this source or if {@code action} 393 * throws an {@code UncheckedIOException} 394 * @since 22.0 395 */ 396 @Beta 397 public void forEachLine(Consumer<? super String> action) throws IOException { 398 try (Stream<String> lines = lines()) { 399 // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure 400 lines.forEachOrdered(action); 401 } catch (UncheckedIOException e) { 402 throw e.getCause(); 403 } 404 } 405 406 /** 407 * Returns whether the source has zero chars. The default implementation first checks {@link 408 * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be 409 * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. 410 * 411 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 412 * chars are actually available for reading. This means that a source may return {@code true} from 413 * {@code isEmpty()} despite having readable content. 414 * 415 * @throws IOException if an I/O error occurs 416 * @since 15.0 417 */ 418 public boolean isEmpty() throws IOException { 419 Optional<Long> lengthIfKnown = lengthIfKnown(); 420 if (lengthIfKnown.isPresent()) { 421 return lengthIfKnown.get() == 0L; 422 } 423 Closer closer = Closer.create(); 424 try { 425 Reader reader = closer.register(openStream()); 426 return reader.read() == -1; 427 } catch (Throwable e) { 428 throw closer.rethrow(e); 429 } finally { 430 closer.close(); 431 } 432 } 433 434 /** 435 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 436 * the source will contain the concatenated data from the streams of the underlying sources. 437 * 438 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 439 * close the open underlying stream. 440 * 441 * @param sources the sources to concatenate 442 * @return a {@code CharSource} containing the concatenated data 443 * @since 15.0 444 */ 445 public static CharSource concat(Iterable<? extends CharSource> sources) { 446 return new ConcatenatedCharSource(sources); 447 } 448 449 /** 450 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 451 * the source will contain the concatenated data from the streams of the underlying sources. 452 * 453 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 454 * close the open underlying stream. 455 * 456 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 457 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 458 * eagerly fetches data for each source when iterated (rather than producing sources that only 459 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 460 * possible. 461 * 462 * @param sources the sources to concatenate 463 * @return a {@code CharSource} containing the concatenated data 464 * @throws NullPointerException if any of {@code sources} is {@code null} 465 * @since 15.0 466 */ 467 public static CharSource concat(Iterator<? extends CharSource> sources) { 468 return concat(ImmutableList.copyOf(sources)); 469 } 470 471 /** 472 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 473 * the source will contain the concatenated data from the streams of the underlying sources. 474 * 475 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 476 * close the open underlying stream. 477 * 478 * @param sources the sources to concatenate 479 * @return a {@code CharSource} containing the concatenated data 480 * @throws NullPointerException if any of {@code sources} is {@code null} 481 * @since 15.0 482 */ 483 public static CharSource concat(CharSource... sources) { 484 return concat(ImmutableList.copyOf(sources)); 485 } 486 487 /** 488 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 489 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 490 * the {@code charSequence} is mutated while it is being read, so don't do that. 491 * 492 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 493 */ 494 public static CharSource wrap(CharSequence charSequence) { 495 return charSequence instanceof String 496 ? new StringCharSource((String) charSequence) 497 : new CharSequenceCharSource(charSequence); 498 } 499 500 /** 501 * Returns an immutable {@link CharSource} that contains no characters. 502 * 503 * @since 15.0 504 */ 505 public static CharSource empty() { 506 return EmptyCharSource.INSTANCE; 507 } 508 509 /** A byte source that reads chars from this source and encodes them as bytes using a charset. */ 510 private final class AsByteSource extends ByteSource { 511 512 final Charset charset; 513 514 AsByteSource(Charset charset) { 515 this.charset = checkNotNull(charset); 516 } 517 518 @Override 519 public CharSource asCharSource(Charset charset) { 520 if (charset.equals(this.charset)) { 521 return CharSource.this; 522 } 523 return super.asCharSource(charset); 524 } 525 526 @Override 527 public InputStream openStream() throws IOException { 528 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 529 } 530 531 @Override 532 public String toString() { 533 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 534 } 535 } 536 537 private static class CharSequenceCharSource extends CharSource { 538 539 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 540 541 protected final CharSequence seq; 542 543 protected CharSequenceCharSource(CharSequence seq) { 544 this.seq = checkNotNull(seq); 545 } 546 547 @Override 548 public Reader openStream() { 549 return new CharSequenceReader(seq); 550 } 551 552 @Override 553 public String read() { 554 return seq.toString(); 555 } 556 557 @Override 558 public boolean isEmpty() { 559 return seq.length() == 0; 560 } 561 562 @Override 563 public long length() { 564 return seq.length(); 565 } 566 567 @Override 568 public Optional<Long> lengthIfKnown() { 569 return Optional.of((long) seq.length()); 570 } 571 572 /** 573 * Returns an iterator over the lines in the string. If the string ends in a newline, a final 574 * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). 575 */ 576 private Iterator<String> linesIterator() { 577 return new AbstractIterator<String>() { 578 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 579 580 @Override 581 protected String computeNext() { 582 if (lines.hasNext()) { 583 String next = lines.next(); 584 // skip last line if it's empty 585 if (lines.hasNext() || !next.isEmpty()) { 586 return next; 587 } 588 } 589 return endOfData(); 590 } 591 }; 592 } 593 594 @Override 595 public Stream<String> lines() { 596 return Streams.stream(linesIterator()); 597 } 598 599 @Override 600 public String readFirstLine() { 601 Iterator<String> lines = linesIterator(); 602 return lines.hasNext() ? lines.next() : null; 603 } 604 605 @Override 606 public ImmutableList<String> readLines() { 607 return ImmutableList.copyOf(linesIterator()); 608 } 609 610 @Override 611 public <T> T readLines(LineProcessor<T> processor) throws IOException { 612 Iterator<String> lines = linesIterator(); 613 while (lines.hasNext()) { 614 if (!processor.processLine(lines.next())) { 615 break; 616 } 617 } 618 return processor.getResult(); 619 } 620 621 @Override 622 public String toString() { 623 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 624 } 625 } 626 627 /** 628 * Subclass specialized for string instances. 629 * 630 * <p>Since Strings are immutable and built into the jdk we can optimize some operations 631 * 632 * <ul> 633 * <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can 634 * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by 635 * one with {@link CharSequence#charAt(int)}. 636 * <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link 637 * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length 638 * can't change, and it is faster because many writers and appendables are optimized for 639 * appending string instances. 640 * </ul> 641 */ 642 private static class StringCharSource extends CharSequenceCharSource { 643 protected StringCharSource(String seq) { 644 super(seq); 645 } 646 647 @Override 648 public Reader openStream() { 649 return new StringReader((String) seq); 650 } 651 652 @Override 653 public long copyTo(Appendable appendable) throws IOException { 654 appendable.append(seq); 655 return seq.length(); 656 } 657 658 @Override 659 public long copyTo(CharSink sink) throws IOException { 660 checkNotNull(sink); 661 Closer closer = Closer.create(); 662 try { 663 Writer writer = closer.register(sink.openStream()); 664 writer.write((String) seq); 665 return seq.length(); 666 } catch (Throwable e) { 667 throw closer.rethrow(e); 668 } finally { 669 closer.close(); 670 } 671 } 672 } 673 674 private static final class EmptyCharSource extends StringCharSource { 675 676 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 677 678 private EmptyCharSource() { 679 super(""); 680 } 681 682 @Override 683 public String toString() { 684 return "CharSource.empty()"; 685 } 686 } 687 688 private static final class ConcatenatedCharSource extends CharSource { 689 690 private final Iterable<? extends CharSource> sources; 691 692 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 693 this.sources = checkNotNull(sources); 694 } 695 696 @Override 697 public Reader openStream() throws IOException { 698 return new MultiReader(sources.iterator()); 699 } 700 701 @Override 702 public boolean isEmpty() throws IOException { 703 for (CharSource source : sources) { 704 if (!source.isEmpty()) { 705 return false; 706 } 707 } 708 return true; 709 } 710 711 @Override 712 public Optional<Long> lengthIfKnown() { 713 long result = 0L; 714 for (CharSource source : sources) { 715 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 716 if (!lengthIfKnown.isPresent()) { 717 return Optional.absent(); 718 } 719 result += lengthIfKnown.get(); 720 } 721 return Optional.of(result); 722 } 723 724 @Override 725 public long length() throws IOException { 726 long result = 0L; 727 for (CharSource source : sources) { 728 result += source.length(); 729 } 730 return result; 731 } 732 733 @Override 734 public String toString() { 735 return "CharSource.concat(" + sources + ")"; 736 } 737 } 738}