java source code of DfaRun

/*+*********************************************************************
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
************************************************************************/

package monq.jfa;

import java.io.Serializable;
import java.io.IOException;
import java.io.PrintStream;

/**
 * <p>A <code>DfaRun</code> is used to apply a {@link Dfa} to a
 * stream of characters. After creation of a <code>DfaRun</code>
 * object, invoke one of its {@link #read()} or {@link #filter()}
 * methods to filter the input data according to the patterns encoded
 * in the <code>Dfa</code> and the {@link FaAction} callback objects
 * attached to them.</p>
 *
 * <p>The default behaviour of the machine on non-matching input is
 * initialized from whatever was specified when the <code>Dfa</code>
 * {@link Nfa#compile was compiled}. Initialization happens in the
 * constructor as well as every time one of the {@link #setIn setIn()}
 * methods is called. The method {@link #setOnFailedMatch
 * setOnFailedMatch()} should normally only be used in {@link
 * FaAction} callbacks.</p>
 *
 * <p>Use field {@link #clientData} to store data to communicate
 * between different action callbacks. Don't let your action callbacks
 * communicate via a common object allocated alongside the
 * <code>Dfa</code>, because this does not allow to share
 * <code>Dfa</code>s between threads.</p>
 *
 * <p>Set field {@link #collect} to <code>true</code> in an action
 * callback to prevent the <code>read()</code> methods from
 * returning. Thereby data already filtered is kept from shipping and
 * can be changed by further action callbacks. Eventually, however, an
 * action callback should set <code>collect</code> to
 * <code>false</code> again to allow the <code>read()</code> method to
 * finally ship the filtered data.</p>
 *
 * <p>A <code>Dfa</code> that matches the empty string should not be
 * used in a <code>DfaRun</code>, because this is usually a bug in the
 * regular expressions used. As soon as only the empty string
 * matches, methods like {@link #filter filter()} enter an infinite
 * loop because they keep matching without reading input. Use {@link
 * Dfa#matchesEmpty} if unsure whether your <code>Dfa</code> is safe
 * to use.</p>
 *
 * <p>It is safe to change the <code>Dfa</code> with {@link #setDfa
 * setDfa()} at any time within an action callback. This is
 * particularly useful to parse different parts of input with
 * different automata.</p>
 *
 * <p><b>Note:</b> This class is not synchronized. Objects of this
 * class should only be used within one thread at a time. However, the
 * {@link Dfa} operated may be shared between threads, given that the
 * {@link FaAction} callbacks in the <code>Dfa</code> contain no
 * internal state. For the callbacks to communicate, use {@link
 * #clientData}.</p>
 *
 * <p><b>Hint</b> For maximum speed try to complete your set of
 * regular expressions such that every piece of input is
 * matched. Don't rely on <code>DfaRun</code>'s feature to handle
 * unmatched input. Handling unmatched input is less efficient than
 * handling matches.</p>
 *
 * @author (C) 2003 Harald Kirsch
 * @version $Revision: 1.48 $, $Date: 2006-09-03 17:28:50 $
 */
public class DfaRun implements CharSource, Serializable {

  /**
   * Type-safe enumeration of the strategies a <code>DfaRun</code> can
   * follow in its <code>read()</code> and <code>filter()</code>
   * methods when no match can be found in the input.
   *
   * <p>Every value carries a small integer id. One instance per id is
   * kept in the table <code>all</code>, and deserialization resolves
   * each incoming object to the table entry with the same id.</p>
   */
  public static final class FailedMatchBehaviour implements Serializable {
    // table of one instance per id, indexed by that id
    static FailedMatchBehaviour[] all = new FailedMatchBehaviour[3];
    static {
      for(int id=0; id<all.length; id++) {
        all[id] = new FailedMatchBehaviour(id);
      }
    }
    // the id of this value, used as index into the table
    int i;
    FailedMatchBehaviour(int i) {
      this.i = i;
    }
    // replaces a freshly deserialized object by the table entry
    // carrying the same id
    private Object readResolve() {
      return all[i];
    }
  }

  /**
   * <p>requests the <code>DfaRun</code> object to copy input not
   * matched by the DFA to the output.</p>
   *
   * <p>The constant is the entry with id 0 of the instance table kept
   * by <code>FailedMatchBehaviour</code>. Deserialization resolves to
   * the entries of that table, so identity comparisons against this
   * constant stay valid for values that were serialized and read
   * back.</p>
   *
   * @see #setOnFailedMatch
   */
  public static final FailedMatchBehaviour UNMATCHED_COPY
    = FailedMatchBehaviour.all[0];
  /**
   * <p>requests the <code>DfaRun</code> object to drop (delete) input
   * not matched by the DFA.</p>
   *
   * <p>The constant is the entry with id 1 of the instance table kept
   * by <code>FailedMatchBehaviour</code>. Deserialization resolves to
   * the entries of that table, so identity comparisons against this
   * constant stay valid for values that were serialized and read
   * back.</p>
   *
   * @see #setOnFailedMatch
   */
  public static final FailedMatchBehaviour UNMATCHED_DROP
    = FailedMatchBehaviour.all[1];

  /**
   * <p>requests the <code>DfaRun</code> to throw an exception if it
   * encounters input not matched by the DFA.</p>
   *
   * <p>The constant is the entry with id 2 of the instance table kept
   * by <code>FailedMatchBehaviour</code>. It must not share id 1 with
   * {@link #UNMATCHED_DROP}: deserialization resolves a value by its
   * id, so a wrong id would bring it back as a different behaviour
   * object.</p>
   *
   * @see #setOnFailedMatch
   */
  public static final FailedMatchBehaviour UNMATCHED_THROW =
    FailedMatchBehaviour.all[2];

  /**
   * is the marker action returned by {@link #next next()} on EOF.
   * Invoking it does nothing; its string representation is
   * <code>"DfaRun.EOF"</code>.
   */
  public static final FaAction EOF = new AbstractFaAction() {
      @Override
      public String toString() {
        return "DfaRun.EOF";
      }
      @Override
      public void invoke(StringBuilder sb, int start, DfaRun r) {
        // intentionally empty, this action is only a marker
      }
    };

  /**
   * is the error text used in the <code>IllegalArgumentException</code>
   * thrown by the constructor if a <code>DfaRun</code> shall be
   * created with a <code>Dfa</code> that matches the empty string.
   */
  public static final String EEPSMATCHER = "dfa matches the empty string";

  /**
   * <p>set this field to <code>true</code> from an {@link FaAction}
   * callback to keep the machinery from shipping the filtered data. It
   * allows action callbacks {@link FaAction#invoke invoked} later to
   * be sure that their first argument still contains previously
   * filtered data. Make sure this field is set to <code>false</code>
   * by some other action callback as soon as possible, because
   * otherwise filtered data will pile up unnecessarily in memory.</p>
   *
   * <p>If EOF is hit while this field is <code>true</code>, the
   * <code>read()</code> methods throw a
   * <code>java.io.EOFException</code>.</p>
   */
  public boolean collect = false;

  /**
   * <p>defines the maximum number of unmatched characters handled in
   * one chunk when the machinery is operating in {@link
   * #UNMATCHED_COPY} mode. When operating on a stretch of text that
   * contains no match at all, the machine runs in a tight inner loop
   * to find the next match as fast as possible. While doing so, no
   * output is delivered by the <code>filter()</code> and
   * <code>read()</code> methods because they call {@link #next
   * next()}, the method that runs the tight inner loop.</p>
   *
   * <p>To protect against memory overflow for really long stretches
   * of non-matching text, <code>maxCopy</code> puts an upper
   * limit on the characters collected before <code>next()</code>
   * forcibly returns, even if no match is found yet. Except in very
   * special cases there should be no need to ever change this value
   * from its default of 8192. Any value&nbsp;&le;&nbsp;1 will result
   * in single character delivery by <code>next()</code>. For the
   * <code>filter()</code> methods this seems to cost about 30%
   * performance compared to large enough values.</p>
   */
  public int maxCopy = 8192;

  // set true whenever the input is set with setIn(). Flipped to false
  // in next() as soon as the eofAction of the Dfa is delivered, so
  // that it is delivered only once per input source.
  private boolean eofArmed = false;

  /**
   * is the error text used in the <code>java.io.EOFException</code>
   * (a subclass of <code>java.io.IOException</code>) thrown if
   * EOF is hit while {@link #collect} is <code>true</code>.
   */
  private static final String ECOLLECT = "EOF hit in collect mode";

  /**
   * <p>Room for an arbitrary piece of data. If the callbacks of the
   * <code>Dfa</code> want to communicate with each other &mdash; even
   * if only to count instances in the input stream &mdash; this field
   * should be used to store the data so that the <code>Dfa</code>
   * itself is kept thread safe. Storing e.g. counts in the callback
   * object itself would make the <code>Dfa</code> no longer thread
   * safe.</p>
   *
   * <p>The field is initially <code>null</code> and is never touched
   * by the code of this class.</p>
   */
  public Object clientData = null;

  // the automaton currently operated, see setDfa()
  private Dfa dfa;
  // the source the automaton reads from, see setIn()
  private CharSource in;
  // how next() treats input for which no match is found
  private FailedMatchBehaviour onFailedMatch;
  // index into the output buffer recorded by next(), see matchStart()
  private int matchStart;

  // buffer serving read(): receives filtered output not yet delivered
  // as well as text handed to pushBack()
  private final StringBuilder readBuf = new StringBuilder(1024);
  private int readPos; // current index into readBuf
  // reusable result object handed out by submatches()
  private final TextStore readTs = new TextStore();

  // reusable field for calling Dfa.match() and the action returned by
  // Dfa.match(). Both are needed to assemble submatch information
  // should a callback call submatches().
  private final SubmatchData smd = new SubmatchData();
  private FaAction action;
  /**********************************************************************/
  /**
   * <p>creates a <code>DfaRun</code> object to operate the given {@link
   * Dfa}. The behaviour on unmatched input and on EOF is initialized
   * from the <code>Dfa</code>.</p>
   *
   * <p>Because in nearly all cases it is a mistake to run a {@link
   * Dfa} that matches the empty string, such a <code>Dfa</code> is
   * not allowed and throws an
   * <code>IllegalArgumentException</code>. In the rare case that
   * a <code>Dfa</code> matching the empty string must be run, you
   * have to first create a <code>DfaRun</code> with a proper
   * <code>Dfa</code> and then replace it with {@link #setDfa}. It is
   * a hassle, but this is intended.</p>
   *
   * @see #setOnFailedMatch
   * @param dfa is the automaton to operate initially. Callbacks may
   * change it.
   * @param in is the initial input source.
   *
   * @throws IllegalArgumentException if the given <code>dfa</code>
   * matches the empty string, i.e. if {@link Dfa#matchesEmpty} returns
   * <code>true</code>.
   */
  public DfaRun(Dfa dfa, CharSource in) {
    if( dfa.matchesEmpty() ) {
      throw new java.lang.IllegalArgumentException(EEPSMATCHER);
    }
    setDfa(dfa);
    setIn(in);
  }

  /**
   * <p>creates a <code>DfaRun</code> with empty initial input. This
   * constructor calls the two-parameter constructor with a
   * <code>CharSource</code> wrapping the empty string.</p>
   *
   * @param dfa is the automaton to operate initially.
   * @see #DfaRun(Dfa,CharSource)
   */
  public DfaRun(Dfa dfa) {
    this(dfa, new CharSequenceCharSource(""));
  }

  /**********************************************************************/
  /**
   * <p>changes the input source. Within a thread, this is permissible
   * at all times because a <code>DfaRun</code> object does not buffer
   * input data between calls to any of its methods.</p>
   *
   * <p>Besides (re)initializing the input source, two more settings
   * are initialized:</p>
   * <ol>
   *
   * <li>The way to handle non-matching input is reset to what the
   * <code>Dfa</code> operated specifies (see {@link #setOnFailedMatch
   * setOnFailedMatch()}).</li>
   *
   * <li>The action to take when encountering EOF is armed again so
   * that it is run exactly once when EOF is encountered on the newly
   * set input source.</li>
   * </ol>
   *
   * @param in is the new input source
   */
  public void setIn(CharSource in) {
    eofArmed = true;
    this.in = in;
    onFailedMatch = dfa.fmb;
  }

  /**
   * <p>returns the currently active input source.</p>
   *
   * @return the source most recently set by the constructor or by
   * {@link #setIn setIn()}
   */
  public CharSource getIn() { return in; }

  /**
   * <p>changes the {@link Dfa} to run. In addition the way to handle
   * unmatched input is (re)initialized from the given {@link
   * Dfa}.</p>
   *
   * <p>Unlike the constructor, this method does not reject a
   * <code>Dfa</code> that matches the empty string. With such a
   * <code>Dfa</code>, reading and filtering methods may enter an
   * infinite loop. Either check with {@link Dfa#matchesEmpty()} or
   * know what you are doing.</p>
   *
   * @param dfa is the automaton to operate from now on
   * @see #setOnFailedMatch
   */
  public void setDfa(Dfa dfa) {
    this.dfa = dfa;
    this.onFailedMatch = dfa.fmb;
  }

  /**
   * returns the {@link Dfa} operated by <code>this</code>.
   *
   * @return the automaton most recently set by the constructor or by
   * {@link #setDfa setDfa()}
   */
  public Dfa getDfa() { return dfa; }

  /**
   * <p>changes the way unmatched input is handled. Any of the
   * values {@link #UNMATCHED_COPY}, {@link #UNMATCHED_DROP} or {@link
   * #UNMATCHED_THROW} may be used. The behaviour is automatically
   * (re)set by {@link #setIn setIn()} and by {@link #setDfa setDfa()}
   * to the value found in the {@link Dfa} operated.</p>
   *
   * <p>The purpose of this method is rather to allow callbacks of
   * the <code>Dfa</code> to change the handling of unmatched input
   * temporarily.</p>
   *
   * @param b is the behaviour to use from now on
   */
  public void setOnFailedMatch(FailedMatchBehaviour b) {
    onFailedMatch = b;
  }

  /**
   * <p>returns the currently active behaviour for unmatched
   * input.</p>
   *
   * @return the value last set by {@link #setOnFailedMatch
   * setOnFailedMatch()}, {@link #setIn setIn()} or {@link #setDfa
   * setDfa()}
   */
  public FailedMatchBehaviour getFailedMatchBehaviour() {
    return onFailedMatch;
  }

  /**
   * <p>is a helper function which should only be called immediately after
   * calling {@link #next next()} or {@link #read(StringBuilder)} to get
   * the position where the match starts. This is only needed when the
   * machine is in {@link #UNMATCHED_COPY} mode, because otherwise the
   * match will be the first thing appended to the
   * <code>StringBuilder</code> given to <code>next()</code> or
   * <code>read()</code>.</p>
   *
   * <p><b>Hint:</b> When using this method together with
   * <code>read(StringBuilder)</code>, be aware that the callback
   * handling the match is in principle allowed to delete characters
   * even before the value returned here, rendering the returned value
   * completely useless. &mdash; Know your callbacks!</p>
   *
   * @return the index recorded by the most recent call to
   * <code>next()</code>
   */
  public int matchStart() { return matchStart; }

  /**********************************************************************/
  /**
   * reads one character immediately from the input source and returns
   * it without filtering. If filtered characters are already
   * available because of a previous {@link #read()}, these are not
   * touched and will be used in the next call to one of the
   * <code>read()</code> functions.
   *
   * @return whatever the <code>read()</code> method of the input
   * source returns
   * @throws java.io.IOException if the input source throws it
   */
  public int skip() throws java.io.IOException {
    return in.read();
  }

  /**
   * <p>shoves back characters into the input of the
   * <code>DfaRun</code> while deleting them from the given
   * <code>StringBuilder</code>. The characters will be the first to be
   * read when the machine performs the next match, e.g. when {@link
   * #read} is called.</p>
   *
   * <p>The call is forwarded unchanged to the <code>pushBack()</code>
   * method of the current input source.</p>
   *
   * @param s contains the characters to push back
   * @param startAt is the index in <code>s</code> of the first
   * character to push back
   */
  public void unskip(StringBuilder s, int startAt) {
    in.pushBack(s, startAt);
  }
  /**
   * <p>shoves back all characters of the given string into the input
   * of the <code>DfaRun</code>.</p>
   * <p><b>Warning:</b> Do not use this method in time critical
   * applications. It copies the string into a freshly created
   * <code>StringBuilder</code> and hands that to the other unskip
   * method.</p>
   *
   * @param s contains the characters to push back
   * @see #unskip(StringBuilder, int)
   */
  public void unskip(String s) {
    StringBuilder carrier = new StringBuilder(s);
    unskip(carrier, 0);
  }

  /**
   * <p>shoves back characters into the input of the
   * <code>DfaRun</code>. This method simply applies {@link
   * TextStore#drain TextStore.drain()} to the input of
   * <code>this</code>. Consequently, <code>start</code> may be
   * negative to indicate a suffix of <code>ts</code> to be pushed
   * back.</p>
   *
   * @param ts is the store to drain into the input source
   * @param start is passed on to <code>TextStore.drain()</code>
   */
  public void unskip(TextStore ts, int start) {
    ts.drain(in, start);
  }
  /**********************************************************************/
  /**
   * <p>may be called by a callback to retrieve <a
   * href="doc-files/resyntax.html#rse">submatches</a>. Submatches
   * must be retrieved before the match is changed in any way. A
   * typical call within an {@link FaAction} looks like</p> <pre>
   *   public void invoke(StringBuilder out, int start, DfaRun r)
   *     throws CallbackException
   *   {
   *     TextStore ts = r.submatches(out, start);
   *     ...
   *   }</pre>
   * <p>Parameter <code>txt</code> is not changed in any way.</p>
   *
   * @param txt must contain the full match starting at position
   * <code>start</code> and extending to the end of <code>txt</code>.
   * Text in front of <code>start</code> is ignored.
   *
   * @param start is the position where the full match starts within
   * <code>txt</code>
   *
   * @return a <code>TextStore</code> that contains the whole match as
   * part 0 and submatches as subsequent parts. The returned object is
   * owned by <code>this</code> and is reused by every call, so its
   * contents may only be used locally in a callback. After returning
   * from the callback, the contents of the result may soon change.
   */
  public TextStore submatches(StringBuilder txt, int start) {
    final TextStore parts = readTs;
    parts.clear();
    // part 0 is everything from start up to the end of txt
    parts.appendPart(txt, start, txt.length());
    smd.analyze(parts, action);
    return parts;
  }
  /**********************************************************************/
  /**
   * <p>finds the next match in the current input, appends it to
   * <code>out</code> and returns the {@link FaAction} associated with
   * the match. Input is read until a match is found, {@link #maxCopy}
   * is reached or EOF is hit. Non-matching input is handled
   * according to {@link #setOnFailedMatch setOnFailedMatch()}. In
   * particular:</p>
   *
   * <dl>
   * <dt>{@link #UNMATCHED_COPY}</dt><dd> will append up to {@link
   * #maxCopy} non-matching characters in front of the match. If
   * <code>maxCopy</code> is reached before the match, <b>no matching
   * text is returned</b>, only the non-matching characters. In this
   * case the return value is <code>null</code>. Should
   * <code>maxCopy</code> be &le;&nbsp;1, then 1 character is always
   * delivered. If a match is found before <code>maxCopy</code> is
   * reached, the match is appended to <code>out</code>. To find
   * out where the match actually starts, call {@link #matchStart()}.</dd>
   *
   * <dt>{@link #UNMATCHED_DROP}</dt><dd>will drop (delete)
   * unmatched text. In this case the matching text is the only text
   * appended to <code>out</code>.</dd>
   *
   * <dt>{@link #UNMATCHED_THROW}</dt><dd>causes a
   * {@link monq.jfa.NomatchException} to be thrown. No text will
   * be appended to <code>out</code> and the offending text will still
   * be available in the {@link CharSource} serving as input to
   * <code>this</code>.</dd>
   * </dl>
   *
   * <p><b>Hint:</b> Use this method if you are interested only in a
   * simple tokenization of the input. The actions returned may serve
   * as the token type. If you however want to apply the actions
   * returned immediately to the match, then rather use one of the
   * <code>read</code> or <code>filter</code> methods. If you find
   * yourself using <code>if</code> statements on the
   * <code>FaAction</code> returned, you are definitely doing
   * something wrong.</p>
   *
   * @param out receives the text described above by appending
   *
   * @return <dl>
   * <dt>eofAction</dt><dd> When EOF is hit the first time after the
   * input was set and the <code>Dfa</code> operated has an action set
   * for EOF which is not <code>null</code>, this is returned (see
   * {@link Nfa#compile Nfa.compile()}).</dd>
   *
   * <dt>{@link #EOF}</dt><dd>if EOF is hit and
   * <code>eofAction</code> was already delivered or is
   * <code>null</code>. The output may have non-matching input that
   * was found just before EOF.</dd>
   *
   * <dt><code>null</code></dt><dd>if <code>UNMATCHED_COPY</code> is
   * active and <code>maxCopy</code> non-matching characters were
   * found before a match was encountered.</dd>
   *
   * <dt>an action</dt><dd> found for a match.</dd>
   * </dl>
   *
   * @throws java.io.IOException if reading the input fails; a
   * <code>NomatchException</code> is thrown for unmatched input when
   * neither copying nor dropping is active
   */
  public FaAction next(StringBuilder out)
    throws java.io.IOException
  {
    matchStart = out.length();
    FaAction found = dfa.match(in, out, smd);

    if( found==null ) {
      // No match right at the current position, so the first match
      // has to be searched for.
      if( onFailedMatch==UNMATCHED_COPY ) {
        found = copyUnmatched(out);
      } else if( onFailedMatch==UNMATCHED_DROP ) {
        found = dropUnmatched(out);
      } else {
        // everything else is a failure
        String context = lookahead();
        throw new NomatchException("no matching regular expression "+
            "when looking at `"+context+"'");
      }
    }

    // EOF together with an armed eofAction is treated like a match
    boolean deliverEofAction = found==EOF && dfa.eofAction!=null && eofArmed;
    if( !deliverEofAction ) return found;
    eofArmed = false;
    return dfa.eofAction;
  }

  // Copies unmatched characters one by one to out until a match or
  // EOF is found or maxCopy characters are copied; moves matchStart
  // behind the copied characters.
  private FaAction copyUnmatched(StringBuilder out)
    throws java.io.IOException
  {
    int copied = 0;
    FaAction found;
    do {
      // Note: a character is always available here as long as
      // dfa.match() did not report EOF
      out.append((char)(in.read()));
      copied += 1;
      found = dfa.match(in, out, smd);
    } while( found==null && copied<maxCopy );
    matchStart += copied;
    return found;
  }

  // Discards unmatched characters one by one until dfa.match()
  // reports a match or EOF.
  private FaAction dropUnmatched(StringBuilder out)
    throws java.io.IOException
  {
    FaAction found;
    do {
      in.read();
      found = dfa.match(in, out, smd);
    } while( found==null );
    return found;
  }
  /**********************************************************************/
  /**
   * fetches a bit of lookahead for use in messages of exceptions.
   * Up to 30 characters are read from the input and pushed back
   * afterwards, also when reading fails with an
   * <code>IOException</code>.
   *
   * @return the first 27 characters followed by <code>...</code> if 30
   * characters could be read, otherwise all characters read followed
   * by <code>[EOF]</code>. If reading fails, a fixed text saying so
   * is returned.
   */
  private String lookahead() {
    final int limit = 30;
    StringBuilder ahead = new StringBuilder(limit);
    try {
      while( ahead.length()<limit ) {
        int ch = in.read();
        if( ch==-1 ) break;
        ahead.append((char)ch);
      }
    } catch( java.io.IOException e ) {
      in.pushBack(ahead, 0);
      return "IOException when trying to generate context info";
    }
    // the text must be extracted before pushBack() empties the buffer
    String result = ahead.length()==limit
      ? ahead.substring(0, 27)+"..."
      : ahead.toString()+"[EOF]";
    in.pushBack(ahead, 0);
    return result;
  }
  /**********************************************************************/
  /**
   * ==== IMPORTANT ==== This should never again be public because it does not
   * honour the chunks defined by actions+collect mode. It only honours chunks
   * of actions. This should not be made visible to the outside.
   *
   * <p>
   * calls {@link #next} once and invokes the returned {@link FaAction},
   * unless it is <code>null</code> or {@link #EOF}. The action is remembered
   * for use by {@link #submatches submatches()}.
   * </p>
   *
   * <p>
   * Due to the <code>FaAction</code> applied to <code>out</code> after calling
   * <code>next</code> anything can happen to <code>out</code>. In particular it
   * need not become longer.
   * </p>
   *
   * <p>
   * A <code>CallbackException</code> thrown by the action is replaced by a
   * new one with the same stack trace and cause, the message of which is
   * extended by the text the action was working on.
   * </p>
   *
   * @return {@code false} on EOF, output data may or may not have been
   * produced.
   */
  private boolean crunch(StringBuilder out) throws java.io.IOException {
    action = next(out);
    if( action==EOF ) return false;
    if( action==null ) return true;

    try {
      action.invoke(out, matchStart, this);
      return true;
    } catch( CallbackException e ) {
      String complaint = e.getMessage();
      String context;
      if( matchStart>out.length() ) {
        // the action shortened out to before the start of the match
        context = ". Matched and filtered data just before the "+
            "match triggering the exception is: `"+out+"'";
      } else {
        context = ". The match, possibly changed by the complaining "+
            "action, follows in "+
            "double brackets:\n[["+out.substring(matchStart)+"]]";
      }

      CallbackException wrapped = new CallbackException(complaint+context);
      wrapped.setStackTrace(e.getStackTrace());
      wrapped.initCause(e.getCause());
      throw wrapped;
    }
  }
  /**********************************************************************/
  /**
   * <p>delivers filtered data in naturally occurring chunks by
   * appending to <code>out</code>. As long as {@link #collect} is
   * <code>false</code>, the naturally occurring chunk is determined by
   * one call to {@link #next next()} and the application of the
   * returned callback. Data still waiting in the internal read
   * buffer, for example pushed back data, is delivered in front of
   * the chunk.</p>
   *
   * <p>If an {@link FaAction#invoke FaAction.invoke()} callback
   * switches to <code>collect==true</code>, this function keeps
   * filtering until <code>collect</code> is reset to
   * <code>false</code> by another action callback. This allows the
   * action callbacks to hold back data from being delivered in cases
   * where several action callbacks cooperate in the decision about
   * shipping the data. The action callbacks have access to all the
   * filtered data held back and may treat it as needed. In particular
   * the data can be deleted before <code>collect</code> is switched
   * back to <code>false</code>.</p>
   *
   * <p><b>Hint:</b> This method can be used to tokenize the input. If
   * the machine is put into <code>UNMATCHED_DROP</code> mode, every
   * call to this method will return exactly one match, treated by the
   * action bound to it.</p>
   *
   * @param out receives the filtered data by appending
   *
   * @exception java.io.EOFException if
   * EOF is hit while <code>collect==true</code>.
   * @exception CallbackException if a callback throws this exception
   *
   * @return {@code false} on EOF, data may or may not have been produced in
   * {@code out}.
   */
  public boolean read(StringBuilder out) throws IOException {
    boolean pending = readPos<readBuf.length();
    if( pending ) {
      // hand over what is left in the read buffer and empty it
      out.append(readBuf, readPos, readBuf.length());
      readBuf.setLength(0);
      readPos = 0;
    }
    return readCollect(out);
  }
  /**********************************************************************/
  /**
   * reads and filters input until at least one character is available
   * or EOF is hit. The field {@link #collect} is honoured in the same
   * way as by {@link #read(StringBuilder)}. Filtered characters not
   * yet delivered are kept in an internal buffer for later calls.
   *
   * @return the next filtered character cast to <code>int</code> or -1
   * to signal EOF.
   */
  @Override
  public int read() throws java.io.IOException {
    boolean more = true;
    while( more && noData() ) {
      more = readCollect(readBuf);
    }
    // only possible after EOF was signalled without producing data
    if( noData() ) return -1;

    final int ch = readBuf.charAt(readPos);
    readPos += 1;
    if( noData() ) {
      // buffer completely delivered, start over at its beginning
      readBuf.setLength(0);
      readPos = 0;
    }
    return ch;
  }
  /**********************************************************************/
  /**
   * calls {@link #crunch} at least once and then again as long as
   * there is more input and {@link #collect} is <code>true</code>.
   *
   * @return {@code false} on EOF, data may or may not have been
   * produced in {@code out}.
   * @throws java.io.EOFException if <code>collect</code> is still
   * <code>true</code> when the loop ends
   */
  private boolean readCollect(StringBuilder out) throws IOException {
    boolean moreData;
    do {
      moreData = crunch(out);
    } while( moreData && collect );
    if( collect ) throw new java.io.EOFException(ECOLLECT);
    return moreData;
  }
  /**********************************************************************/
  // tells whether the read buffer holds no character at or behind
  // the current read position
  private boolean noData() {
    return readBuf.length()<=readPos;
  }
  /**********************************************************************/
  /**
   * inserts the characters of <code>src</code> starting at index
   * <code>fromPos</code> into the internal read buffer at the current
   * read position, so that they are the next characters delivered
   * from that buffer. The characters are removed from
   * <code>src</code> by truncating it to length <code>fromPos</code>.
   *
   * @param src contains the characters to push back
   * @param fromPos is the index in <code>src</code> of the first
   * character to push back
   */
  @Override
  public void pushBack(StringBuilder src, int fromPos)
  {    
    readBuf.insert(readPos, src, fromPos, src.length());
    src.setLength(fromPos);
  }
  /**********************************************************************/
  /**
   * <p>reads and filters input, appending the result to
   * <code>out</code>, until EOF is hit.</p>
   *
   * @param out receives the filtered data by appending
   */
  public void filter(StringBuilder out) throws java.io.IOException {
    // Note: .collect needs no attention here. It only serves to hold
    // back partial results from being delivered by read, while the
    // caller of this method gets to see the final result only.
    boolean more;
    do {
      more = crunch(out);
    } while( more );
  }
  /**********************************************************************/
  /**
   * <p>reads and filters input, printing the result chunk by chunk
   * to <code>out</code>, until EOF is hit. The method returns early,
   * without the final flush, as soon as <code>out</code> reports an
   * error after a chunk was printed.</p>
   *
   * @param out receives the filtered data
   */
  public void filter(PrintStream out)
    throws java.io.IOException
  {
    StringBuilder chunk = new StringBuilder(500);
    for(;;) {
      boolean more = read(chunk);
      if( !more ) break;
      out.print(chunk);
      chunk.setLength(0);
      if( out.checkError() ) return;
    }
    // whatever was produced together with the EOF indication
    out.print(chunk);
    out.flush();
  }
  /**********************************************************************/
  /**
   * <p>makes the given string the input of <code>this</code> by means
   * of {@link #setIn setIn()}, filters it until EOF is hit and
   * returns the filtered result.</p>
   *
   * @param sin is the text to filter
   * @return the filtered text
   */
  public synchronized String filter(String sin)
    throws java.io.IOException
  {
    StringBuilder result = new StringBuilder(sin.length());
    setIn(new CharSequenceCharSource(sin));
    boolean more;
    do {
      more = read(result);
    } while( more );
    return result.toString();
  }
  /**********************************************************************/
  /**
   * <p>runs the machine until EOF is hit. This is useful when the
   * callbacks don't produce output text but rather perform different
   * work.</p>
   * <b>Note:</b> This method sets up a <code>StringBuilder</code>
   * into which filtered data is dumped. The buffer is cleared
   * whenever a call to {@link #read(StringBuilder)} returns, in
   * particular after each match. To prevent this from
   * happening, use {@link #collect} as for the other
   * <code>filter</code> methods.
   */
  public synchronized void filter()
      throws java.io.IOException
  {
    StringBuilder scratch = new StringBuilder(1000);
    while( true ) {
      boolean more = read(scratch);
      if( !more ) return;
      scratch.setLength(0);
    }
  }
}