java.text.BreakIterator Java Examples

The following examples show how to use java.text.BreakIterator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SelectionImpl.java    From RichTextFX with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Selects a range derived from the word boundaries around the given position.
 * Does nothing when the area contains no text.
 *
 * @param wordPositionInArea position in the area used to locate the word boundaries
 */
@Override
public void selectWord(int wordPositionInArea) {
    if (area.getLength() != 0) {
        final BreakIterator words = BreakIterator.getWordInstance(getArea().getLocale());
        words.setText(area.getText());

        // Move to the boundary before the position, advance once, and take
        // wherever the iterator ends up as the start of the selection.
        words.preceding(wordPositionInArea);
        words.next();
        final int selectionStart = words.current();

        // Same procedure from the boundary after the position for the end.
        words.following(wordPositionInArea);
        words.next();
        final int selectionEnd = words.current();

        selectRange(selectionStart, selectionEnd);
    }
}
 
Example #2
Source File: BreakIteratorTest.java    From dragonwell8_jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Walks {@code bi} backwards from {@code last()} using {@code previous()} and
 * collects the text between consecutive boundaries, in forward order.
 * Reports an error if {@code last()} is not the text length, if
 * {@code previous()} does not move backward, or if iteration ends before
 * reaching offset 0.
 *
 * @param bi   the iterator under test (text already set by the caller)
 * @param text the text the iterator was given
 * @return the fragments between boundaries, first fragment first
 */
private Vector testLastAndPrevious(BreakIterator bi, String text) {
    Vector<String> fragments = new Vector<String>();
    int pos = bi.last();
    int upperBound = pos;

    if (pos != text.length()) {
        errln("last() returned " + pos + " instead of " + text.length());
    }
    while (pos != BreakIterator.DONE) {
        pos = bi.previous();
        if (pos == BreakIterator.DONE) {
            // Ran out of boundaries: the last one seen must have been offset 0.
            if (upperBound != 0) {
                errln("previous() returned DONE prematurely: offset was "
                                + upperBound + " instead of 0");
            }
        } else {
            if (pos >= upperBound) {
                errln("previous() failed to move backward: previous() on position "
                                + upperBound + " yielded " + pos);
            }
            // Prepend so the result ends up in forward order.
            fragments.insertElementAt(text.substring(pos, upperBound), 0);
        }
        upperBound = pos;
    }
    return fragments;
}
 
Example #3
Source File: Chapter3.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Prints every sentence of {@code paragraph} on its own line in the form
 * {@code begin-end [sentence]}, where {@code begin} and {@code end} are the
 * sentence boundaries reported by a US-English sentence {@link BreakIterator}.
 */
public static void usingBreakIterator() {
    // Pass the locale explicitly so segmentation does not depend on the JVM default.
    BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.US);
    sentenceIterator.setText(paragraph);

    int begin = sentenceIterator.first();
    // Fetch the end boundary before printing anything, so that reaching DONE
    // never leaves a dangling "begin-" fragment on the output.
    for (int end = sentenceIterator.next();
            end != BreakIterator.DONE;
            begin = end, end = sentenceIterator.next()) {
        System.out.println(begin + "-" + end + " ["
                + paragraph.substring(begin, end) + "]");
    }
}
 
Example #4
Source File: BreakIteratorTest.java    From dragonwell8_jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Calls {@code bi.preceding(offset)} for every offset from 0 through
 * {@code text.length()} and reports an error whenever the result differs
 * from the expected entry in {@code boundaries}. An
 * {@code IllegalArgumentException} from {@code preceding()} is reported as
 * an error and ends the test.
 *
 * @param bi         the iterator under test
 * @param text       the text the iterator was given
 * @param boundaries expected boundary table; the expected entry advances
 *                   each time the offset reaches the following entry
 */
private void testPreceding(BreakIterator bi, String text, int[] boundaries) {
    logln("testPreceding():");
    int expectedIdx = 0;
    int offset = 0;
    try {
        while (offset <= text.length()) {
            final int actual = bi.preceding(offset);
            logln("bi.preceding(" + offset + ") -> " + actual);

            final int expected = boundaries[expectedIdx];
            if (actual != expected) {
                errln("Wrong result from preceding() for " + offset + ": expected " + expected
                      + ", got " + actual);
            }

            // Once the offset reaches the next boundary, that boundary becomes
            // the expected answer for all later offsets.
            if (offset == boundaries[expectedIdx + 1]) {
                expectedIdx++;
            }
            offset++;
        }
    } catch (IllegalArgumentException illargExp) {
        errln("IllegalArgumentException caught from preceding() for offset: " + offset);
    }
}
 
Example #5
Source File: BreakIteratorTest.java    From dragonwell8_jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Calls {@code bi.isBoundary(offset)} for every offset from 0 through
 * {@code text.length()} and reports an error whenever the answer disagrees
 * with {@code boundaries}.
 *
 * @param bi         the iterator under test
 * @param text       the text the iterator was given
 * @param boundaries expected boundary table, consulted starting at index 1
 */
private void testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
    logln("testIsBoundary():");
    int nextExpected = 1;
    for (int offset = 0; offset <= text.length(); offset++) {
        final boolean actual = bi.isBoundary(offset);
        logln("bi.isBoundary(" + offset + ") -> " + actual);

        final boolean shouldBeBoundary = (offset == boundaries[nextExpected]);
        if (shouldBeBoundary) {
            if (!actual) {
                errln("Wrong result from isBoundary() for " + offset + ": expected true, got false");
            }
            ++nextExpected;
        } else if (actual) {
            errln("Wrong result from isBoundary() for " + offset + ": expected false, got true");
        }
    }
}
 
Example #6
Source File: TestSplittingBreakIterator.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the full set of checks against {@code bi} after pointing it at
 * {@code text}: first/last, forward iteration, following/preceding at
 * shuffled offsets, and backward iteration.
 *
 * @param bi         the iterator under test
 * @param text       the text to analyze
 * @param boundaries the expected boundaries in the string form used by the helpers
 */
private void testBreakIterator(BreakIterator bi, String text, String boundaries) {
  bi.setText(text);

  // first() and last()
  testFirstAndLast(bi, text, boundaries);

  // Boundaries produced by looping over next() must match the expectation.
  final String actualBoundaries = readBoundariesToString(bi, text);
  assertEquals(boundaries, actualBoundaries);

  // following() and preceding(), probing every index in random order
  // in case there is a sequencing bug.
  final int indexCount = text.length() + 1;
  List<Integer> indexes = randomIntsBetweenInclusive(indexCount);
  testFollowing(bi, text, boundaries, indexes);
  testPreceding(bi, text, boundaries, indexes);

  // previous()
  testPrevious(bi, text, boundaries);
}
 
Example #7
Source File: V8BreakIterator.java    From HtmlUnit-Android with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the type of the break.
 * This implementation only ever produces {@code letter}, {@code number} or {@code none}.
 * @return {@code letter} if the text between the previous and the current break contains
 *         an ASCII letter, {@code number} if it consists solely of ASCII digits,
 *         {@code none} otherwise
 */
@JsxFunction
public String breakType() {
    if (typeAlwaysNone_) {
        return "none";
    }

    final int tokenEnd = current();
    final int tokenStart = breakIterator_.previous();
    // previous() was called on the iterator; step forward again (or go to the
    // first boundary when there was no previous one).
    if (tokenStart != BreakIterator.DONE) {
        next();
    }
    else {
        first();
    }

    if (tokenEnd == BreakIterator.DONE || tokenStart == BreakIterator.DONE) {
        return "none";
    }

    final String token = text_.substring(tokenStart, tokenEnd);
    if (token.matches(".*[a-zA-Z]+.*")) {
        return "letter";
    }
    if (token.matches("[0-9]+")) {
        return "number";
    }
    return "none";
}
 
Example #8
Source File: GranularityIterator.java    From talkback with Apache License 2.0 6 votes vote down vote up
/**
 * Finds the text unit that ends at or before {@code offset}.
 *
 * @param offset position in the iterator text; values past the end are clamped to the text length
 * @return the range produced by {@code getRange(start, end)}, or {@code null} when the text is
 *     empty, {@code offset} is not positive, or no boundary pair can be found
 */
@Override
public @Nullable int[] preceding(int offset) {
  final int textLength = getIteratorText().length();
  if (textLength <= 0 || offset <= 0) {
    return null;
  }

  // Clamp to the text, then walk back until we sit on a boundary.
  int end = Math.min(offset, textLength);
  while (!breakIterator.isBoundary(end)) {
    end = breakIterator.preceding(end);
    if (end == BreakIterator.DONE) {
      return null;
    }
  }

  final int start = breakIterator.preceding(end);
  return (start == BreakIterator.DONE) ? null : getRange(start, end);
}
 
Example #9
Source File: BreakIteratorTest.java    From openjdk-jdk9 with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Verifies {@code bi.preceding()} at every offset from 0 through
 * {@code text.length()} against the expected {@code boundaries} table,
 * reporting each mismatch. An {@code IllegalArgumentException} thrown by
 * {@code preceding()} is reported as an error and stops the check.
 *
 * @param bi         the iterator under test
 * @param text       the text the iterator was given
 * @param boundaries expected boundary table; the expected entry moves forward
 *                   whenever the offset equals the entry after it
 */
private void testPreceding(BreakIterator bi, String text, int[] boundaries) {
    logln("testPreceding():");
    int cursor = 0;
    int pos = 0;
    try {
        for (pos = 0; pos <= text.length(); pos++) {
            final int found = bi.preceding(pos);
            logln("bi.preceding(" + pos + ") -> " + found);

            final int wanted = boundaries[cursor];
            if (found != wanted) {
                errln("Wrong result from preceding() for " + pos + ": expected " + wanted
                      + ", got " + found);
            }

            final boolean reachedNextBoundary = (pos == boundaries[cursor + 1]);
            if (reachedNextBoundary) {
                ++cursor;
            }
        }
    } catch (IllegalArgumentException illargExp) {
        errln("IllegalArgumentException caught from preceding() for offset: " + pos);
    }
}
 
Example #10
Source File: TextComponent.java    From openjdk-8 with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Shared implementation for forward and backward word-limit searches.
 * Assumes that {@code s} is the text currently assigned to {@code words}.
 *
 * @param index     offset at which the search starts
 * @param words     word iterator already set to {@code s}
 * @param direction {@code NEXT} to search forward, otherwise backward
 * @param s         the text being searched
 * @return the boundary on the starting side of the first segment that contains
 *         a letter, or {@code BreakIterator.DONE} if there is none
 */
private int findWordLimit(int index, BreakIterator words, boolean direction,
                                 String s) {
    // Fix for 4256660 and 4256661.
    // For a word iterator the end of one word need not be the start of the
    // next one (see the java.text.BreakIterator JavaDoc), so each segment
    // between two boundaries is scanned for a letter. Modeled on the
    // nextWordStartAfter example from BreakIterator.java.
    final boolean forward = (direction == NEXT);

    int last = forward ? words.following(index) : words.preceding(index);
    int current = forward ? words.next() : words.previous();
    while (current != BreakIterator.DONE) {
        final int segmentStart = Math.min(last, current);
        final int segmentEnd = Math.max(last, current);
        for (int p = segmentStart; p < segmentEnd; p++) {
            if (Character.isLetter(s.charAt(p))) {
                return last;
            }
        }
        last = current;
        current = forward ? words.next() : words.previous();
    }
    return BreakIterator.DONE;
}
 
Example #11
Source File: SplitSentence.java    From ignite-book-code-samples with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Splits the sentence held in field 0 of the tuple at word boundaries and
 * emits every piece that still has content after whitespace is removed.
 *
 * @param tuple     input tuple whose first field is the sentence
 * @param collector collector that receives one {@code Values} per word
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
    final String sentence = tuple.getString(0);

    final BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText(sentence);

    int start = wordIterator.first();
    int end = wordIterator.next();
    while (end != BreakIterator.DONE) {
        // Strip whitespace so that pure-whitespace segments become empty and are skipped.
        final String token = sentence.substring(start, end).replaceAll("\\s+","");
        if (!token.equals("")) {
            collector.emit(new Values(token));
        }
        start = end;
        end = wordIterator.next();
    }
}
 
Example #12
Source File: Bug4533872.java    From dragonwell8_jdk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * For every sample text, jumps forward with {@code next(n)} to the start of
 * the last expected word and checks that the text up to the following
 * boundary equals that expected word.
 */
void TestNext() {
    iter = BreakIterator.getWordInstance(Locale.US);

    for (int textIdx = 0; textIdx < given.length; textIdx++) {
        iter.setText(given[textIdx]);
        start = iter.first();
        final int lastWord = expected[textIdx].length - 1;
        start = iter.next(lastWord);
        end = iter.next();

        final String want = expected[textIdx][lastWord];
        final String actual = given[textIdx].substring(start, end);
        if (!want.equals(actual)) {
            errln("Word break failure: printEachForward() expected:<" +
                  want + ">, got:<" +
                  actual +
                  "> start=" + start + "  end=" + end);
        }
    }
}
 
Example #13
Source File: BreakIteratorTest.java    From TencentKona-8 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates the character, word, line and sentence break iterators used by
 * the tests, each for the default locale.
 */
public BreakIteratorTest() {
    this.characterBreak = BreakIterator.getCharacterInstance();
    this.wordBreak = BreakIterator.getWordInstance();
    this.lineBreak = BreakIterator.getLineInstance();
    this.sentenceBreak = BreakIterator.getSentenceInstance();
}
 
Example #14
Source File: TestOpenNLPSentenceBreakIterator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks sentence detection when the iterator is given only the middle slice
 * of a character array: the first sample sentence surrounded by padding.
 */
public void testSliceMiddle() throws Exception {
  NLPSentenceDetectorOp detector = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
  BreakIterator bi = new OpenNLPSentenceBreakIterator(detector);

  final String sentence = SENTENCES[0];
  final String padded = PADDING + sentence + PADDING;
  // Expose only the sentence itself: skip the leading padding, stop before the trailing one.
  bi.setText(getCharArrayIterator(padded, PADDING.length(), sentence.length()));

  test1Sentence(bi, sentence);
}
 
Example #15
Source File: Bug4533872.java    From openjdk-jdk8u with GNU General Public License v2.0 5 votes vote down vote up
/**
 * For every sample text, checks that {@code preceding(0)} yields
 * {@code BreakIterator.DONE}, that {@code preceding(offset)} followed by
 * {@code next()} brackets the expected word for each probe offset, and that
 * {@code next()} after {@code last()} yields {@code BreakIterator.DONE}.
 */
void TestPrintAt_2() {
    iter = BreakIterator.getWordInstance(Locale.US);

    // Probe offsets, one row per sample text.
    final int[][] offsets = {
        {2, 9, 10, 15, 17},
        {1, 9, 10, 13, 16, 18, 20},
        {4, 9, 10, 13, 16, 18, 20},
        {6, 7, 10, 11, 15},
    };

    for (int i = 0; i < given.length; i++) {
        iter.setText(given[i]);

        // Nothing precedes offset 0, so DONE is expected.
        final int beforeStart = iter.preceding(0);
        if (beforeStart != BreakIterator.DONE) {
             errln("Word break failure: printAt_2() expected:-1(BreakIterator.DONE), got:" +
                   beforeStart);
        }

        final int[] probes = offsets[i];
        for (int j = 0; j < probes.length; j++) {
            start = iter.preceding(probes[j]);
            end = iter.next();

            final String want = expected[i][j];
            final String actual = given[i].substring(start, end);
            if (!want.equals(actual)) {
                errln("Word break failure: printAt_2() expected:<" +
                      want + ">, got:<" +
                      actual +
                      "> start=" + start + "  end=" + end);
            }
        }

        // Nothing follows the last boundary, so DONE is expected.
        end = iter.last();
        start = iter.next();
        if (start != BreakIterator.DONE) {
             errln("Word break failure: printAt_2() expected:-1(BreakIterator.DONE), got:" + start);
        }
    }
}
 
Example #16
Source File: SubWordActions.java    From Pydev with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Moves the caret to the next sub-word position, unless native word
 * navigation is configured, in which case the inherited action runs instead.
 */
@Override
public void run() {
    // Defer to the inherited behavior when the native navigation style is selected.
    final String navigationStyle =
            getPreferenceStore().getString(SubWordPreferences.WORD_NAVIGATION_STYLE);
    if (navigationStyle.equals(SubWordPreferences.WORD_NAVIGATION_STYLE_NATIVE)) {
        super.run();
        return;
    }

    final ISourceViewer viewer = getSourceViewer();
    final IDocument document = viewer.getDocument();
    try {
        fIterator.setText((CharacterIterator) new DocumentCharacterIterator(document));
        final int caret = widgetOffset2ModelOffset(viewer, viewer.getTextWidget().getCaretOffset());
        if (caret == -1) {
            return;
        }

        final int target = findNextPosition(caret);
        final boolean leavesLineInBlockMode = isBlockSelectionModeEnabled()
                && document.getLineOfOffset(target) != document.getLineOfOffset(caret);
        if (leavesLineInBlockMode) {
            super.run(); // may navigate into virtual white space
        } else if (target != BreakIterator.DONE) {
            setCaretPosition(target);
            getTextWidget().showSelection();
            fireSelectionChanged();
        }
    } catch (BadLocationException x) {
        // ignore
    }
}
 
Example #17
Source File: BreakIteratorTest.java    From jdk8u_jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Concatenates the expected fragments into one text, hands it to {@code bi},
 * and runs the forward/backward, following, preceding, isBoundary and
 * multiple-selection checks against it.
 *
 * @param bi             the iterator under test
 * @param expectedResult the expected fragments (strings), in order
 */
private void generalIteratorTest(BreakIterator bi, Vector expectedResult) {
    final int fragmentCount = expectedResult.size();

    StringBuffer joined = new StringBuffer();
    for (int i = 0; i < fragmentCount; i++) {
        joined.append((String)expectedResult.elementAt(i));
    }
    String text = joined.toString();

    bi.setText(text);

    Vector nextResults = testFirstAndNext(bi, text);
    Vector previousResults = testLastAndPrevious(bi, text);

    logln("comparing forward and backward...");
    final int errorsBefore = getErrorCount();
    compareFragmentLists("forward iteration", "backward iteration", nextResults,
                    previousResults);
    // Only compare against the expectation when both directions agreed.
    if (getErrorCount() == errorsBefore) {
        logln("comparing expected and actual...");
        compareFragmentLists("expected result", "actual result", expectedResult,
                        nextResults);
    }

    // Boundary table: DONE sentinel, 0, the running fragment end offsets, DONE sentinel.
    int[] boundaries = new int[fragmentCount + 3];
    boundaries[0] = BreakIterator.DONE;
    boundaries[1] = 0;
    for (int i = 0; i < fragmentCount; i++) {
        final int fragmentLength = ((String)expectedResult.elementAt(i)).length();
        boundaries[i + 2] = boundaries[i + 1] + fragmentLength;
    }
    boundaries[boundaries.length - 1] = BreakIterator.DONE;

    testFollowing(bi, text, boundaries);
    testPreceding(bi, text, boundaries);
    testIsBoundary(bi, text, boundaries);

    doMultipleSelectionTest(bi, text);
}
 
Example #18
Source File: Bug4912404.java    From jdk8u_jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Regression check: {@code equals(null)} on a word break iterator must
 * return {@code false}.
 *
 * @param args unused
 * @throws RuntimeException if {@code equals(null)} returns {@code true}
 */
public static void main(String[] args) {
    final BreakIterator words = BreakIterator.getWordInstance();
    words.setText("abc");

    final boolean equalsNull = words.equals(null);
    if (equalsNull) {
        throw new RuntimeException("BreakIterator.equals(null) should return false.");
    }
}
 
Example #19
Source File: AccessibleHTML.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the segment around <code>index</code> for the word or
 * sentence, as selected by <code>part</code>, or null if no valid
 * word/sentence can be found. On success the returned segment's
 * offset points to the start of the word/sentence in the array,
 * its count is the word/sentence length, and its modelOffset is
 * shifted by the same amount as the offset.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    final IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    breaker.setText(seg);

    // Translate the model index into an array position before searching.
    final int end = breaker.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > seg.offset + seg.count) {
        return null;
    }

    final int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= seg.offset + seg.count) {
        return null;
    }

    // Narrow the segment in place to [begin, end).
    seg.modelOffset += begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
 
Example #20
Source File: ConditionalSpecialCasing.java    From openjdk-jdk9 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text containing C
 * @param index  the char index in {@code src} at which C starts
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased letter is found before C and none after it,
 *         without crossing a word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    // Code point most recently examined; assigned in each loop body before the
    // loop's update expression reads it.
    int ch;

    // Look for a preceding 'cased' letter
    // (walks backwards one code point at a time and stops at a word boundary)
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            // (reuses i: starts just past C and walks forward to the next word
            // boundary or the end of the text; the outer loop never resumes
            // because both outcomes below return)
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    // No cased letter before C within the word.
    return false;
}
 
Example #21
Source File: CommandExecutionUtils.java    From APICloud-Studio with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Tries to find the word at the given offset.
 * <p>
 * When the offset lies exactly on a word boundary, the shorter of the two
 * neighbouring segments is returned (the preceding one on a tie).
 * 
 * @param line
 *            the line
 * @param offset
 *            the offset, expected to be between 0 and <code>line.length()</code> inclusive
 * @return the word region, or <code>null</code> if the line is <code>null</code> or the offset
 *         lies outside the line
 */
protected static IRegion findWordRegion(String line, int offset)
{
	// BreakIterator.preceding()/following() throw IllegalArgumentException for an
	// out-of-range offset; honour the documented "null if none" contract instead.
	if (line == null || offset < 0 || offset > line.length())
	{
		return null;
	}

	BreakIterator breakIter = BreakIterator.getWordInstance();
	breakIter.setText(line);

	// No boundary before the offset: the word starts at the beginning of the line.
	int start = breakIter.preceding(offset);
	if (start == BreakIterator.DONE)
	{
		start = 0;
	}

	// No boundary after the offset: the word runs to the end of the line.
	int end = breakIter.following(offset);
	if (end == BreakIterator.DONE)
	{
		end = line.length();
	}

	if (breakIter.isBoundary(offset))
	{
		// The offset sits between two segments: keep the shorter one.
		if (end - offset > offset - start)
		{
			start = offset;
		}
		else
		{
			end = offset;
		}
	}

	// Yields a zero-length region when start == end.
	return new Region(start, end - start);
}
 
Example #22
Source File: Bug4533872.java    From openjdk-jdk8u with GNU General Public License v2.0 5 votes vote down vote up
/**
 * For every sample text, iterates backwards from {@code last()} with
 * {@code previous()}, checking that {@code current()} always agrees with the
 * value just returned and that each segment equals the expected word,
 * compared from the last expected word to the first.
 */
void TestPrintEachBackward() {
    iter = BreakIterator.getWordInstance(Locale.US);

    for (int i = 0; i < given.length; i++) {
        iter.setText(given[i]);
        end = iter.last();

        // current() must report the same position as last().
        current = iter.current();
        if (end != current) {
            errln("Word break failure: printEachBackward() Unexpected current value: current()=" +
                  current + ", expected(=last())=" + end);
        }

        start = iter.previous();
        int j = expected[i].length-1;
        while (start != BreakIterator.DONE) {
            // current() must report the same position as previous().
            current = iter.current();
            if (start != current) {
                errln("Word break failure: printEachBackward() Unexpected current value: current()=" +
                      current + ", expected(=previous())=" + start);
            }

            final String want = expected[i][j];
            final String actual = given[i].substring(start, end);
            if (!want.equals(actual)) {
                errln("Word break failure: printEachBackward() expected:<" +
                      want + ">, got:<" +
                      actual +
                      "> start=" + start + "  end=" + end);
            }

            // Step one segment further back.
            end = start;
            start = iter.previous();
            j--;
        }
    }
}
 
Example #23
Source File: BreakIteratorProviderImpl.java    From dragonwell8_jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates the break iterator configured for {@code locale} in the JRE
 * locale resources.
 *
 * @param locale         the desired locale; must not be null
 * @param type           index into the "BreakIteratorClasses" resource array
 * @param dataName       resource key of the rule data file
 * @param dictionaryName resource key of the dictionary file, read only for
 *                       dictionary-based iterators
 * @return a new break iterator of the configured class
 * @throws NullPointerException if {@code locale} is null
 * @throws InternalError if the resources cannot be loaded or name an unknown iterator class
 */
private BreakIterator getBreakInstance(Locale locale,
                                              int type,
                                              String dataName,
                                              String dictionaryName) {
    if (locale == null) {
        throw new NullPointerException();
    }

    final LocaleResources resources = LocaleProviderAdapter.forJRE().getLocaleResources(locale);
    final String[] iteratorClasses = (String[]) resources.getBreakIteratorInfo("BreakIteratorClasses");
    final String rulesFile = (String) resources.getBreakIteratorInfo(dataName);

    try {
        final String iteratorClass = iteratorClasses[type];
        switch (iteratorClass) {
        case "RuleBasedBreakIterator":
            return new RuleBasedBreakIterator(rulesFile);
        case "DictionaryBasedBreakIterator":
            // The dictionary is only looked up when it is actually needed.
            return new DictionaryBasedBreakIterator(rulesFile,
                            (String) resources.getBreakIteratorInfo(dictionaryName));
        default:
            throw new IllegalArgumentException("Invalid break iterator class \"" +
                            iteratorClass + "\"");
        }
    } catch (IOException | MissingResourceException | IllegalArgumentException e) {
        throw new InternalError(e.toString(), e);
    }
}
 
Example #24
Source File: DocLocale.java    From TencentKona-8 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Constructor.
 * Resolves the locale via {@code getLocale()}; when one is found it is also
 * installed as the JVM default locale, otherwise {@code docenv.exit()} is
 * called. The collator and sentence breaker are then created for it.
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;

    this.locale = getLocale();
    if (this.locale != null) {
        Locale.setDefault(this.locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }

    this.collator = Collator.getInstance(this.locale);
    this.sentenceBreaker = BreakIterator.getSentenceInstance(this.locale);
}
 
Example #25
Source File: DocSplitterFallbackImpl.java    From relex with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a splitter with an empty buffer and a US-English sentence
 * iterator positioned at the first boundary of the empty text.
 */
public DocSplitterFallbackImpl()
{
	this.buffer = "";
	this.bdry = BreakIterator.getSentenceInstance(Locale.US);
	this.bdry.setText("");
	this.start = this.bdry.first();
}
 
Example #26
Source File: CustomPostingsHighlighter.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the passages to use when nothing was highlighted: the inherited
 * empty highlight limited to a single passage when non-highlighted snippets
 * are requested, {@code EMPTY_PASSAGE} otherwise.
 */
@Override
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
    if (!returnNonHighlightedSnippets) {
        return EMPTY_PASSAGE;
    }
    // Only the first sentence of the first snippet is wanted, hence the limit of 1.
    return super.getEmptyHighlight(fieldName, bi, 1);
}
 
Example #27
Source File: DocLocale.java    From openjdk-8-source with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Constructor
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;
    locale = getLocale();
    if (locale == null) {
        docenv.exit();
    } else {
        Locale.setDefault(locale); // NOTE: updating global state
    }
    collator = Collator.getInstance(locale);
    sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}
 
Example #28
Source File: AccessibleHTML.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the segment around <code>index</code> for the word or
 * sentence, as selected by <code>part</code>, or null if no valid
 * word/sentence can be found. The returned segment is the paragraph
 * segment narrowed in place: offset is the start of the
 * word/sentence in the array, count is its length, and modelOffset
 * is moved by the same distance as offset.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    final IndexedSegment paragraph = getParagraphElementText(index);
    if (paragraph == null) {
        return null;
    }

    final BreakIterator boundaries;
    if (part == AccessibleText.WORD) {
        boundaries = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        boundaries = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    paragraph.first();
    boundaries.setText(paragraph);

    // The iterator works on array positions, so convert the model index first.
    final int arrayIndex = index - paragraph.modelOffset + paragraph.offset;
    final int end = boundaries.following(arrayIndex);
    if (end == BreakIterator.DONE || end > paragraph.offset + paragraph.count) {
        return null;
    }

    final int begin = boundaries.previous();
    if (begin == BreakIterator.DONE || begin >= paragraph.offset + paragraph.count) {
        return null;
    }

    paragraph.modelOffset += begin - paragraph.offset;
    paragraph.offset = begin;
    paragraph.count = end - begin;
    return paragraph;
}
 
Example #29
Source File: RuleBasedBreakIterator.java    From jdk8u-dev-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Sets the iterator to refer to the first boundary position following
 * the specified position.
 * The result is also stored in {@code cachedLastKnownBreak} so that a later
 * call can resume from it instead of backing up again.
 * @param offset The position from which to begin searching for a break position.
 *        It is validated by {@code checkOffset} first (presumably rejecting
 *        out-of-range values; confirm against that helper).
 * @return The position of the first break after the specified position, or
 *         {@code BreakIterator.DONE} if no further break is found.
 */
@Override
public int following(int offset) {

    CharacterIterator text = getText();
    checkOffset(offset, text);

    // Set our internal iteration position (temporarily)
    // to the position passed in.  If this is the _beginning_ position,
    // then we can just use next() to get our return value
    text.setIndex(offset);
    if (offset == text.getBeginIndex()) {
        cachedLastKnownBreak = handleNext();
        return cachedLastKnownBreak;
    }

    // otherwise, we have to sync up first.  Use handlePrevious() to back
    // us up to a known break position before the specified position (if
    // we can determine that the specified position is a break position,
    // we don't back up at all).  This may or may not be the last break
    // position at or before our starting position.  Advance forward
    // from here until we've passed the starting position.  The position
    // we stop on will be the first break position after the specified one.
    int result = cachedLastKnownBreak;
    // The cached break is only usable as a starting point when it is a real
    // position strictly before the offset; otherwise back up from the offset.
    if (result >= offset || result <= BreakIterator.DONE) {
        result = handlePrevious();
    } else {
        //it might be better to check if handlePrevious() give us closer
        //safe value but handlePrevious() is slow too
        //So, this has to be done carefully
        text.setIndex(result);
    }
    // Advance until we are strictly past the offset or run out of breaks.
    while (result != BreakIterator.DONE && result <= offset) {
        result = handleNext();
    }
    cachedLastKnownBreak = result;
    return result;
}
 
Example #30
Source File: RuleBasedBreakIterator.java    From openjdk-jdk8u with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Set the iterator to analyze a new piece of text.  This function resets
 * the current iteration position to the beginning of the text and
 * invalidates the cached break position.
 * @param newText An iterator over the text to analyze.
 */
@Override
public void setText(CharacterIterator newText) {
    // A well-behaved CharacterIterator lets its position be set to its end
    // index. Many implementations do not, so probe for that here and wrap
    // the misbehaving ones in a SafeCharIterator so they can still be used.
    final int endIndex = newText.getEndIndex();
    boolean canSeekToEnd;
    try {
        newText.setIndex(endIndex);  // some buggy iterators throw an exception here
        canSeekToEnd = (newText.getIndex() == endIndex);
    }
    catch(IllegalArgumentException e) {
        canSeekToEnd = false;
    }

    text = canSeekToEnd ? newText : new SafeCharIterator(newText);
    text.first();

    cachedLastKnownBreak = BreakIterator.DONE;
}