java.text.BreakIterator Java Examples

The following examples show how to use java.text.BreakIterator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: SelectionImpl.java From RichTextFX with BSD 2-Clause "Simplified" License

6 votes

/**
 * Selects a range of the area's text derived from the word boundaries
 * around {@code wordPositionInArea}. Does nothing when the area is empty.
 *
 * @param wordPositionInArea offset into the area's text used to locate the boundaries
 */
@Override
public void selectWord(int wordPositionInArea) {
    final boolean nothingToSelect = area.getLength() == 0;
    if (nothingToSelect) {
        return;
    }

    final BreakIterator words = BreakIterator.getWordInstance(getArea().getLocale());
    words.setText(area.getText());

    // Jump to the boundary before the position, advance once, and read the cursor.
    words.preceding(wordPositionInArea);
    words.next();
    final int from = words.current();

    // Jump to the boundary after the position, advance once, and read the cursor.
    words.following(wordPositionInArea);
    words.next();
    final int to = words.current();

    selectRange(from, to);
}

Example #2

Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0

6 votes

/**
 * Walks {@code bi} backward from {@code last()} using {@code previous()} and
 * collects the text between consecutive boundaries, in forward order.
 * Reports via {@code errln} when {@code last()} is not the text length, when
 * {@code previous()} does not move backward, or when {@code DONE} is returned
 * before offset 0 was reached.
 *
 * @param bi   the iterator under test
 * @param text the text the fragments are cut from
 * @return the fragments found, first fragment first
 */
private Vector testLastAndPrevious(BreakIterator bi, String text) {
    final Vector<String> fragments = new Vector<String>();

    int boundary = bi.last();
    if (boundary != text.length()) {
        errln("last() returned " + boundary + " instead of " + text.length());
    }

    int upper = boundary;
    while (boundary != BreakIterator.DONE) {
        boundary = bi.previous();
        if (boundary == BreakIterator.DONE) {
            // Running out of boundaries is only legitimate at the start of the text.
            if (upper != 0) {
                errln("previous() returned DONE prematurely: offset was "
                                + upper + " instead of 0");
            }
        } else {
            if (boundary >= upper) {
                errln("previous() failed to move backward: previous() on position "
                                + upper + " yielded " + boundary);
            }
            // Prepend so the result ends up in forward order.
            fragments.insertElementAt(text.substring(boundary, upper), 0);
        }
        upper = boundary;
    }
    return fragments;
}

Example #3

Source File: Chapter3.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

6 votes

/**
 * Prints every sentence of {@code paragraph} as {@code begin-end [sentence]},
 * one per line, using a sentence {@link BreakIterator}.
 *
 * <p>The iterator is created for {@link Locale#US} explicitly, so the result
 * does not depend on the JVM's default locale. Nothing is printed when the
 * text contains no sentence.
 */
public static void usingBreakIterator() {
    // Pin the segmentation rules to US English; the no-arg factory would use the default locale.
    BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.US);
    sentenceIterator.setText(paragraph);

    int begin = sentenceIterator.first();
    // Fetch the end boundary before printing so no dangling "begin-" is emitted at the end.
    for (int end = sentenceIterator.next();
            end != BreakIterator.DONE;
            begin = end, end = sentenceIterator.next()) {
        System.out.println(begin + "-" + end + " ["
                + paragraph.substring(begin, end) + "]");
    }
}

Example #4

Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0

6 votes

/**
 * Calls {@code bi.preceding(offset)} for every offset from 0 through
 * {@code text.length()} and compares each result with the expected entry in
 * {@code boundaries}, reporting mismatches via {@code errln}.
 *
 * @param bi         the iterator under test
 * @param text       the text whose length bounds the offsets probed
 * @param boundaries expected boundaries; the cursor into it advances whenever
 *                   the probed offset equals the next entry
 */
private void testPreceding(BreakIterator bi, String text, int[] boundaries) {
    logln("testPreceding():");
    int expectedIdx = 0;
    int offset = 0;
    try {
        while (offset <= text.length()) {
            final int actual = bi.preceding(offset);
            logln("bi.preceding(" + offset + ") -> " + actual);

            final int expected = boundaries[expectedIdx];
            if (actual != expected) {
                errln("Wrong result from preceding() for " + offset + ": expected " + expected
                      + ", got " + actual);
            }

            // Once the offset reaches the next boundary, that boundary becomes the expected answer.
            if (offset == boundaries[expectedIdx + 1]) {
                expectedIdx++;
            }
            offset++;
        }
    } catch (IllegalArgumentException illargExp) {
        errln("IllegalArgumentException caught from preceding() for offset: " + offset);
    }
}

Example #5

Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0

6 votes

/**
 * Calls {@code bi.isBoundary(offset)} for every offset from 0 through
 * {@code text.length()} and reports via {@code errln} whenever the answer
 * disagrees with {@code boundaries}.
 *
 * @param bi         the iterator under test
 * @param text       the text whose length bounds the offsets probed
 * @param boundaries expected boundaries, scanned starting at index 1
 */
private void testIsBoundary(BreakIterator bi, String text, int[] boundaries) {
    logln("testIsBoundary():");
    int nextExpected = 1;
    for (int offset = 0; offset <= text.length(); offset++) {
        final boolean actual = bi.isBoundary(offset);
        logln("bi.isBoundary(" + offset + ") -> " + actual);

        final boolean shouldBeBoundary = (offset == boundaries[nextExpected]);
        if (shouldBeBoundary) {
            if (!actual) {
                errln("Wrong result from isBoundary() for " + offset + ": expected true, got false");
            }
            nextExpected++;
        } else if (actual) {
            errln("Wrong result from isBoundary() for " + offset + ": expected false, got true");
        }
    }
}

Example #6

Source File: TestSplittingBreakIterator.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Points {@code bi} at {@code text} and runs the full set of boundary checks
 * against the expected {@code boundaries} description.
 *
 * @param bi         the iterator under test
 * @param text       the text to analyze
 * @param boundaries the expected boundaries, in the string form the helpers compare against
 */
private void testBreakIterator(BreakIterator bi, String text, String boundaries) {
  bi.setText(text);

  // first() and last()
  testFirstAndLast(bi, text, boundaries);

  // The boundaries read back from the iterator must match the expected string.
  final String actualBoundaries = readBoundariesToString(bi, text);
  assertEquals(boundaries, actualBoundaries);

  // following() and preceding(): probe each index, randomized in case there is a sequencing bug.
  final List<Integer> probeOffsets = randomIntsBetweenInclusive(text.length() + 1);
  testFollowing(bi, text, boundaries, probeOffsets);
  testPreceding(bi, text, boundaries, probeOffsets);

  // previous()
  testPrevious(bi, text, boundaries);
}

Example #7

Source File: V8BreakIterator.java From HtmlUnit-Android with Apache License 2.0

6 votes

/**
 * Returns the type of the break.
 * The token between the previous boundary and the current one is inspected;
 * the iterator is stepped back and then forward again while doing so.
 * @return {@code letter} if the token contains an ASCII letter, {@code number} if it
 *         consists only of ASCII digits, otherwise {@code none} (this implementation
 *         produces no other values)
 */
@JsxFunction
public String breakType() {
    if (typeAlwaysNone_) {
        return "none";
    }

    final int current = current();
    final int previous = breakIterator_.previous();
    // Undo the step back: advance again, or go to the first boundary if there was nothing before.
    if (previous != BreakIterator.DONE) {
        next();
    }
    else {
        first();
    }

    if (current == BreakIterator.DONE || previous == BreakIterator.DONE) {
        return "none";
    }

    final String token = text_.substring(previous, current);
    if (token.matches(".*[a-zA-Z]+.*")) {
        return "letter";
    }
    return token.matches("[0-9]+") ? "number" : "none";
}

Example #8

Source File: GranularityIterator.java From talkback with Apache License 2.0

6 votes

/**
 * Finds the segment that ends at or before {@code offset}.
 *
 * @param offset position in the iterator text; values past the end are clamped to the text length
 * @return the result of {@code getRange(start, end)} for the segment found, or {@code null}
 *         when the text is empty, {@code offset <= 0}, or no boundary precedes the position
 */
@Override
public @Nullable int[] preceding(int offset) {
  final int textLength = getIteratorText().length();
  if (textLength <= 0 || offset <= 0) {
    return null;
  }

  // Snap the end backward until it sits on a boundary.
  int end = Math.min(offset, textLength);
  while (!breakIterator.isBoundary(end)) {
    end = breakIterator.preceding(end);
    if (end == BreakIterator.DONE) {
      return null;
    }
  }

  final int start = breakIterator.preceding(end);
  return (start == BreakIterator.DONE) ? null : getRange(start, end);
}

Example #9

Source File: BreakIteratorTest.java From openjdk-jdk9 with GNU General Public License v2.0

6 votes

/**
 * Calls {@code bi.preceding(offset)} for every offset from 0 through
 * {@code text.length()} and compares each result with the expected entry in
 * {@code boundaries}, reporting mismatches via {@code errln}.
 *
 * @param bi         the iterator under test
 * @param text       the text whose length bounds the offsets probed
 * @param boundaries expected boundaries; the cursor into it advances whenever
 *                   the probed offset equals the next entry
 */
private void testPreceding(BreakIterator bi, String text, int[] boundaries) {
    logln("testPreceding():");
    int expectedIdx = 0;
    int offset = 0;
    try {
        while (offset <= text.length()) {
            final int actual = bi.preceding(offset);
            logln("bi.preceding(" + offset + ") -> " + actual);

            final int expected = boundaries[expectedIdx];
            if (actual != expected) {
                errln("Wrong result from preceding() for " + offset + ": expected " + expected
                      + ", got " + actual);
            }

            // Once the offset reaches the next boundary, that boundary becomes the expected answer.
            if (offset == boundaries[expectedIdx + 1]) {
                expectedIdx++;
            }
            offset++;
        }
    } catch (IllegalArgumentException illargExp) {
        errln("IllegalArgumentException caught from preceding() for offset: " + offset);
    }
}

Example #10

Source File: TextComponent.java From openjdk-8 with GNU General Public License v2.0

6 votes

/**
 * Shared implementation for forward and backward word searching.
 * Starting from the boundary next to {@code index} in the requested direction,
 * walks boundary to boundary and returns the first boundary whose adjacent
 * span (toward the direction of travel) contains a letter.
 * The method assumes that {@code s} is the text assigned to {@code words}.
 *
 * @param index     offset to start searching from
 * @param words     word iterator already set to {@code s}
 * @param direction {@code NEXT} to search forward, otherwise backward
 * @param s         the text being iterated
 * @return the boundary found, or {@code BreakIterator.DONE} if no span contains a letter
 */
private int findWordLimit(int index, BreakIterator words, boolean direction,
                                 String s) {
    // Fix for 4256660 and 4256661: with a word iterator the end of one word
    // is not necessarily the start of the next, so spans without letters are
    // skipped (see the nextWordStartAfter example in the BreakIterator JavaDoc).
    final boolean forward = (direction == NEXT);

    int last = forward ? words.following(index) : words.preceding(index);
    int current = forward ? words.next() : words.previous();
    while (current != BreakIterator.DONE) {
        final int lo = Math.min(last, current);
        final int hi = Math.max(last, current);
        for (int p = lo; p < hi; p++) {
            if (Character.isLetter(s.charAt(p))) {
                return last;
            }
        }
        last = current;
        current = forward ? words.next() : words.previous();
    }
    return BreakIterator.DONE;
}

Example #11

Source File: SplitSentence.java From ignite-book-code-samples with GNU General Public License v3.0

6 votes

/**
 * Splits the sentence held in field 0 of {@code tuple} at word boundaries and
 * emits every non-blank piece as its own {@code Values}.
 *
 * @param tuple     input whose first field is the sentence text
 * @param collector receives one emission per word
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
    final String sentence = tuple.getString(0);

    final BreakIterator boundary = BreakIterator.getWordInstance();
    boundary.setText(sentence);

    int start = boundary.first();
    int end = boundary.next();
    while (end != BreakIterator.DONE) {
        // Strip whitespace so pieces that are only spacing collapse to the empty string.
        final String word = sentence.substring(start, end).replaceAll("\\s+", "");
        if (!word.isEmpty()) {
            collector.emit(new Values(word));
        }
        start = end;
        end = boundary.next();
    }
}

Example #12

Source File: Bug4533872.java From dragonwell8_jdk with GNU General Public License v2.0

6 votes

/**
 * For every input in {@code given}, jumps forward with {@code next(n)} followed
 * by {@code next()} and checks that the span reached equals the last expected
 * word for that input.
 */
void TestNext() {
    iter = BreakIterator.getWordInstance(Locale.US);

    for (int i = 0; i < given.length; i++) {
        final String input = given[i];
        iter.setText(input);
        start = iter.first();

        // Skip ahead so that [start, end) lands on the last expected word.
        final int lastWord = expected[i].length - 1;
        start = iter.next(lastWord);
        end = iter.next();

        if (!expected[i][lastWord].equals(input.substring(start, end))) {
            errln("Word break failure: printEachForward() expected:<" +
                  expected[i][lastWord] + ">, got:<" +
                  input.substring(start, end) +
                  "> start=" + start + "  end=" + end);
        }
    }
}

Example #13

Source File: BreakIteratorTest.java From TencentKona-8 with GNU General Public License v2.0

5 votes

/**
 * Creates the test fixture with one default-locale iterator of each kind:
 * character, word, line and sentence.
 */
public BreakIteratorTest() {
    this.characterBreak = BreakIterator.getCharacterInstance();
    this.wordBreak = BreakIterator.getWordInstance();
    this.lineBreak = BreakIterator.getLineInstance();
    this.sentenceBreak = BreakIterator.getSentenceInstance();
}

Example #14

Source File: TestOpenNLPSentenceBreakIterator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Feeds the iterator a slice that covers only the first sample sentence,
 * which sits between two padding strings, then runs the single-sentence checks.
 */
public void testSliceMiddle() throws Exception {
  final NLPSentenceDetectorOp detector = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
  final BreakIterator bi = new OpenNLPSentenceBreakIterator(detector);

  // The slice starts right after the leading padding and spans exactly the sentence.
  final String padded = PADDING + SENTENCES[0] + PADDING;
  final int sliceStart = PADDING.length();
  final int sliceLength = SENTENCES[0].length();
  bi.setText(getCharArrayIterator(padded, sliceStart, sliceLength));

  test1Sentence(bi, SENTENCES[0]);
}

Example #15

Source File: Bug4533872.java From openjdk-jdk8u with GNU General Public License v2.0

5 votes

/**
 * For every input in {@code given}, checks that {@code preceding(0)} yields
 * {@code DONE}, that {@code preceding(offset)} followed by {@code next()}
 * brackets the expected word for each probe offset, and that {@code next()}
 * after {@code last()} yields {@code DONE}.
 */
void TestPrintAt_2() {
    iter = BreakIterator.getWordInstance(Locale.US);

    // Probe offsets, one row per input string.
    final int[][] offsets = {
        {2, 9, 10, 15, 17},
        {1, 9, 10, 13, 16, 18, 20},
        {4, 9, 10, 13, 16, 18, 20},
        {6, 7, 10, 11, 15},
    };

    for (int i = 0; i < given.length; i++) {
        final String input = given[i];
        iter.setText(input);

        // Nothing precedes offset 0, so DONE is the only valid answer.
        if (iter.preceding(0) != BreakIterator.DONE) {
            errln("Word break failure: printAt_2() expected:-1(BreakIterator.DONE), got:" +
                  iter.preceding(0));
        }

        final int[] probes = offsets[i];
        for (int j = 0; j < probes.length; j++) {
            start = iter.preceding(probes[j]);
            end = iter.next();

            if (!expected[i][j].equals(input.substring(start, end))) {
                errln("Word break failure: printAt_2() expected:<" +
                      expected[i][j] + ">, got:<" +
                      input.substring(start, end) +
                      "> start=" + start + "  end=" + end);
            }
        }

        // Stepping past the last boundary must report DONE.
        end = iter.last();
        start = iter.next();
        if (start != BreakIterator.DONE) {
            errln("Word break failure: printAt_2() expected:-1(BreakIterator.DONE), got:" + start);
        }
    }
}

Example #16

Source File: SubWordActions.java From Pydev with Eclipse Public License 1.0

5 votes

/**
 * Moves the caret to the next sub-word position. Delegates to the superclass
 * when the native navigation style is configured, or when block selection is
 * enabled and the target lies on a different line than the caret.
 */
@Override
public void run() {
    // Native style configured: let the default action handle it.
    final IPreferenceStore store = getPreferenceStore();
    final String navigationStyle = store.getString(SubWordPreferences.WORD_NAVIGATION_STYLE);
    if (navigationStyle.equals(SubWordPreferences.WORD_NAVIGATION_STYLE_NATIVE)) {
        super.run();
        return;
    }

    final ISourceViewer viewer = getSourceViewer();
    final IDocument document = viewer.getDocument();
    try {
        fIterator.setText((CharacterIterator) new DocumentCharacterIterator(document));
        final int caret = widgetOffset2ModelOffset(viewer, viewer.getTextWidget().getCaretOffset());
        if (caret == -1) {
            return;
        }

        final int target = findNextPosition(caret);
        final boolean leavesLineInBlockMode = isBlockSelectionModeEnabled()
                && document.getLineOfOffset(target) != document.getLineOfOffset(caret);
        if (leavesLineInBlockMode) {
            super.run(); // may navigate into virtual white space
        } else if (target != BreakIterator.DONE) {
            setCaretPosition(target);
            getTextWidget().showSelection();
            fireSelectionChanged();
        }
    } catch (BadLocationException ignored) {
        // ignore
    }
}

Example #17

Source File: BreakIteratorTest.java From jdk8u_jdk with GNU General Public License v2.0

5 votes

/**
 * Concatenates the expected fragments into one text, runs {@code bi} over it
 * forward and backward, compares the fragment lists, and then runs the
 * following/preceding/isBoundary and multiple-selection checks.
 *
 * @param bi             the iterator under test
 * @param expectedResult the expected fragments (strings), in order
 */
private void generalIteratorTest(BreakIterator bi, Vector expectedResult) {
    final StringBuilder joined = new StringBuilder();
    for (Object fragment : expectedResult) {
        joined.append((String) fragment);
    }
    final String text = joined.toString();

    bi.setText(text);

    final Vector nextResults = testFirstAndNext(bi, text);
    final Vector previousResults = testLastAndPrevious(bi, text);

    logln("comparing forward and backward...");
    final int errorsBefore = getErrorCount();
    compareFragmentLists("forward iteration", "backward iteration", nextResults,
                    previousResults);
    // Only compare against the expectation if the two directions agreed.
    if (getErrorCount() == errorsBefore) {
        logln("comparing expected and actual...");
        compareFragmentLists("expected result", "actual result", expectedResult,
                        nextResults);
    }

    // Layout: [DONE, 0, end of fragment 0, end of fragment 1, ..., DONE]
    final int fragmentCount = expectedResult.size();
    final int[] boundaries = new int[fragmentCount + 3];
    boundaries[0] = BreakIterator.DONE;
    boundaries[1] = 0;
    int runningEnd = 0;
    for (int i = 0; i < fragmentCount; i++) {
        runningEnd += ((String) expectedResult.elementAt(i)).length();
        boundaries[i + 2] = runningEnd;
    }
    boundaries[boundaries.length - 1] = BreakIterator.DONE;

    testFollowing(bi, text, boundaries);
    testPreceding(bi, text, boundaries);
    testIsBoundary(bi, text, boundaries);

    doMultipleSelectionTest(bi, text);
}

Example #18

Source File: Bug4912404.java From jdk8u_jdk with GNU General Public License v2.0

5 votes

/**
 * Regression check: calling {@code equals(null)} on a word
 * {@link BreakIterator} must return {@code false}.
 *
 * @param args unused
 * @throws RuntimeException if {@code equals(null)} returns {@code true}
 */
public static void main(String[] args) {
    final BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText("abc");

    final boolean equalToNull = wordIterator.equals(null);
    if (equalToNull) {
        throw new RuntimeException("BreakIterator.equals(null) should return false.");
    }
}

Example #19

Source File: AccessibleHTML.java From jdk8u-jdk with GNU General Public License v2.0

5 votes

/**
 * Returns the Segment at <code>index</code> representing either
 * the word or sentence as identified by <code>part</code>, or
 * null if <code>part</code> is neither of those or a valid
 * word/sentence can't be found. On success the paragraph segment is
 * narrowed in place: the offset points to the start of the
 * word/sentence in the array, and the modelOffset points to the
 * location of the word/sentence in the model.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    final IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator iterator;
    if (part == AccessibleText.WORD) {
        iterator = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        iterator = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    iterator.setText(seg);

    // Translate the model index into the segment's array coordinates.
    final int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > seg.offset + seg.count) {
        return null;
    }

    final int begin = iterator.previous();
    if (begin == BreakIterator.DONE || begin >= seg.offset + seg.count) {
        return null;
    }

    // Shrink the segment to [begin, end), keeping modelOffset in step with offset.
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}

Example #20

Source File: ConditionalSpecialCasing.java From openjdk-jdk9 with GNU General Public License v2.0

5 votes

/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the string being examined
 * @param index  char index in {@code src} of the code point C
 * @param locale locale used to obtain the word {@code BreakIterator}
 * @return {@code true} if a cased code point is found before {@code index} and none
 *         after it, both searches stopping at the nearest word boundary;
 *         {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    // Most recently examined code point; also drives the step size of both loops.
    int ch;

    // Look for a preceding 'cased' letter
    // Scan backward from index, one code point at a time, until a word boundary is hit.
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            // Reusing i and ch here is safe: every path out of this branch returns.
            // The scan starts just past the code point at index and stops at a word boundary.
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    // No cased code point before index within the word.
    return false;
}

Example #21

Source File: CommandExecutionUtils.java From APICloud-Studio with GNU General Public License v3.0

5 votes

/**
 * Computes a region around the given offset from the word boundaries before
 * and after it. When the offset itself is a boundary, the region is the
 * larger side only if the following side is strictly longer; otherwise the
 * preceding side is used. The region may have zero length.
 * 
 * @param line
 *            the line
 * @param offset
 *            the offset
 * @return the region found (never <code>null</code>)
 */
protected static IRegion findWordRegion(String line, int offset)
{
	final BreakIterator words = BreakIterator.getWordInstance();
	words.setText(line);

	// Missing boundaries fall back to the ends of the line.
	final int before = words.preceding(offset);
	int start = (before == BreakIterator.DONE) ? 0 : before;

	final int after = words.following(offset);
	int end = (after == BreakIterator.DONE) ? line.length() : after;

	if (words.isBoundary(offset))
	{
		final boolean followingSideLonger = end - offset > offset - start;
		if (followingSideLonger)
		{
			start = offset;
		}
		else
		{
			end = offset;
		}
	}

	// end - start is 0 when both coincide, so no special case is needed.
	return new Region(start, end - start);
}

Example #22

Source File: Bug4533872.java From openjdk-jdk8u with GNU General Public License v2.0

5 votes

/**
 * For every input in {@code given}, walks the word boundaries backward from
 * {@code last()} and checks that {@code current()} tracks the iterator and
 * that each span equals the corresponding expected word, last word first.
 */
void TestPrintEachBackward() {
    iter = BreakIterator.getWordInstance(Locale.US);

    for (int i = 0; i < given.length; i++) {
        final String input = given[i];
        iter.setText(input);
        end = iter.last();

        // current() must agree with what last() just returned.
        current = iter.current();
        if (end != current) {
            errln("Word break failure: printEachBackward() Unexpected current value: current()=" +
                  current + ", expected(=last())=" + end);
        }

        start = iter.previous();
        int j = expected[i].length - 1;
        while (start != BreakIterator.DONE) {
            // current() must agree with what previous() just returned.
            current = iter.current();
            if (start != current) {
                errln("Word break failure: printEachBackward() Unexpected current value: current()=" +
                      current + ", expected(=previous())=" + start);
            }

            if (!expected[i][j].equals(input.substring(start, end))) {
                errln("Word break failure: printEachBackward() expected:<" +
                      expected[i][j] + ">, got:<" +
                      input.substring(start, end) +
                      "> start=" + start + "  end=" + end);
            }

            end = start;
            start = iter.previous();
            j--;
        }
    }
}

Example #23

Source File: BreakIteratorProviderImpl.java From dragonwell8_jdk with GNU General Public License v2.0

5 votes

/**
 * Creates the break iterator configured for {@code locale} and {@code type}.
 * The implementation class name is looked up in the locale's
 * "BreakIteratorClasses" resource at position {@code type}.
 *
 * @param locale         the locale; must not be null
 * @param type           index into the "BreakIteratorClasses" array
 * @param dataName       resource key of the rule data file
 * @param dictionaryName resource key of the dictionary file (dictionary-based iterators only)
 * @return the new iterator
 * @throws NullPointerException if {@code locale} is null
 * @throws InternalError if the class name is not recognized or the data cannot be loaded
 */
private BreakIterator getBreakInstance(Locale locale,
                                              int type,
                                              String dataName,
                                              String dictionaryName) {
    if (locale == null) {
        throw new NullPointerException();
    }

    final LocaleResources resources = LocaleProviderAdapter.forJRE().getLocaleResources(locale);
    final String[] classNames = (String[]) resources.getBreakIteratorInfo("BreakIteratorClasses");
    final String dataFile = (String) resources.getBreakIteratorInfo(dataName);

    try {
        final String className = classNames[type];
        switch (className) {
        case "RuleBasedBreakIterator":
            return new RuleBasedBreakIterator(dataFile);
        case "DictionaryBasedBreakIterator":
            final String dictionaryFile = (String) resources.getBreakIteratorInfo(dictionaryName);
            return new DictionaryBasedBreakIterator(dataFile, dictionaryFile);
        default:
            throw new IllegalArgumentException("Invalid break iterator class \"" +
                            className + "\"");
        }
    } catch (IOException | MissingResourceException | IllegalArgumentException e) {
        // Any failure to build the iterator is surfaced as an InternalError, keeping the cause.
        throw new InternalError(e.toString(), e);
    }
}

Example #24

Source File: DocLocale.java From TencentKona-8 with GNU General Public License v2.0

5 votes

/**
 * Constructor. Resolves the locale via {@code getLocale()}, installs it as
 * the JVM default when one is found (otherwise calls {@code docenv.exit()}),
 * and creates the collator and sentence breaker for it.
 *
 * @param docenv           the doc environment
 * @param localeName       the requested locale name
 * @param useBreakIterator stored as given in the field of the same name
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;

    locale = getLocale();
    if (locale != null) {
        Locale.setDefault(locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }

    collator = Collator.getInstance(locale);
    sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}

Example #25

Source File: DocSplitterFallbackImpl.java From relex with Apache License 2.0

5 votes

/**
 * Creates a splitter with a US-English sentence iterator positioned at the
 * first boundary of an empty text, and an empty buffer.
 */
public DocSplitterFallbackImpl()
{
	// Prime the iterator with empty text so first() has something to report.
	bdry = BreakIterator.getSentenceInstance(Locale.US);
	bdry.setText("");
	start = bdry.first();

	buffer = "";
}

Example #26

Source File: CustomPostingsHighlighter.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Returns the passages to use when nothing was highlighted: the superclass
 * result limited to a single passage when non-highlighted snippets are
 * enabled, otherwise {@code EMPTY_PASSAGE}.
 *
 * @param fieldName   the field being highlighted
 * @param bi          break iterator passed through to the superclass
 * @param maxPassages ignored; at most one passage is requested
 */
@Override
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
    if (!returnNonHighlightedSnippets) {
        return EMPTY_PASSAGE;
    }
    // Only the first sentence of the first snippet is wanted.
    return super.getEmptyHighlight(fieldName, bi, 1);
}

Example #27

Source File: DocLocale.java From openjdk-8-source with GNU General Public License v2.0

5 votes

/**
 * Constructor. Resolves the locale via {@code getLocale()}, installs it as
 * the JVM default when one is found (otherwise calls {@code docenv.exit()}),
 * and creates the collator and sentence breaker for it.
 *
 * @param docenv           the doc environment
 * @param localeName       the requested locale name
 * @param useBreakIterator stored as given in the field of the same name
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;

    locale = getLocale();
    if (locale != null) {
        Locale.setDefault(locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }

    collator = Collator.getInstance(locale);
    sentenceBreaker = BreakIterator.getSentenceInstance(locale);
}

Example #28

Source File: AccessibleHTML.java From jdk8u-jdk with GNU General Public License v2.0

5 votes

/**
 * Returns the Segment at <code>index</code> representing either
 * the word or sentence as identified by <code>part</code>, or
 * null if <code>part</code> is neither of those or a valid
 * word/sentence can't be found. On success the paragraph segment is
 * narrowed in place: the offset points to the start of the
 * word/sentence in the array, and the modelOffset points to the
 * location of the word/sentence in the model.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    final IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator iterator;
    if (part == AccessibleText.WORD) {
        iterator = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        iterator = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    iterator.setText(seg);

    // Translate the model index into the segment's array coordinates.
    final int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > seg.offset + seg.count) {
        return null;
    }

    final int begin = iterator.previous();
    if (begin == BreakIterator.DONE || begin >= seg.offset + seg.count) {
        return null;
    }

    // Shrink the segment to [begin, end), keeping modelOffset in step with offset.
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}

Example #29

Source File: RuleBasedBreakIterator.java From jdk8u-dev-jdk with GNU General Public License v2.0

5 votes

/**
 * Sets the iterator to refer to the first boundary position following
 * the specified position.
 * @param offset The position from which to begin searching for a break position.
 * @return The position of the first break after the specified position, or
 * {@code BreakIterator.DONE} if the forward scan produces no further break.
 */
@Override
public int following(int offset) {

    CharacterIterator text = getText();
    // NOTE(review): presumably rejects offsets outside the text — confirm in checkOffset().
    checkOffset(offset, text);

    // Set our internal iteration position (temporarily)
    // to the position passed in.  If this is the _beginning_ position,
    // then we can just use next() to get our return value
    text.setIndex(offset);
    if (offset == text.getBeginIndex()) {
        cachedLastKnownBreak = handleNext();
        return cachedLastKnownBreak;
    }

    // otherwise, we have to sync up first.  Use handlePrevious() to back
    // us up to a known break position before the specified position (if
    // we can determine that the specified position is a break position,
    // we don't back up at all).  This may or may not be the last break
    // position at or before our starting position.  Advance forward
    // from here until we've passed the starting position.  The position
    // we stop on will be the first break position after the specified one.
    int result = cachedLastKnownBreak;
    // The cached break is only usable as a starting point when it is a real
    // position strictly before offset; otherwise back up from offset instead.
    if (result >= offset || result <= BreakIterator.DONE) {
        result = handlePrevious();
    } else {
        //it might be better to check if handlePrevious() give us closer
        //safe value but handlePrevious() is slow too
        //So, this has to be done carefully
        text.setIndex(result);
    }
    // Step forward until a break strictly after offset is found or the scan runs out.
    while (result != BreakIterator.DONE && result <= offset) {
        result = handleNext();
    }
    // Remember the result so a later call can start from it.
    cachedLastKnownBreak = result;
    return result;
}

Example #30

Source File: RuleBasedBreakIterator.java From openjdk-jdk8u with GNU General Public License v2.0

5 votes

/**
 * Set the iterator to analyze a new piece of text.  This function resets
 * the current iteration position to the beginning of the text and clears
 * the cached last known break.
 * @param newText An iterator over the text to analyze.
 */
@Override
public void setText(CharacterIterator newText) {
    // A well-behaved CharacterIterator lets its position be set to its end
    // index.  Many do not, so probe for that here and wrap the ones that
    // fail in a SafeCharIterator so they can still be used with this class.
    final int endIndex = newText.getEndIndex();
    boolean acceptsEndIndex;
    try {
        newText.setIndex(endIndex);  // some buggy iterators throw an exception here
        acceptsEndIndex = (newText.getIndex() == endIndex);
    }
    catch(IllegalArgumentException e) {
        acceptsEndIndex = false;
    }

    text = acceptsEndIndex ? newText : new SafeCharIterator(newText);
    text.first();

    cachedLastKnownBreak = BreakIterator.DONE;
}