Java Code Examples for com.ibm.icu.text.BreakIterator#DONE

The following examples show how to use com.ibm.icu.text.BreakIterator#DONE. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: SimpleFilteredSentenceBreakIterator.java    From fitnotifications with Apache License 2.0 6 votes vote down vote up
/**
 * Starting from the delegate's candidate break, advances past every
 * suppressed (exception) break and reports the first one that stands.
 *
 * @param n candidate break position already produced by the delegate
 * @return the first non-suppressed break at or after {@code n}, the end of
 *         the text, or BreakIterator.DONE once the delegate is exhausted
 */
private final int internalNext(int n) {
    // Without a backwards table there are no exceptions to apply, and DONE
    // has nothing left to filter: hand the delegate's answer straight back.
    if (backwardsTrie == null || n == BreakIterator.DONE) {
        return n;
    }
    resetState();

    final int endOfText = text.getLength();
    int candidate = n;

    // One iteration per underlying break: keep asking the delegate for the
    // next break for as long as the current candidate sits on an exception.
    while (candidate != BreakIterator.DONE
            && candidate != endOfText
            && breakExceptionAt(candidate)) {
        candidate = delegate.next();
    }

    // Either a genuine break, the end of the text, or the delegate's DONE.
    return candidate;
}
 
Example 2
Source File: SimpleFilteredSentenceBreakIterator.java    From fitnotifications with Apache License 2.0 6 votes vote down vote up
/**
 * Starting from the delegate's candidate break, walks backwards past every
 * suppressed (exception) break and reports the first one that stands.
 *
 * @param n candidate break position already produced by the delegate
 * @return the first non-suppressed break at or before {@code n}, position 0,
 *         or BreakIterator.DONE once the delegate is exhausted
 */
private final int internalPrev(int n) {
    // Nothing to filter at the start of the text, at DONE, or when no
    // backwards table (and therefore no exception) has been loaded.
    if (backwardsTrie == null || n == 0 || n == BreakIterator.DONE) {
        return n;
    }
    resetState();

    int candidate = n;

    // One iteration per underlying break: keep asking the delegate for the
    // previous break for as long as the current candidate sits on an exception.
    while (candidate != BreakIterator.DONE
            && candidate != 0
            && breakExceptionAt(candidate)) {
        candidate = delegate.previous();
    }

    // Either a genuine break, the start of the text, or the delegate's DONE.
    return candidate;
}
 
Example 3
Source File: BreakIteratorWrapper.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Derives a word rule status for the segment between two break positions by
 * scanning its code points: the first digit found yields WORD_NUMBER, the
 * first letter found yields WORD_LETTER, and a segment with neither (or an
 * exhausted iterator) yields WORD_NONE.
 *
 * @param current break position that opens the segment, relative to {@code start}
 * @param next break position that closes the segment, relative to {@code start}
 * @return one of the RuleBasedBreakIterator WORD_* status constants
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Decode at the loop cursor. Decoding at 'begin' on every pass would
        // re-examine the first code point and never look at the rest of the
        // segment, so e.g. leading punctuation would hide following letters.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
 
Example 4
Source File: IcuTokenizer.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Moves the breaker to the next token inside the current buffer and
 * publishes it through the term, offset, type and script attributes.
 * Segments whose rule status is 0 are skipped.
 *
 * @return true if a token was published, false if the breaker ran out of
 *         boundaries first
 * @throws IllegalStateException if the breaker's current position is DONE
 */
private boolean incrementTokenBuffer() {
    int from = breaker.current();
    if (from == BreakIterator.DONE) {
        throw new IllegalStateException();
    }

    // Slide the [from, to) window forward over non-tokens (rule status 0).
    int to;
    for (to = breaker.next();
            to != BreakIterator.DONE && breaker.getRuleStatus() == 0;
            to = breaker.next()) {
        from = to;
    }
    if (to == BreakIterator.DONE) {
        return false;
    }

    final int length = to - from;
    termAtt.copyBuffer(buffer, from, length);

    final int correctedStart = correctOffset(offset + from);
    final int correctedEnd = correctOffset(offset + to);
    offsetAtt.setOffset(correctedStart, correctedEnd);

    typeAtt.setType(config.getType(breaker.getScriptCode(), breaker.getRuleStatus()));
    scriptAtt.setCode(breaker.getScriptCode());
    return true;
}
 
Example 5
Source File: SpellCheckIterator.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Scans the content forward from <code>begin</code> until the stop
 * character is found and repositions the iterator state there. When the
 * stop value is <code>WHITE_SPACE_TOKEN</code>, any whitespace character
 * also ends the scan. If the content ends first, the successor is set to
 * <code>BreakIterator.DONE</code>.
 *
 * @param begin the index at which scanning starts
 * @param stop the character (or <code>WHITE_SPACE_TOKEN</code>) that ends the scan
 */
protected final void skipTokens(final int begin, final int stop) {
	final int length= fContent.length();
	final boolean stopOnWhiteSpace= stop == WHITE_SPACE_TOKEN;

	int index= begin;
	for (; index < length; index++) {
		final char current= fContent.charAt(index);
		if (current == stop)
			break;
		if (stopOnWhiteSpace && Character.isWhitespace(current))
			break;
	}

	if (index >= length) {
		// Ran off the end without meeting the stop character.
		fSuccessor= BreakIterator.DONE;
		return;
	}

	fNext= index;
	fPredecessor= index;
	fSuccessor= fWordIterator.following(index);
}
 
Example 6
Source File: ICUWordRecognizer.java    From birt with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Returns the word between the breaker's current boundary and its next
 * boundary, recording the new boundary in <code>end</code>.
 *
 * @return the next word, or <code>null</code> when the breaker reports
 *         <code>BreakIterator.DONE</code>
 */
public Word getNextWord( )
{
	final int wordStart = wordBreaker.current( );
	end = wordBreaker.next( );
	return end == BreakIterator.DONE ? null : new Word( text, wordStart, end );
}
 
Example 7
Source File: WordRecognizerWrapper.java    From birt with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Advances to the next boundary and returns the word lying between the
 * previous boundary and the new one.
 *
 * @return the next word, or <code>null</code> when the break iterator
 *         reports <code>BreakIterator.DONE</code>
 */
public Word getNextWord( )
{
	// The previous end becomes the start of the new word.
	start = end;
	end = breakIterator.next( );
	if ( end == BreakIterator.DONE )
	{
		return null;
	}
	return new Word( text, start, end );
}
 
Example 8
Source File: SpellCheckIterator.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Creates a new spell check iterator over the given region of a document.
 * The region's text is read once; if it cannot be read, or if it starts
 * with the NLS tag prefix, the iterator operates on empty content. The
 * sentence boundaries of the content are collected up front.
 *
 * @param document the document containing the specified partition
 * @param region the region to spell check
 * @param locale the locale to use for sentence detection
 * @param breakIterator the break-iterator used to find word boundaries
 */
public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) {
	fOffset= region.getOffset();
	fWordIterator= breakIterator;
	fDelimiter= TextUtilities.getDefaultLineDelimiter(document);

	String content;
	try {

		content= document.get(region.getOffset(), region.getLength());
		if (content.startsWith(NLSElement.TAG_PREFIX))
			content= ""; //$NON-NLS-1$

	} catch (Exception exception) {
		// Deliberate best effort: an unreadable region is treated as empty
		// content rather than failing construction.
		content= ""; //$NON-NLS-1$
	}
	fContent= content;

	fWordIterator.setText(content);
	fPredecessor= fWordIterator.first();
	fSuccessor= fWordIterator.next();

	final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
	iterator.setText(content);

	// Record every sentence boundary, starting with the iterator's initial position.
	int offset= iterator.current();
	while (offset != BreakIterator.DONE) {

		// Integer.valueOf replaces the deprecated Integer(int) constructor.
		fSentenceBreaks.add(Integer.valueOf(offset));
		offset= iterator.next();
	}
}
 
Example 9
Source File: JavaEditor.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Finds the closest break position before the given position, then
 * clamps it so that it does not jump across the boundary of a linked
 * mode position.
 *
 * @param position the current position
 * @return the previous position, or <code>BreakIterator.DONE</code> if
 *         the iterator has no earlier break
 */
protected int findPreviousPosition(int position) {
	final ISourceViewer viewer= getSourceViewer();

	// Step backwards until a break maps to a widget offset other than -1
	// or the iterator runs out. XXX: optimize
	int candidate= position;
	int widgetOffset= -1;
	while (candidate != BreakIterator.DONE && widgetOffset == -1) {
		candidate= fIterator.preceding(candidate);
		if (candidate == BreakIterator.DONE)
			break;
		widgetOffset= modelOffset2WidgetOffset(viewer, candidate);
	}

	final IDocument document= viewer.getDocument();
	final LinkedModeModel model= LinkedModeModel.getModel(document, position);
	if (model == null || candidate == BreakIterator.DONE)
		return candidate;

	// Caret inside a linked position: do not move past that position's start.
	final LinkedPosition enclosing= model.findPosition(new LinkedPosition(document, position, 0));
	if (enclosing != null) {
		final int enclosingStart= enclosing.getOffset();
		if (position != enclosingStart && candidate < enclosingStart)
			return enclosingStart;
		return candidate;
	}

	// Candidate inside a linked position: stop at that position's end instead.
	final LinkedPosition preceding= model.findPosition(new LinkedPosition(document, candidate, 0));
	if (preceding == null)
		return candidate;

	final int precedingEnd= preceding.getOffset() + preceding.getLength();
	if (position != precedingEnd && candidate < precedingEnd)
		return precedingEnd;
	return candidate;
}
 
Example 10
Source File: JavaEditor.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Moves the caret to the previous sub-word position. Falls back to the
 * inherited action when sub-word navigation is disabled, or when block
 * selection mode is on and the target lies on a different line.
 */
@Override
public void run() {
	// Preference switched off: behave exactly like the inherited action.
	if (!getPreferenceStore().getBoolean(PreferenceConstants.EDITOR_SUB_WORD_NAVIGATION)) {
		super.run();
		return;
	}

	final ISourceViewer sourceViewer= getSourceViewer();
	final IDocument doc= sourceViewer.getDocument();
	try {
		fIterator.setText((CharacterIterator)new DocumentCharacterIterator(doc));
		final int caret= widgetOffset2ModelOffset(sourceViewer, sourceViewer.getTextWidget().getCaretOffset());
		if (caret == -1)
			return;

		final int target= findPreviousPosition(caret);
		final boolean leavesLineInBlockMode= isBlockSelectionModeEnabled()
				&& doc.getLineOfOffset(target) != doc.getLineOfOffset(caret);
		if (leavesLineInBlockMode) {
			super.run(); // may navigate into virtual white space
			return;
		}
		if (target == BreakIterator.DONE)
			return;

		setCaretPosition(target);
		getTextWidget().showSelection();
		fireSelectionChanged();
	} catch (BadLocationException x) {
		// ignore - getLineOfOffset failed
	}

}
 
Example 11
Source File: JavaEditor.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Finds the closest break position after the given position, then
 * clamps it so that it does not jump across the boundary of a linked
 * mode position.
 *
 * @param position the current position
 * @return the next position, or <code>BreakIterator.DONE</code> if the
 *         iterator has no later break
 */
protected int findNextPosition(int position) {
	final ISourceViewer viewer= getSourceViewer();

	// Step forwards until a break maps to a widget offset other than -1
	// or the iterator runs out. XXX: optimize
	int candidate= position;
	int widgetOffset= -1;
	while (candidate != BreakIterator.DONE && widgetOffset == -1) {
		candidate= fIterator.following(candidate);
		if (candidate == BreakIterator.DONE)
			break;
		widgetOffset= modelOffset2WidgetOffset(viewer, candidate);
	}

	final IDocument document= viewer.getDocument();
	final LinkedModeModel model= LinkedModeModel.getModel(document, position);
	if (model == null || candidate == BreakIterator.DONE)
		return candidate;

	// Caret inside a linked position: do not move past that position's end.
	final LinkedPosition enclosing= model.findPosition(new LinkedPosition(document, position, 0));
	if (enclosing != null) {
		final int enclosingEnd= enclosing.getOffset() + enclosing.getLength();
		if (position != enclosingEnd && enclosingEnd < candidate)
			return enclosingEnd;
		return candidate;
	}

	// Candidate inside a linked position: stop at that position's start instead.
	final LinkedPosition upcoming= model.findPosition(new LinkedPosition(document, candidate, 0));
	if (upcoming == null)
		return candidate;

	final int upcomingStart= upcoming.getOffset();
	if (position != upcomingStart && upcomingStart < candidate)
		return upcomingStart;
	return candidate;
}
 
Example 12
Source File: JavaEditor.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Moves the caret to the next sub-word position. Falls back to the
 * inherited action when sub-word navigation is disabled, or when block
 * selection mode is on and the target lies on a different line.
 */
@Override
public void run() {
	// Preference switched off: behave exactly like the inherited action.
	if (!getPreferenceStore().getBoolean(PreferenceConstants.EDITOR_SUB_WORD_NAVIGATION)) {
		super.run();
		return;
	}

	final ISourceViewer sourceViewer= getSourceViewer();
	final IDocument doc= sourceViewer.getDocument();
	try {
		fIterator.setText((CharacterIterator)new DocumentCharacterIterator(doc));
		final int caret= widgetOffset2ModelOffset(sourceViewer, sourceViewer.getTextWidget().getCaretOffset());
		if (caret == -1)
			return;

		final int target= findNextPosition(caret);
		final boolean leavesLineInBlockMode= isBlockSelectionModeEnabled()
				&& doc.getLineOfOffset(target) != doc.getLineOfOffset(caret);
		if (leavesLineInBlockMode) {
			super.run(); // may navigate into virtual white space
			return;
		}
		if (target == BreakIterator.DONE)
			return;

		setCaretPosition(target);
		getTextWidget().showSelection();
		fireSelectionChanged();
	} catch (BadLocationException x) {
		// ignore
	}
}
 
Example 13
Source File: RenamingNameSuggestor.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Splits the type name into the pieces delimited by the breaks that a
 * JavaWordIterator reports, for example:
 *
 * "JavaElementName" => { "Java", "Element", "Name" }
 *
 * "ASTNode" => { "AST", "Node" }
 *
 * @param typeName the name to split
 * @return the pieces in order of appearance
 */
private String[] getSuffixes(String typeName) {
	final JavaWordIterator words= new JavaWordIterator();
	words.setText(typeName);

	final List<String> pieces= new ArrayList<String>();
	// [from, to) always spans two consecutive breaks reported by the iterator.
	for (int from= 0, to= words.next(); to != BreakIterator.DONE; from= to, to= words.next()) {
		pieces.add(typeName.substring(from, to));
	}
	return pieces.toArray(new String[pieces.size()]);
}
 
Example 14
Source File: BreakIteratorWrapper.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the rule status for the text between two breaks (this determines the token type):
 * the emoji-sequence status when the span is an emoji, otherwise whatever the underlying
 * rule-based iterator reports.
 */
private int calcStatus(int current, int next) {
  // to support presentation selectors, we need to handle alphanum, num, and none at least, so currently not worth optimizing.
  // https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3AEmoji%3A%5D-%5B%3AEmoji_Presentation%3A%5D&g=Word_Break&i=
  final boolean emojiSpan = next != BreakIterator.DONE && isEmoji(current, next);
  return emojiSpan ? ICUTokenizerConfig.EMOJI_SEQUENCE_STATUS : rbbi.getRuleStatus();
}
 
Example 15
Source File: SpellCheckIterator.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Tells whether another break follows the current one.
 *
 * @return <code>true</code> unless the successor is <code>BreakIterator.DONE</code>
 */
public final boolean hasNext() {
	final boolean exhausted= fSuccessor == BreakIterator.DONE;
	return !exhausted;
}
 
Example 16
Source File: SpellCheckIterator.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Determines the next token to be spell checked.
 * <p>
 * The span between <code>fPrevious</code> and <code>fNext</code> is
 * classified by its first character (Javadoc tag prefix, HTML tag
 * prefix, HTML entity start) or, failing that, as a plain
 * non-whitespace alphanumeric span. Each case decides whether the span
 * becomes the returned token or is skipped. Afterwards the
 * sentence-start flag is refreshed from the recorded sentence breaks.
 * </p>
 *
 * @return the next token to be spell checked, or <code>null</code>
 *         iff the next token is not a candidate for spell checking.
 */
protected String nextToken() {

	String token= null;

	// The new span starts where the previous one ended.
	fPrevious= fPredecessor;
	fStartsSentence= false;

	// NOTE(review): nextBreak() is defined elsewhere; it presumably advances
	// fNext/fSuccessor (and possibly fPredecessor) to the following break - confirm.
	nextBreak();

	// Set when the span was consumed in a way that may have crossed a sentence break.
	boolean update= false;
	// Only non-empty spans are examined.
	if (fNext - fPrevious > 0) {

		if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IJavaDocTagConstants.JAVADOC_TAG_PREFIX) {

			// Span starts with the Javadoc tag prefix: pull in the following
			// break and keep the span only if a letter follows the prefix.
			nextBreak();
			if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
				update= true;
				token= fContent.substring(fPrevious, fNext);
			} else
				fPredecessor= fNext;

		} else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IHtmlTagConstants.HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {

			// Span starts with the HTML tag prefix followed by a letter or '/'.
			// A close prefix costs one extra break before the tag name.
			if (fContent.startsWith(IHtmlTagConstants.HTML_CLOSE_PREFIX, fPrevious))
				nextBreak();

			nextBreak();

			// Only a tag that is immediately terminated by the tag postfix is
			// returned as a token.
			if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == IHtmlTagConstants.HTML_TAG_POSTFIX) {

				nextBreak();
				if (fSuccessor != BreakIterator.DONE) {
					update= true;
					token= fContent.substring(fPrevious, fNext);
				}
			}
		} else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IHtmlTagConstants.HTML_ENTITY_START && (Character.isLetter(fContent.charAt(fNext)))) {
			// Span starts with the HTML entity start character followed by a letter.
			nextBreak();
			if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == IHtmlTagConstants.HTML_ENTITY_END) {
				nextBreak();
				// A span listed in HTML_ENTITY_CODES is skipped (token stays
				// null); any other terminated span is returned as-is.
				if (isToken(fContent.substring(fPrevious, fNext), IHtmlTagConstants.HTML_ENTITY_CODES)) {
					skipTokens(fPrevious, IHtmlTagConstants.HTML_ENTITY_END);
					update= true;
				} else
					token= fContent.substring(fPrevious, fNext);
			} else
				token= fContent.substring(fPrevious, fNext);

			update= true;
		} else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {

			// Plain non-whitespace alphanumeric span.
			if (isUrlToken(fPrevious))
				// URL: skip ahead to the next whitespace.
				skipTokens(fPrevious, WHITE_SPACE_TOKEN);
			else if (isToken(IJavaDocTagConstants.JAVADOC_PARAM_TAGS))
				fLastToken= null;
			else if (isToken(IJavaDocTagConstants.JAVADOC_REFERENCE_TAGS)) {
				// Reference tag: skip ahead to the first character of the line delimiter.
				fLastToken= null;
				skipTokens(fPrevious, fDelimiter.charAt(0));
			} else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious) && !fIsIgnoringSingleLetters)
				// Spans longer than one character are always returned; a single
				// letter only when single letters are not being ignored.
				token= fContent.substring(fPrevious, fNext);

			update= true;
		}
	}

	if (update && fSentenceBreaks.size() > 0) {

		if (fPrevious >= nextSentence()) {

			// Drop every recorded sentence break that the span start has reached or passed.
			while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
				fSentenceBreaks.removeFirst();

			fStartsSentence= (fLastToken == null) || (token != null);
		}
	}
	return token;
}
 
Example 17
Source File: WordRecognizerWrapper.java    From birt with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Tells whether another word can still be read.
 *
 * @return <code>true</code> if the last boundary is neither
 *         <code>BreakIterator.DONE</code> nor at or beyond the text length
 */
public boolean hasWord( )
{
	if ( end == BreakIterator.DONE )
	{
		return false;
	}
	return end < text.length( );
}
 
Example 18
Source File: SimpleFilteredSentenceBreakIterator.java    From fitnotifications with Apache License 2.0 4 votes vote down vote up
/**
 * Is there a break exception (a suppressed break) at this point?
 * <p>
 * The text before {@code n} is fed backwards, code point by code point, into
 * the backwards trie. A MATCH value means the break is suppressed. A PARTIAL
 * value means the text from the matched position must additionally match the
 * forwards trie for the break to be suppressed.
 *
 * @param n candidate break position in the text
 * @return true if the break at {@code n} is suppressed by an exception,
 *         false otherwise
 */
private final boolean breakExceptionAt(int n) {
    // Note: the C++ version of this function is SimpleFilteredSentenceBreakIterator::breakExceptionAt()

    int bestPosn = -1;
    int bestValue = -1;

    text.setIndex(n);
    backwardsTrie.reset();
    int uch;

    // Assume a space is following the '.' (so we handle the case: "Mr. /Brown"):
    // a space just before n stays consumed, anything else is stepped back over.
    // TODO: skip a class of chars here?? TODO only do this the 1st time?
    if (text.previousCodePoint() != ' ') {
        text.nextCodePoint();
    }

    BytesTrie.Result r = BytesTrie.Result.INTERMEDIATE_VALUE;

    while ((uch = text.previousCodePoint()) != UCharacterIterator.DONE // more to consume backwards and..
            && (r = backwardsTrie.nextForCodePoint(uch)).hasNext()) { // more in the trie
        if (r.hasValue()) { // remember the best match so far
            bestPosn = text.getIndex();
            bestValue = backwardsTrie.getValue();
        }
    }

    if (r.matches()) { // exact match?
        bestValue = backwardsTrie.getValue();
        bestPosn = text.getIndex();
    }

    if (bestPosn < 0) {
        return false; // nothing matched: no exception here
    }
    if (bestValue == Builder.MATCH) { // exact match!
        return true; // Exception here.
    }
    if (bestValue == Builder.PARTIAL && forwardsPartialTrie != null) {
        // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
        // to see if it matches something going forward.
        forwardsPartialTrie.reset();

        BytesTrie.Result rfwd = BytesTrie.Result.INTERMEDIATE_VALUE;
        text.setIndex(bestPosn); // hope that's close ..
        // nextCodePoint() belongs to UCharacterIterator, so test against that
        // class's DONE sentinel, matching the backwards scan above.
        while ((uch = text.nextCodePoint()) != UCharacterIterator.DONE
                && (rfwd = forwardsPartialTrie.nextForCodePoint(uch)).hasNext()) {
            // keep feeding the forwards trie
        }
        return rfwd.matches(); // true: exception here
    }
    return false; // No exception here.
}
 
Example 19
Source File: SpellCheckIterator.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 3 votes vote down vote up
/**
 * Returns the next token to be spell checked, skipping spans that are not
 * candidates, and remembers it as the last token.
 *
 * @return the next token, or <code>null</code> if the content was
 *         exhausted before a candidate was found
 */
public String next() {
	String token;
	do {
		token= nextToken();
	} while (token == null && fSuccessor != BreakIterator.DONE);

	fLastToken= token;
	return token;
}