Java Code Examples for java.text.BreakIterator#setText()

The following examples show how to use java.text.BreakIterator#setText() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MyFormatter.java    From triplea with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Adds HTML line breaks and indentation to a string so it wraps for things like long tooltips.
 *
 * <pre>
 * string part 1
 *           string part 2
 *           ...
 *           string part X
 * </pre>
 */
public static String addHtmlBreaksAndIndents(
    final String target, final int firstLineMaxLength, final int maxLength) {
  final StringBuilder sb = new StringBuilder();
  final BreakIterator breakIterator = BreakIterator.getLineInstance();
  breakIterator.setText(target);
  int start = breakIterator.first();
  int end = breakIterator.next();
  int lineLength = 0;
  int currentMaxLength = firstLineMaxLength;
  while (end != BreakIterator.DONE) {
    final String word = target.substring(start, end);
    lineLength = lineLength + word.length();
    if (lineLength >= currentMaxLength) {
      sb.append("<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;");
      lineLength = word.length() + 5; // Add 5 for the indent
      currentMaxLength = maxLength;
    }
    sb.append(word);
    start = end;
    end = breakIterator.next();
  }
  return sb.toString();
}
 
Example 2
Source File: Chapter2.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
private static void usingTheBreakIterator() {
    Locale currentLocale = new Locale("en", "US");
    BreakIterator wordIterator
            = BreakIterator.getWordInstance();
    String text = "Let's pause, and then reflect.";
    wordIterator.setText(text);
    int boundary = wordIterator.first();
    while (boundary != BreakIterator.DONE) {
        int begin = boundary;
        System.out.print(boundary + "-");
        boundary = wordIterator.next();
        int end = boundary;
        if(end == BreakIterator.DONE) break;
        System.out.println(boundary + " [" + 
                text.substring(begin, end) + "]");
    }
}
 
Example 3
Source File: BreakIteratorTest.java    From dragonwell8_jdk with GNU General Public License v2.0 6 votes vote down vote up
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello, World!...";
    int begin = 3;
    int end = str.length() - 3;
    boolean gotException = false;
    boolean dummy;

    iter.setText(new StringCharacterIterator(str, begin, end, begin));
    for (int index = -1; index < begin + 1; ++index) {
        try {
            dummy = iter.isBoundary(index);
            if (index < begin)
                errln("Didn't get exception with offset = " + index +
                                " and begin index = " + begin);
        }
        catch (IllegalArgumentException e) {
            if (index >= begin)
                errln("Got exception with offset = " + index +
                                " and begin index = " + begin);
        }
    }
}
 
Example 4
Source File: BreakIteratorTest.java    From openjdk-jdk8u with GNU General Public License v2.0 6 votes vote down vote up
public void TestBug4153072() {
    BreakIterator iter = BreakIterator.getWordInstance();
    String str = "...Hello, World!...";
    int begin = 3;
    int end = str.length() - 3;
    boolean gotException = false;
    boolean dummy;

    iter.setText(new StringCharacterIterator(str, begin, end, begin));
    for (int index = -1; index < begin + 1; ++index) {
        try {
            dummy = iter.isBoundary(index);
            if (index < begin)
                errln("Didn't get exception with offset = " + index +
                                " and begin index = " + begin);
        }
        catch (IllegalArgumentException e) {
            if (index >= begin)
                errln("Got exception with offset = " + index +
                                " and begin index = " + begin);
        }
    }
}
 
Example 5
Source File: CapitalizeWordsInSentence.java    From levelup-java-examples with Apache License 2.0 5 votes vote down vote up
private static int nextWordStartAfter(int pos, String text) {
    BreakIterator wb = BreakIterator.getWordInstance();
    wb.setText(text);
    int last = wb.following(pos);
    int current = wb.next();
    while (current != BreakIterator.DONE) {
        for (int p = last; p < current; p++) {
            if (Character.isLetter(text.codePointAt(p)))
                return last;
        }
        last = current;
        current = wb.next();
    }
    return BreakIterator.DONE;
}
 
Example 6
Source File: AccessibleHTML.java    From openjdk-8 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the Segment at <code>index</code> representing either
 * the paragraph or sentence as identified by <code>part</code>, or
 * null if a valid paragraph/sentence can't be found. The offset
 * will point to the start of the word/sentence in the array, and
 * the modelOffset will point to the location of the word/sentence
 * in the model.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }
    BreakIterator iterator;
    switch (part) {
    case AccessibleText.WORD:
        iterator = BreakIterator.getWordInstance(getLocale());
        break;
    case AccessibleText.SENTENCE:
        iterator = BreakIterator.getSentenceInstance(getLocale());
        break;
    default:
        return null;
    }
    seg.first();
    iterator.setText(seg);
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE) {
        return null;
    }
    if (end > seg.offset + seg.count) {
        return null;
    }
    int begin = iterator.previous();
    if (begin == BreakIterator.DONE ||
        begin >= seg.offset + seg.count) {
        return null;
    }
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
 
Example 7
Source File: AccessibleHTML.java    From jdk8u-dev-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the Segment at <code>index</code> representing either
 * the paragraph or sentence as identified by <code>part</code>, or
 * null if a valid paragraph/sentence can't be found. The offset
 * will point to the start of the word/sentence in the array, and
 * the modelOffset will point to the location of the word/sentence
 * in the model.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }
    BreakIterator iterator;
    switch (part) {
    case AccessibleText.WORD:
        iterator = BreakIterator.getWordInstance(getLocale());
        break;
    case AccessibleText.SENTENCE:
        iterator = BreakIterator.getSentenceInstance(getLocale());
        break;
    default:
        return null;
    }
    seg.first();
    iterator.setText(seg);
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE) {
        return null;
    }
    if (end > seg.offset + seg.count) {
        return null;
    }
    int begin = iterator.previous();
    if (begin == BreakIterator.DONE ||
        begin >= seg.offset + seg.count) {
        return null;
    }
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
 
Example 8
Source File: CaretSelectionBindImpl.java    From RichTextFX with BSD 2-Clause "Simplified" License 5 votes vote down vote up
@Override
public void updateEndByBreaksForward(int numOfBreaks, BreakIterator breakIterator) {
    if (getAreaLength() == 0) {
        return;
    }

    breakIterator.setText(getArea().getText());
    int position = calculatePositionViaBreakingForwards(numOfBreaks, breakIterator, getStartPosition());
    updateEndTo(position);
}
 
Example 9
Source File: ConditionalSpecialCasing.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    int ch;

    // Look for a preceding 'cased' letter
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    return false;
}
 
Example 10
Source File: Bug4912404.java    From dragonwell8_jdk with GNU General Public License v2.0 5 votes vote down vote up
public static void main(String[] args) {
    BreakIterator b = BreakIterator.getWordInstance();
    b.setText("abc");
    if (b.equals(null)) {
        throw new RuntimeException("BreakIterator.equals(null) should return false.");
    }
}
 
Example 11
Source File: AccessibleHTML.java    From dragonwell8_jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the Segment at <code>index</code> representing either
 * the paragraph or sentence as identified by <code>part</code>, or
 * null if a valid paragraph/sentence can't be found. The offset
 * will point to the start of the word/sentence in the array, and
 * the modelOffset will point to the location of the word/sentence
 * in the model.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }
    BreakIterator iterator;
    switch (part) {
    case AccessibleText.WORD:
        iterator = BreakIterator.getWordInstance(getLocale());
        break;
    case AccessibleText.SENTENCE:
        iterator = BreakIterator.getSentenceInstance(getLocale());
        break;
    default:
        return null;
    }
    seg.first();
    iterator.setText(seg);
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE) {
        return null;
    }
    if (end > seg.offset + seg.count) {
        return null;
    }
    int begin = iterator.previous();
    if (begin == BreakIterator.DONE ||
        begin >= seg.offset + seg.count) {
        return null;
    }
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
 
Example 12
Source File: AbstractWordAwareDoubleClickStrategy.java    From xtext-eclipse with Eclipse Public License 2.0 5 votes vote down vote up
@Override
protected IRegion findWord(IDocument document, int offset) {
	try {
		IRegion line = document.getLineInformationOfOffset(offset);

		if (offset == line.getOffset() + line.getLength())
			return null;

		BreakIterator breakIter = createBreakIterator();
		CharacterIterator characterIterator = new DocumentCharacterIterator(document);
		breakIter.setText(characterIterator);
		int start = breakIter.preceding(offset);
		if (start == BreakIterator.DONE)
			start = line.getOffset();

		int end = breakIter.following(offset);
		if (end == BreakIterator.DONE)
			end = line.getOffset() + line.getLength();

		if (breakIter.isBoundary(offset)) {
			if (end - offset > offset - start)
				start = offset;
			else
				end = offset;
		}

		if (end == start)
			return null;
		return new Region(start, end - start);
	} catch (BadLocationException e) {
		return null;
	}
}
 
Example 13
Source File: ConditionalSpecialCasing.java    From jdk-1.7-annotated with Apache License 2.0 5 votes vote down vote up
/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    int ch;

    // Look for a preceding 'cased' letter
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    return false;
}
 
Example 14
Source File: AccessibleHTML.java    From jdk1.8-source-analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the Segment at <code>index</code> representing either
 * the paragraph or sentence as identified by <code>part</code>, or
 * null if a valid paragraph/sentence can't be found. The offset
 * will point to the start of the word/sentence in the array, and
 * the modelOffset will point to the location of the word/sentence
 * in the model.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }
    BreakIterator iterator;
    switch (part) {
    case AccessibleText.WORD:
        iterator = BreakIterator.getWordInstance(getLocale());
        break;
    case AccessibleText.SENTENCE:
        iterator = BreakIterator.getSentenceInstance(getLocale());
        break;
    default:
        return null;
    }
    seg.first();
    iterator.setText(seg);
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE) {
        return null;
    }
    if (end > seg.offset + seg.count) {
        return null;
    }
    int begin = iterator.previous();
    if (begin == BreakIterator.DONE ||
        begin >= seg.offset + seg.count) {
        return null;
    }
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
 
Example 15
Source File: TestOpenNLPSentenceBreakIterator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testSliceEnd() throws Exception {
  NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
  BreakIterator bi = new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
  bi.setText(getCharArrayIterator(SENTENCES[0] + PADDING, 0, SENTENCES[0].length()));

  test1Sentence(bi, SENTENCES[0]);
}
 
Example 16
Source File: LogWriterImpl.java    From gemfirexd-oss with Apache License 2.0 4 votes vote down vote up
static void formatText(PrintWriter writer, String target,
                         int initialLength) {
    BreakIterator boundary = BreakIterator.getLineInstance();
    boundary.setText(target);
    int start = boundary.first();
    int end = boundary.next();
    int lineLength = initialLength;

    while (end != BreakIterator.DONE) {
      // Look at the end and only accept whitespace breaks
      char endChar = target.charAt(end-1);
      while (!Character.isWhitespace(endChar)) {
        int lastEnd = end;
        end = boundary.next();
        if (end == BreakIterator.DONE) {
          // give up. We are at the end of the string
          end = lastEnd;
          break;
        }
        endChar = target.charAt(end-1);
      }
      int wordEnd = end;
      if (endChar == '\n') {
        // trim off the \n since println will do it for us
        wordEnd--;
        if (wordEnd > 0 && target.charAt(wordEnd-1) == '\r') {
          wordEnd--;
        }
      } else if (endChar == '\t') {
        // figure tabs use 8 characters
        lineLength += 7;
      }
    String word = target.substring(start, wordEnd);
    lineLength += word.length();
    writer.print(word);
    if (endChar == '\n' || endChar == '\r') {
      // force end of line
      writer.println();
      writer.print("  ");
      lineLength = 2;
    }
    start = end;
    end = boundary.next();
  }
  if (lineLength != 0) {
    writer.println();
  }
}
 
Example 17
Source File: WordCountService.java    From mojito with Apache License 2.0 4 votes vote down vote up
/**
 * Gets the number of words in the string assuming the string is in English.
 * 
 * This implementation doesn't know about placeholders. They are counted as
 * word. Later, we can do something more clever using an 
 * Okapi step later to exclude them.
 *
 * @param string
 * @return number of word
 */
public int getEnglishWordCount(String string) {

    int wordCount = 0;

    BreakIterator wordBreakIterator = BreakIterator.getWordInstance(Locale.ENGLISH);

    wordBreakIterator.setText(string);

    int start = wordBreakIterator.first();
    int end = wordBreakIterator.next();

    while (end != BreakIterator.DONE) {
        
        if (Character.isLetterOrDigit(string.charAt(start))) {
            wordCount += 1;
        }
        
        start = end;
        end = wordBreakIterator.next();
    }

    return wordCount;
}
 
Example 18
Source File: GlyphView.java    From dragonwell8_jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Returns a location to break at in the passed in region, or
 * BreakIterator.DONE if there isn't a good location to break at
 * in the specified region.
 */
private int getBreakSpot(int p0, int p1) {
    if (breakSpots == null) {
        // Re-calculate breakpoints for the whole view
        int start = getStartOffset();
        int end = getEndOffset();
        int[] bs = new int[end + 1 - start];
        int ix = 0;

        // Breaker should work on the parent element because there may be
        // a valid breakpoint at the end edge of the view (space, etc.)
        Element parent = getElement().getParentElement();
        int pstart = (parent == null ? start : parent.getStartOffset());
        int pend = (parent == null ? end : parent.getEndOffset());

        Segment s = getText(pstart, pend);
        s.first();
        BreakIterator breaker = getBreaker();
        breaker.setText(s);

        // Backward search should start from end+1 unless there's NO end+1
        int startFrom = end + (pend > end ? 1 : 0);
        for (;;) {
            startFrom = breaker.preceding(s.offset + (startFrom - pstart))
                      + (pstart - s.offset);
            if (startFrom > start) {
                // The break spot is within the view
                bs[ix++] = startFrom;
            } else {
                break;
            }
        }

        SegmentCache.releaseSharedSegment(s);
        breakSpots = new int[ix];
        System.arraycopy(bs, 0, breakSpots, 0, ix);
    }

    int breakSpot = BreakIterator.DONE;
    for (int i = 0; i < breakSpots.length; i++) {
        int bsp = breakSpots[i];
        if (bsp <= p1) {
            if (bsp > p0) {
                breakSpot = bsp;
            }
            break;
        }
    }
    return breakSpot;
}
 
Example 19
Source File: GlyphView.java    From openjdk-jdk9 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Returns a location to break at in the passed in region, or
 * BreakIterator.DONE if there isn't a good location to break at
 * in the specified region.
 */
private int getBreakSpot(int p0, int p1) {
    if (breakSpots == null) {
        // Re-calculate breakpoints for the whole view
        int start = getStartOffset();
        int end = getEndOffset();
        int[] bs = new int[end + 1 - start];
        int ix = 0;

        // Breaker should work on the parent element because there may be
        // a valid breakpoint at the end edge of the view (space, etc.)
        Element parent = getElement().getParentElement();
        int pstart = (parent == null ? start : parent.getStartOffset());
        int pend = (parent == null ? end : parent.getEndOffset());

        Segment s = getText(pstart, pend);
        s.first();
        BreakIterator breaker = getBreaker();
        breaker.setText(s);

        // Backward search should start from end+1 unless there's NO end+1
        int startFrom = end + (pend > end ? 1 : 0);
        for (;;) {
            startFrom = breaker.preceding(s.offset + (startFrom - pstart))
                      + (pstart - s.offset);
            if (startFrom > start) {
                // The break spot is within the view
                bs[ix++] = startFrom;
            } else {
                break;
            }
        }

        SegmentCache.releaseSharedSegment(s);
        breakSpots = new int[ix];
        System.arraycopy(bs, 0, breakSpots, 0, ix);
    }

    int breakSpot = BreakIterator.DONE;
    for (int i = 0; i < breakSpots.length; i++) {
        int bsp = breakSpots[i];
        if (bsp <= p1) {
            if (bsp > p0) {
                breakSpot = bsp;
            }
            break;
        }
    }
    return breakSpot;
}
 
Example 20
Source File: GlyphView.java    From jdk8u-dev-jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Returns a location to break at in the passed in region, or
 * BreakIterator.DONE if there isn't a good location to break at
 * in the specified region.
 */
private int getBreakSpot(int p0, int p1) {
    if (breakSpots == null) {
        // Re-calculate breakpoints for the whole view
        int start = getStartOffset();
        int end = getEndOffset();
        int[] bs = new int[end + 1 - start];
        int ix = 0;

        // Breaker should work on the parent element because there may be
        // a valid breakpoint at the end edge of the view (space, etc.)
        Element parent = getElement().getParentElement();
        int pstart = (parent == null ? start : parent.getStartOffset());
        int pend = (parent == null ? end : parent.getEndOffset());

        Segment s = getText(pstart, pend);
        s.first();
        BreakIterator breaker = getBreaker();
        breaker.setText(s);

        // Backward search should start from end+1 unless there's NO end+1
        int startFrom = end + (pend > end ? 1 : 0);
        for (;;) {
            startFrom = breaker.preceding(s.offset + (startFrom - pstart))
                      + (pstart - s.offset);
            if (startFrom > start) {
                // The break spot is within the view
                bs[ix++] = startFrom;
            } else {
                break;
            }
        }

        SegmentCache.releaseSharedSegment(s);
        breakSpots = new int[ix];
        System.arraycopy(bs, 0, breakSpots, 0, ix);
    }

    int breakSpot = BreakIterator.DONE;
    for (int i = 0; i < breakSpots.length; i++) {
        int bsp = breakSpots[i];
        if (bsp <= p1) {
            if (bsp > p0) {
                breakSpot = bsp;
            }
            break;
        }
    }
    return breakSpot;
}