Java Code Examples for java.text.BreakIterator#getWordInstance()

The following examples show how to use java.text.BreakIterator#getWordInstance(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TextServiceImpl.java    From olat with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the words in {@code text} using the word-break rules of {@code locale}.
 * <p>
 * Each segment between two consecutive word boundaries is counted as a word when its
 * first code point is a letter or a digit; whitespace and punctuation segments are skipped.
 *
 * @param text   the text to scan; must not be {@code null}
 * @param locale the locale whose word-break rules are applied
 * @return the number of segments that start with a letter or digit
 */
private int countWords(String text, Locale locale) {
    BreakIterator wordIterator = BreakIterator.getWordInstance(locale);
    wordIterator.setText(text);

    int count = 0;
    int start = wordIterator.first();
    for (int end = wordIterator.next(); end != BreakIterator.DONE;
            start = end, end = wordIterator.next()) {
        // Inspect the whole code point: charAt() would yield a lone surrogate for
        // supplementary characters, which is never classified as a letter or digit.
        if (Character.isLetterOrDigit(text.codePointAt(start))) {
            count++;
        }
    }

    return count;
}
 
Example 2
Source File: Bug4533872.java    From openjdk-jdk8u with GNU General Public License v2.0 6 votes vote down vote up
/**
 * For every test string, probes a fixed set of offsets (walked from last to first) and
 * checks that {@code following(offset)} followed by {@code previous()} delimits the
 * expected word. Mismatches are reported through {@code errln}.
 */
void TestPrintAt_1() {
    iter = BreakIterator.getWordInstance(Locale.US);

    // One row of probe offsets per entry of given[].
    final int[][] offsets = {
        {2, 8, 10, 15, 17},
        {1, 8, 10, 12, 15, 17, 20},
        {3, 8, 10, 13, 16, 18, 20},
        {4, 6,  9, 10, 16},
    };

    for (int row = 0; row < given.length; row++) {
        iter.setText(given[row]);

        int col = offsets[row].length;
        while (--col >= 0) {
            end = iter.following(offsets[row][col]);
            start = iter.previous();

            if (expected[row][col].equals(given[row].substring(start, end))) {
                continue;
            }
            errln("Word break failure: printAt_1() expected:<" +
                  expected[row][col] + ">, got:<" +
                  given[row].substring(start, end) +
                  "> start=" + start + "  end=" + end);
        }
    }
}
 
Example 3
Source File: StringUtils.java    From Spark with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a line of text into an array of lower case words using
 * {@link BreakIterator#getWordInstance()}.
 * <p>
 * Each segment reported by the iterator is trimmed, stripped of the characters
 * {@code + / \ # * ) ( &} and, if anything remains, lower-cased and added to the result.
 * <p>
 * This method is under the Jive Open Source Software License
 *
 * @author Mark Imbriaco.
 *
 * @param text
 *            a String of text to convert into an array of words
 * @return text broken up into an array of lower case words; an empty array when
 *         {@code text} is {@code null} or empty.
 */
   public static String[] toLowerCaseWordArray(String text) {
       if (text == null || text.length() == 0) {
           return new String[0];
       }

       ArrayList<String> wordList = new ArrayList<>();
       BreakIterator boundary = BreakIterator.getWordInstance();
       boundary.setText(text);
       int start = boundary.first();

       for (int end = boundary.next(); end != BreakIterator.DONE;
               start = end, end = boundary.next()) {
           String tmp = text.substring(start, end).trim();
           // Remove characters that are not needed.
           tmp = replace(tmp, "+", "");
           tmp = replace(tmp, "/", "");
           tmp = replace(tmp, "\\", "");
           tmp = replace(tmp, "#", "");
           tmp = replace(tmp, "*", "");
           tmp = replace(tmp, ")", "");
           tmp = replace(tmp, "(", "");
           tmp = replace(tmp, "&", "");
           if (tmp.length() > 0) {
               // The method name and contract promise lower case words; Locale.ROOT keeps
               // the conversion independent of the platform's default locale.
               wordList.add(tmp.toLowerCase(java.util.Locale.ROOT));
           }
       }
       return wordList.toArray(new String[0]);
   }
 
Example 4
Source File: ConditionalSpecialCasing.java    From jdk8u60 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text being examined
 * @param index  the char index in {@code src} of the code point C
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased code point is found before {@code index} without
 *         crossing a word boundary and no cased code point follows C before the next
 *         word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    // Holds the code point most recently read; the loop update expressions use it to
    // step by one or two chars, so each loop body assigns it before the update runs.
    int ch;

    // Look for a preceding 'cased' letter
    // Walks backwards one code point at a time and stops at the first word boundary.
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            // The outer loop's index is reused here; that is safe because the method
            // returns from inside this branch and never resumes the backward scan.
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    // No cased code point precedes C inside the current word.
    return false;
}
 
Example 5
Source File: Bug4533872.java    From openjdk-jdk8u with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Walks every test string forward with {@code next()} and checks two things at each
 * step: that {@code current()} agrees with the value just returned by
 * {@code first()}/{@code next()}, and that the text between consecutive boundaries
 * equals the expected word. Failures are reported through {@code errln}.
 */
void TestPrintEachForward() {
    iter = BreakIterator.getWordInstance(Locale.US);

    for (int row = 0; row < given.length; row++) {
        iter.setText(given[row]);
        start = iter.first();

        // current() must report the same position that first() returned.
        current = iter.current();
        if (current != start) {
            errln("Word break failure: printEachForward() Unexpected current value: current()=" +
                  current + ", expected(=first())=" + start);
        }

        int wordIndex = 0;
        end = iter.next();
        while (end != BreakIterator.DONE) {
            // current() must report the same position that next() returned.
            current = iter.current();
            if (current != end) {
                errln("Word break failure: printEachForward() Unexpected current value: current()=" +
                      current + ", expected(=next())=" + end);
            }

            if (!expected[row][wordIndex].equals(given[row].substring(start, end))) {
                errln("Word break failure: printEachForward() expected:<" +
                      expected[row][wordIndex] + ">, got:<" +
                      given[row].substring(start, end) +
                      "> start=" + start + "  end=" + end);
            }

            start = end;
            end = iter.next();
            wordIndex++;
        }
    }
}
 
Example 6
Source File: BreakIteratorTest.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Creates 4096 word iterators in a row, handing each one a fresh one-character string,
 * to check that repeated creation and {@code setText} calls complete without error.
 */
public void testStress() throws Exception {
    final char[] singleChar = { 'a' };
    int remaining = 4096;
    while (remaining > 0) {
        BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.US);
        wordIterator.setText(new String(singleChar));
        remaining--;
    }
}
 
Example 7
Source File: Bug4912404.java    From dragonwell8_jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Regression check: a word iterator compared against {@code null} must report
 * inequality rather than returning {@code true}.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText("abc");

    boolean equalToNull = wordIterator.equals(null);
    if (equalToNull) {
        throw new RuntimeException("BreakIterator.equals(null) should return false.");
    }
}
 
Example 8
Source File: BreakIteratorTest.java    From openjdk-jdk9 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Sets up the fixture with one iterator of each break type (character, word, line
 * and sentence), each obtained from the no-argument factory method.
 */
public BreakIteratorTest()
{
    this.characterBreak = BreakIterator.getCharacterInstance();
    this.wordBreak = BreakIterator.getWordInstance();
    this.lineBreak = BreakIterator.getLineInstance();
    this.sentenceBreak = BreakIterator.getSentenceInstance();
}
 
Example 9
Source File: ImagePanel.java    From ios-image-util with MIT License 5 votes vote down vote up
/**
 * Breaks {@code text} into lines that fit this component's width.
 * <p>
 * Words are taken from a word {@link BreakIterator} (the default locale when it is
 * Japanese, {@link Locale#US} otherwise) and appended to the current line until the
 * measured width would exceed {@code getWidth()}. Lines consisting only of whitespace
 * (including the ideographic space U+3000) are dropped.
 *
 * @param text	text to split
 * @param fm	metrics used to measure line and word widths
 * @return	the display lines, in order
 */
private ArrayList<String> splitText(String text, FontMetrics fm) {
	ArrayList<String> lines = new ArrayList<String>();
	StringBuilder line = new StringBuilder();

	Locale defaultLocale = Locale.getDefault();
	boolean japanese = defaultLocale.equals(Locale.JAPAN) || defaultLocale.equals(Locale.JAPANESE);
	BreakIterator boundary = BreakIterator.getWordInstance(japanese ? defaultLocale : Locale.US);
	boundary.setText(text);

	int from = boundary.first();
	int to = boundary.next();
	while (to != BreakIterator.DONE) {
		String word = text.substring(from, to);
		boolean overflows = fm.stringWidth(line.toString()) + fm.stringWidth(word) > this.getWidth();
		if (overflows) {
			// Very easy hyphenation. (just only one character)
			if (this.hyphenatorBoL != null && word.length() == 1 && this.hyphenatorBoL.indexOf(word.charAt(0)) >= 0) {
				// A character listed in hyphenatorBoL stays on the line being closed.
				line.append(word);
				word = "";
			} else if (this.hyphenatorEoL != null && line.length() > 1 && this.hyphenatorEoL.indexOf(line.charAt(line.length() - 1)) >= 0) {
				// A trailing character listed in hyphenatorEoL moves down with the next word.
				word = line.substring(line.length() - 1).concat(word);
				line.setLength(line.length() - 1);
			}
			String finished = line.toString();
			if (finished.replace('\u3000', ' ').trim().length() > 0) {
				lines.add(finished);
			}
			line.setLength(0);
		}
		line.append(word);

		from = to;
		to = boundary.next();
	}

	String remainder = line.toString();
	if (remainder.replace('\u3000', ' ').trim().length() > 0) {
		lines.add(remainder);
	}
	return lines;
}
 
Example 10
Source File: BreakIteratorTest.java    From jdk8u_jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs the generic break-invariant checks against the default word iterator, using the
 * canned test characters extended with apostrophe, comma, period and a handful of
 * kana/CJK characters. The test is skipped when the default locale's language is "th".
 */
public void TestWordInvariants()
{
    final String defaultLanguage = Locale.getDefault().getLanguage();
    if (defaultLanguage.equals("th")) {
        logln("This test is skipped in th locale.");
        return;
    }

    // Extra characters appended to the canned set for both invariant checks.
    final String extraChars = "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"
        + "\u30a3\u4e00\u4e01\u4e02";

    BreakIterator e = BreakIterator.getWordInstance();
    doBreakInvariantTest(e, cannedTestChars + extraChars);
    doOtherInvariantTest(e, cannedTestChars + extraChars);
}
 
Example 11
Source File: ConditionalSpecialCasing.java    From openjdk-8 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text being examined
 * @param index  the char index in {@code src} of the code point C
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased code point is found before {@code index} without
 *         crossing a word boundary and no cased code point follows C before the next
 *         word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    // Holds the code point most recently read; the loop update expressions use it to
    // step by one or two chars, so each loop body assigns it before the update runs.
    int ch;

    // Look for a preceding 'cased' letter
    // Walks backwards one code point at a time and stops at the first word boundary.
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            // The outer loop's index is reused here; that is safe because the method
            // returns from inside this branch and never resumes the backward scan.
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    // No cased code point precedes C inside the current word.
    return false;
}
 
Example 12
Source File: ConditionalSpecialCasing.java    From openjdk-jdk9 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Implements the "Final_Cased" condition
 *
 * Specification: Within the closest word boundaries containing C, there is a cased
 * letter before C, and there is no cased letter after C.
 *
 * Regular Expression:
 *   Before C: [{cased==true}][{wordBoundary!=true}]*
 *   After C: !([{wordBoundary!=true}]*[{cased}])
 *
 * @param src    the text being examined
 * @param index  the char index in {@code src} of the code point C
 * @param locale the locale used to obtain the word break iterator
 * @return {@code true} if a cased code point is found before {@code index} without
 *         crossing a word boundary and no cased code point follows C before the next
 *         word boundary; {@code false} otherwise
 */
private static boolean isFinalCased(String src, int index, Locale locale) {
    BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);
    wordBoundary.setText(src);
    // Holds the code point most recently read; the loop update expressions use it to
    // step by one or two chars, so each loop body assigns it before the update runs.
    int ch;

    // Look for a preceding 'cased' letter
    // Walks backwards one code point at a time and stops at the first word boundary.
    for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);
            i -= Character.charCount(ch)) {

        ch = src.codePointBefore(i);
        if (isCased(ch)) {

            int len = src.length();
            // Check that there is no 'cased' letter after the index
            // The outer loop's index is reused here; that is safe because the method
            // returns from inside this branch and never resumes the backward scan.
            for (i = index + Character.charCount(src.codePointAt(index));
                    (i < len) && !wordBoundary.isBoundary(i);
                    i += Character.charCount(ch)) {

                ch = src.codePointAt(i);
                if (isCased(ch)) {
                    return false;
                }
            }

            return true;
        }
    }

    // No cased code point precedes C inside the current word.
    return false;
}
 
Example 13
Source File: MainPanel.java    From java-swing-tips with MIT License 5 votes vote down vote up
/**
 * Determines the document offset at which the word around {@code offs} begins.
 * <p>
 * The paragraph containing {@code offs} is loaded into a shared {@link Segment} and
 * scanned with a word {@link BreakIterator} for the component's locale. Unless
 * {@code offs} lies at or beyond the end of the paragraph text, an underscore or hyphen
 * between the iterator's word start and {@code offs} additionally acts as a separator:
 * the result is then the offset just after the one closest to {@code offs}.
 *
 * @param c    the text component to inspect
 * @param offs the document offset to start from
 * @return the document offset of the word start, or {@code offs} itself when the
 *         paragraph text is empty
 * @throws BadLocationException if no paragraph element exists at {@code offs} or the
 *         paragraph text cannot be read
 */
public static int getWordStart(JTextComponent c, int offs) throws BadLocationException {
  Element line = Optional.ofNullable(Utilities.getParagraphElement(c, offs))
      .orElseThrow(() -> new BadLocationException("No word at " + offs, offs));
  Document doc = c.getDocument();
  int lineStart = line.getStartOffset();
  int lineEnd = Math.min(line.getEndOffset(), doc.getLength());
  int offs2 = offs;
  Segment seg = SegmentCache.getSharedSegment();
  try {
    doc.getText(lineStart, lineEnd - lineStart, seg);
    if (seg.count > 0) {
      BreakIterator words = BreakIterator.getWordInstance(c.getLocale());
      words.setText(seg);
      // As a CharacterIterator the segment is indexed from seg.offset, so document
      // offsets are translated into that range before being handed to the iterator.
      int wordPosition = seg.offset + offs - lineStart;
      if (wordPosition >= words.last()) {
        wordPosition = words.last() - 1;
        words.following(wordPosition);
        offs2 = lineStart + words.previous() - seg.offset;
      } else {
        words.following(wordPosition);
        offs2 = lineStart + words.previous() - seg.offset;
        for (int i = offs; i > offs2; i--) {
          // Segment.charAt() is relative to the start of the segment (0..count-1),
          // so the document offset is rebased with lineStart, not seg.offset.
          char ch = seg.charAt(i - lineStart);
          if (ch == '_' || ch == '-') {
            offs2 = i + 1;
            break;
          }
        }
      }
    }
  } finally {
    // Always hand the shared segment back, even when getText() throws.
    SegmentCache.releaseSharedSegment(seg);
  }
  return offs2;
}
 
Example 14
Source File: NavigationActions.java    From RichTextFX with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Moves the caret forward across word boundaries: first to the boundary following the
 * current caret position, then {@code n - 1} further boundaries. Does nothing when the
 * text is empty.
 *
 * @param n               number of word boundaries to advance over
 * @param selectionPolicy policy passed on to {@code moveTo}
 */
default void wordBreaksForwards(int n, SelectionPolicy selectionPolicy) {
    if (getLength() != 0) {
        BreakIterator wordBreakIterator = BreakIterator.getWordInstance();
        wordBreakIterator.setText(getText());

        // The first boundary is reached via following(); the rest via next().
        wordBreakIterator.following(getCaretPosition());
        int step = 1;
        while (step < n) {
            wordBreakIterator.next();
            step++;
        }

        moveTo(wordBreakIterator.current(), selectionPolicy);
    }
}
 
Example 15
Source File: RenderableComplexText.java    From pentaho-reporting with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Computes and stores the minimum chunk width as the widest visible advance of any
 * segment delimited by a word {@link BreakIterator} over {@code text}.
 * <p>
 * Nothing is computed when a non-zero width is already set, or when neither strict
 * compatibility nor the word-break style property is enabled.
 *
 * @param data            output metadata consulted for features and font smoothing
 * @param resourceManager resource manager handed to the rich-text spec producer
 */
public void computeMinimumChunkWidth( final OutputProcessorMetaData data, final ResourceManager resourceManager ) {
  if ( getMinimumChunkWidth() != 0 ) {
    return;
  }

  if ( !data.isFeatureSupported( OutputProcessorFeature.STRICT_COMPATIBILITY )
      && !getStyleSheet().getBooleanStyleProperty( TextStyleKeys.WORDBREAK ) ) {
    return;
  }

  long widest = 0;
  BreakIterator wordInstance = BreakIterator.getWordInstance();
  wordInstance.setText( text );

  final boolean antiAliasing = RenderUtility.isFontSmooth( getStyleSheet(), data );
  final FontRenderContext fontRenderContext = new FontRenderContext( null, antiAliasing, true );

  int from = wordInstance.first();
  int to = wordInstance.next();
  while ( to != BreakIterator.DONE ) {
    String word = text.substring( from, to );
    AttributedCharacterIterator styledWord =
        new RichTextSpecProducer( data, resourceManager ).computeText( this, word )
            .createAttributedCharacterIterator();
    TextLayout layout = new TextLayout( styledWord, fontRenderContext );
    final long wordWidth = StrictGeomUtility.toInternalValue( layout.getVisibleAdvance() );
    widest = Math.max( widest, wordWidth );

    from = to;
    to = wordInstance.next();
  }

  setMinimumChunkWidth( widest );
}
 
Example 16
Source File: JavaBreakIterator.java    From eclipse.jdt.ls with Eclipse Public License 2.0 4 votes vote down vote up
/**
 * Constructs the iterator on top of a default word {@link BreakIterator} and records
 * that iterator's current position as the initial index.
 */
public JavaBreakIterator() {
	this.fIterator = BreakIterator.getWordInstance();
	this.fIndex = this.fIterator.current();
}
 
Example 17
Source File: PerfReporter.java    From gemfirexd-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Re-flows {@code text} into an indented paragraph whose lines are broken once the
 * next segment would pass column 72.
 * <p>
 * The text is split with a word {@link BreakIterator}. A segment ending in a newline is
 * written without that newline, and a whitespace segment at the start of a line loses
 * its first character.
 *
 * @param text
 *        The text to be filled
 * @param indent
 *        The number of spaces to indent
 * @return the filled paragraph
 *
 * author David Whitlock
 */
static String fillParagraph(String text, int indent) {
  StringWriter buffer = new StringWriter();
  PrintWriter out = new PrintWriter(buffer, true);

  StringBuilder padding = new StringBuilder();
  for (int i = 0; i < indent; i++) {
    padding.append(' ');
  }
  final String indentString = padding.toString();
  out.print(indentString);

  int column = indentString.length();
  boolean atLineStart = true;

  BreakIterator words = BreakIterator.getWordInstance();
  words.setText(text);

  int from = words.first();
  int to = words.next();
  while (to != BreakIterator.DONE) {
    String word = text.substring(from, to);

    // Start a new indented line when this segment would not fit.
    if (column + word.length() > 72) {
      out.println();
      out.print(indentString);
      column = indentString.length();
      atLineStart = true;
    }

    int lastIndex = word.length() - 1;
    if (word.charAt(lastIndex) == '\n') {
      // Drop the trailing newline of the segment.
      out.write(word, 0, lastIndex);

    } else if (atLineStart && Character.isWhitespace(word.charAt(0))) {
      // Drop the first whitespace character at the start of a line.
      out.write(word, 1, lastIndex);

    } else {
      out.print(word);
    }

    // The column advances by the full segment length even when a character was dropped.
    column += to - from;
    atLineStart = false;

    from = to;
    to = words.next();
  }

  return buffer.toString();
}
 
Example 18
Source File: Utils.java    From samoa with Apache License 2.0 4 votes vote down vote up
/**
  * Splits a string into lines at word boundaries. The input is first split on newline
  * characters; within each piece, words are appended to the current line, and the line
  * is emitted once its length has reached "columns" characters. A single punctuation
  * character arriving at that point is kept on the line being emitted.
  *
  * @param s		the string to process
  * @param columns	the width in columns
  * @return		the resulting lines
  */
 public static String[] breakUp(String s, int columns) {
   Vector<String> result = new Vector<String>();
   final String punctuation = " .,;:!?'\"";

   for (String piece : s.split("\n")) {
     BreakIterator boundary = BreakIterator.getWordInstance();
     boundary.setText(piece);
     StringBuilder line = new StringBuilder();

     int from = boundary.first();
     for (int to = boundary.next(); to != BreakIterator.DONE; from = to, to = boundary.next()) {
       String word = piece.substring(from, to);
       if (line.length() >= columns) {
         // Keep a lone punctuation character with the line it follows.
         if (word.length() == 1 && punctuation.indexOf(word.charAt(0)) > -1) {
           line.append(word);
           word = "";
         }
         result.add(line.toString());
         line.setLength(0);
       }
       line.append(word);
     }

     if (line.length() > 0) {
       result.add(line.toString());
     }
   }

   return result.toArray(new String[result.size()]);
 }
 
Example 19
Source File: AccessibilityIterators.java    From android_9.0.0_r45 with Apache License 2.0 4 votes vote down vote up
/**
 * Swaps the backing iterator for a word iterator created for the new locale.
 */
@Override
protected void onLocaleChanged(Locale locale) {
    final BreakIterator localizedWordIterator = BreakIterator.getWordInstance(locale);
    mImpl = localizedWordIterator;
}
 
Example 20
Source File: InfoWindow.java    From jdk8u-dev-jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Fills in the caption and the text lines, selects the icon that matches
 * {@code messageType}, and schedules the window to be shown on the event handler thread.
 *
 * @param caption     the caption text set on the caption label
 * @param text        the message body; when {@code null} no text lines are laid out
 * @param messageType {@code "ERROR"}, {@code "WARNING"} or {@code "INFO"} to pick an
 *                    icon; any other value results in no icon
 */
private void _display(String caption, String text, String messageType) {
    captionLabel.setText(caption);

    BreakIterator iter = BreakIterator.getWordInstance();
    if (text != null) {
        iter.setText(text);
        int start = iter.first(), end;
        int nLines = 0;

        // Advance boundary by boundary; a line is emitted once the run starting at
        // 'start' is at least 50 characters long or the text is exhausted.
        do {
            end = iter.next();

            if (end == BreakIterator.DONE ||
                text.substring(start, end).length() >= 50)
            {
                // When the iterator is exhausted the line runs to the last boundary.
                lineLabels[nLines].setText(text.substring(start, end == BreakIterator.DONE ?
                                                          iter.last() : end));
                textPanel.add(lineLabels[nLines++]);
                start = end;
            }
            // Stop at the line limit, marking the last line with " ..." if text remains.
            if (nLines == BALLOON_WORD_LINE_MAX_COUNT) {
                if (end != BreakIterator.DONE) {
                    lineLabels[nLines - 1].setText(
                        new String(lineLabels[nLines - 1].getText() + " ..."));
                }
                break;
            }
        } while (end != BreakIterator.DONE);


        // One grid row per emitted line.
        textPanel.setLayout(new GridLayout(nLines, 1));
    }

    // Map the message type onto an icon; unrecognised types get none.
    if ("ERROR".equals(messageType)) {
        iconImage = errorImage;
    } else if ("WARNING".equals(messageType)) {
        iconImage = warnImage;
    } else if ("INFO".equals(messageType)) {
        iconImage = infoImage;
    } else {
        iconImage = null;
    }

    if (iconImage != null) {
        // The icon canvas is at least BALLOON_ICON_HEIGHT tall, or as tall as the text panel.
        Dimension tpSize = textPanel.getSize();
        iconCanvas.setSize(BALLOON_ICON_WIDTH, (BALLOON_ICON_HEIGHT > tpSize.height ?
                                                BALLOON_ICON_HEIGHT : tpSize.height));
        iconCanvas.validate();
    }

    SunToolkit.executeOnEventHandlerThread(target, new Runnable() {
            public void run() {
                // Nothing to show once the owner has been disposed.
                if (liveArguments.isDisposed()) {
                    return;
                }
                // Show the window at the centre of the parent's on-screen bounds.
                Point parLoc = getParent().getLocationOnScreen();
                Dimension parSize = getParent().getSize();
                show(new Point(parLoc.x + parSize.width/2, parLoc.y + parSize.height/2),
                     BALLOON_TRAY_ICON_INDENT);
                if (iconImage != null) {
                    iconCanvas.updateImage(iconImage); // call it after the show(..) above
                }
            }
        });
}