Java Code Examples for java.text.BreakIterator#next()

The following examples show how to use java.text.BreakIterator#next(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. You may also check out related API usage in the sidebar.
Example 1
Source File: TextComponent.java    From openjdk-jdk8u with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Direction-agnostic search for the edge of the nearest word.
 *
 * <p>Starting from the boundary adjacent to {@code index}, consecutive pairs
 * of boundaries are examined in the requested direction. The first pair whose
 * enclosed characters contain at least one letter wins, and the boundary
 * closer to the starting point is returned.
 *
 * @param index     offset in {@code s} from which the search starts
 * @param words     iterator to walk; the caller must already have set its
 *                  text to {@code s}
 * @param direction {@code NEXT} to search forwards, anything else to search
 *                  backwards
 * @param s         the text that {@code words} iterates over
 * @return the near boundary of the first span containing a letter, or
 *         {@link BreakIterator#DONE} if the iterator is exhausted first
 */
private int findWordLimit(int index, BreakIterator words, boolean direction,
                                 String s) {
    // Fix for 4256660 and 4256661: with a word iterator the end of one word
    // is not necessarily the start of the next, so spans without letters
    // (whitespace, punctuation) have to be skipped explicitly.
    final boolean forward = (direction == NEXT);

    int boundary = forward ? words.following(index) : words.preceding(index);
    int candidate = forward ? words.next() : words.previous();

    while (candidate != BreakIterator.DONE) {
        // The two boundaries may be in either order depending on direction.
        final int lo = Math.min(boundary, candidate);
        final int hi = Math.max(boundary, candidate);
        for (int pos = lo; pos < hi; pos++) {
            if (Character.isLetter(s.charAt(pos))) {
                return boundary;
            }
        }
        boundary = candidate;
        candidate = forward ? words.next() : words.previous();
    }
    return BreakIterator.DONE;
}
 
Example 2
Source File: TextComponent.java    From JDKSourceCode1.8 with MIT License 6 votes vote down vote up
/**
 * Shared implementation for forward and backward word-limit lookups.
 *
 * <p>The iterator is positioned at the boundary next to {@code index} and
 * then stepped in the chosen direction. Each span between two successive
 * boundaries is scanned; as soon as a span holding a letter is found, the
 * boundary at which that span was entered is returned.
 *
 * @param index     starting offset within {@code s}
 * @param words     iterator whose text is assumed to be {@code s}
 * @param direction compared against {@code NEXT}; equal means forwards,
 *                  otherwise backwards
 * @param s         the text being examined
 * @return the boundary at which the first letter-bearing span begins (seen
 *         from the search direction), or {@link BreakIterator#DONE}
 */
private int findWordLimit(int index, BreakIterator words, boolean direction,
                                 String s) {
    // Fix for 4256660 and 4256661. Unlike character or sentence iterators,
    // a word iterator also reports boundaries around non-word material, so
    // every span has to be checked for actual letters.
    int anchor;
    int probe;
    if (direction == NEXT) {
        anchor = words.following(index);
        probe = words.next();
    } else {
        anchor = words.preceding(index);
        probe = words.previous();
    }

    for (; probe != BreakIterator.DONE;
            probe = (direction == NEXT) ? words.next() : words.previous()) {
        final int spanStart = Math.min(anchor, probe);
        final int spanEnd = Math.max(anchor, probe);
        for (int at = spanStart; at < spanEnd; at++) {
            if (Character.isLetter(s.charAt(at))) {
                return anchor;
            }
        }
        // No letter in this span: slide the window one boundary further.
        anchor = probe;
    }
    return BreakIterator.DONE;
}
 
Example 3
Source File: TextComponent.java    From openjdk-jdk9 with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Finds a word limit relative to {@code index}, searching either forwards
 * or backwards with the same logic.
 *
 * <p>Pairs of adjacent boundaries are inspected one after another. A pair
 * qualifies when any character between the two boundaries is a letter; the
 * boundary that was reached first is then the result.
 *
 * @param index     offset in {@code s} where the search begins
 * @param words     iterator to drive; expected to have {@code s} as its text
 * @param direction {@code NEXT} for a forward search, otherwise backward
 * @param s         text on which {@code words} operates
 * @return the first-reached boundary of the qualifying span, or
 *         {@link BreakIterator#DONE} when no span contains a letter
 */
private int findWordLimit(int index, BreakIterator words, boolean direction,
                                 String s) {
    // Fix for 4256660 and 4256661: the end of one word need not be the
    // start of another, hence the explicit letter check on every span.
    int last = (direction == NEXT) ? words.following(index)
                                   : words.preceding(index);
    int current = (direction == NEXT) ? words.next()
                                      : words.previous();

    while (current != BreakIterator.DONE) {
        final int from = Math.min(last, current);
        final int to = Math.max(last, current);

        // Stop scanning at the first letter; later characters are irrelevant.
        boolean hasLetter = false;
        for (int p = from; p < to && !hasLetter; p++) {
            hasLetter = Character.isLetter(s.charAt(p));
        }
        if (hasLetter) {
            return last;
        }

        last = current;
        if (direction == NEXT) {
            current = words.next();
        } else {
            current = words.previous();
        }
    }
    return BreakIterator.DONE;
}
 
Example 4
Source File: SimpleTokenAndSentenceAnnotator.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Creates one annotation per span reported by the given break iterator,
 * skipping spans that consist solely of whitespace.
 *
 * <p>The iterator's text is set to {@code input}; every pair of consecutive
 * boundaries {@code [start, end)} that contains at least one non-whitespace
 * character is turned into an annotation via {@code m} and added to the
 * indexes.
 *
 * @param m factory used to create the annotation for a span
 * @param b break iterator that determines the spans (its text is replaced)
 */
void makeAnnotations(Maker m, BreakIterator b) {
  b.setText(input);
  // Fetch the first boundary BEFORE advancing. Calling first() after next()
  // rewinds the iterator, so the first boundary would be reported twice and
  // the loop would process a redundant empty span.
  int start = b.first();
  for (int end = b.next(); end != BreakIterator.DONE; start = end, end = b.next()) {
    // eliminate all-whitespace tokens
    boolean isWhitespace = true;
    for (int i = start; i < end; i++) {
      if (!Character.isWhitespace(input.charAt(i))) {
        isWhitespace = false;
        break;
      }
    }
    if (!isWhitespace) {
      m.newAnnotation(jcas, start, end).addToIndexes();
    }
  }
}
 
Example 5
Source File: BreakIteratorTest.java    From TencentKona-8 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Regression test for bug 4068137: a word iterator over {@code "boo."} must
 * report boundaries at the start of the text, before the period and at the
 * end of the text.
 */
public void TestEndBehavior()
{
    final String testString = "boo.";
    final BreakIterator wb = BreakIterator.getWordInstance();
    wb.setText(testString);

    final int atStart = wb.first();
    if (atStart != 0) {
        errln("Didn't get break at beginning of string.");
    }

    final int beforePeriod = wb.next();
    if (beforePeriod != 3) {
        errln("Didn't get break before period in \"boo.\"");
    }

    // Accept the end boundary either as the current position or as the
    // next one; next() is only consulted when current() is not already 4.
    final boolean reachedEnd = wb.current() == 4 || wb.next() == 4;
    if (!reachedEnd) {
        errln("Didn't get break at end of string.");
    }
}
 
Example 6
Source File: TestSplittingBreakIterator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Renders the boundaries of {@code bi} over {@code text} as a marker line.
 *
 * <p>The result has {@code text.length() + 1} characters: a {@code '^'} at
 * every boundary offset and a space everywhere else.
 *
 * @param bi   iterator to query; its text is replaced with {@code text}
 * @param text the text to analyse
 * @return the marker line
 */
private String readBoundariesToString(BreakIterator bi, String text) {
  // One slot per possible boundary offset (0 .. text.length()), all blank.
  final int slotCount = text.length() + 1;
  StringBuilder slots = new StringBuilder(slotCount);
  while (slots.length() < slotCount) {
    slots.append(' ');
  }

  bi.setText(text);
  // Start from wherever the iterator sits after setText and walk forward.
  int offset = bi.current();
  while (offset != BreakIterator.DONE) {
    slots.setCharAt(offset, '^');
    offset = bi.next();
  }
  return slots.toString();
}
 
Example 7
Source File: SexpBaseForwardHandler.java    From e4macs with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Advances {@code iter} by one boundary and, unless the iterator is
 * exhausted, passes the new boundary through {@code checkDot} and then
 * {@code checkUnder} before returning it.
 *
 * @see com.mulgasoft.emacsplus.commands.SexpHandler#getNextPosition(org.eclipse.jface.text.IDocument, java.text.BreakIterator)
 */
@Override
protected int getNextPosition(IDocument document, BreakIterator iter) {
	final int origin = iter.current();
	final int boundary = iter.next();
	if (boundary == BreakIterator.DONE) {
		// Nothing further to adjust once the iterator has run out.
		return boundary;
	}
	final int afterDot = checkDot(document, origin, boundary);
	return checkUnder(document, afterDot);
}
 
Example 8
Source File: DictionaryResource.java    From newsleak with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Decides whether a string should be treated as a multi word unit.
 *
 * <p>The string is scanned with a word break iterator for {@code locale};
 * the number of boundaries found is halved (integer division) and the
 * string counts as multi word when that figure exceeds one.
 *
 * @param t
 *            the string to inspect
 * @return true, if is multi word
 */
private boolean isMultiWord(String t) {
	BreakIterator wordBoundaries = BreakIterator.getWordInstance(locale);
	wordBoundaries.setText(t);

	// count every boundary, including the one at offset 0
	int boundaryCount = 0;
	for (int at = wordBoundaries.first(); at != BreakIterator.DONE; at = wordBoundaries.next()) {
		boundaryCount++;
	}

	final int estimatedTokens = boundaryCount / 2;
	return estimatedTokens > 1;
}
 
Example 9
Source File: MirroredBreakIterator.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Records every boundary of {@code bi}, from {@code first()} up to but not
 * including {@code DONE}, in an unmodifiable list. {@code charIndex} starts
 * out at the first boundary.
 */
MirroredBreakIterator(BreakIterator bi) {
    final List<Integer> collected = new ArrayList<>();
    int position = bi.first();
    charIndex = position;
    while (position != DONE) {
        collected.add(position);
        position = bi.next();
    }
    boundaries = Collections.unmodifiableList(collected);
}
 
Example 10
Source File: MirroredBreakIterator.java    From jdk8u-jdk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Builds the mirror by draining {@code bi}: all boundaries it reports,
 * starting with {@code first()}, are copied into a read-only list, and
 * {@code charIndex} is set to the first of them.
 */
MirroredBreakIterator(BreakIterator bi) {
    final int firstBoundary = bi.first();
    charIndex = firstBoundary;

    List<Integer> seen = new ArrayList<Integer>();
    int next = firstBoundary;
    while (next != DONE) {
        seen.add(next);
        next = bi.next();
    }
    boundaries = Collections.unmodifiableList(seen);
}
 
Example 11
Source File: NavigationActions.java    From RichTextFX with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Moves forward across {@code n} word boundaries, starting at the caret.
 *
 * <p>Does nothing when the content is empty. The first boundary after the
 * caret is always crossed, so values of {@code n} below 1 behave like 1.
 *
 * @param n               number of word boundaries to skip
 * @param selectionPolicy passed through to {@code moveTo}
 */
default void wordBreaksForwards(int n, SelectionPolicy selectionPolicy) {
    if(getLength() == 0) {
        return;
    }

    BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(getText());

    // following() itself lands on the first boundary past the caret ...
    boundaries.following(getCaretPosition());
    // ... so only the remaining n - 1 boundaries need explicit steps.
    int crossed = 1;
    while (crossed < n) {
        boundaries.next();
        crossed++;
    }

    moveTo(boundaries.current(), selectionPolicy);
}
 
Example 12
Source File: MtasDocumentIndex.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Turns a plain "bag of words" query into a Mtas CQP query.
 *
 * <p>A query containing any of {@code " [ ] { } < >} is returned untouched.
 * Otherwise the query is split at word boundaries and every non-blank token
 * is wrapped in double quotes. A single space is appended after each token
 * (blank or not) that is followed by another token.
 *
 * @param aQuery the query as entered
 * @return the query to submit
 */
private String preprocessQuery(String aQuery)
{
    // Any of these characters means the caller already wrote query syntax.
    final String[] syntaxMarkers = { "\"", "[", "]", "{", "}", "<", ">" };
    for (String marker : syntaxMarkers) {
        if (aQuery.contains(marker)) {
            return aQuery;
        }
    }

    // Convert raw words query to a Mtas CQP query
    StringBuilder cqp = new StringBuilder();
    BreakIterator tokenizer = BreakIterator.getWordInstance();
    tokenizer.setText(aQuery);

    int from = tokenizer.first();
    int to = tokenizer.next();
    while (to != BreakIterator.DONE) {
        String token = aQuery.substring(from, to);
        if (!token.trim().isEmpty()) {
            cqp.append("\"").append(token).append("\"");
        }

        from = to;
        to = tokenizer.next();
        // Separator goes in whenever another token follows, even if the
        // token just handled was blank and therefore not emitted.
        if (to != BreakIterator.DONE) {
            cqp.append(" ");
        }
    }
    return cqp.toString();
}
 
Example 13
Source File: DatePicker.java    From nebula with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Refreshes the text of the day-of-week header labels shown above the days
 * in the calendar part of the <code>CDateTime</code>.
 *
 * <p>Starting with the calendar's first day of the week, each label receives
 * the "E" formatted day name. Labels flagged as compact only show a single
 * character-iterator unit of that name: the first one for left-to-right
 * locales other than "zh", the last one otherwise. Nothing happens while
 * {@code dayPanel} is {@code null}.
 */
private void updateDaysOfWeek() {
    if (dayPanel == null) {
        return;
    }

    Calendar weekCursor = cdt.getCalendarInstance();
    weekCursor.set(Calendar.DAY_OF_WEEK, weekCursor.getFirstDayOfWeek());

    Locale locale = cdt.getLocale();
    boolean leftToRight = ComponentOrientation.getOrientation(locale).isLeftToRight();
    boolean useLeadingUnit = leftToRight && !locale.getLanguage().equals("zh"); //$NON-NLS-1$

    BreakIterator units = BreakIterator.getCharacterInstance(locale);
    for (VLabel dayLabel : dayLabels) {
        String dayName = getFormattedDate("E", weekCursor.getTime()); //$NON-NLS-1$
        String shown = dayName;
        if (dayLabel.getData(CDT.Key.Compact, Boolean.class)) {
            units.setText(dayName);
            int from;
            int to;
            if (useLeadingUnit) {
                // leading unit: [first boundary, next boundary)
                from = units.first();
                to = units.next();
            } else {
                // trailing unit: [previous boundary, last boundary)
                to = units.last();
                from = units.previous();
            }
            shown = dayName.substring(from, to);
        }
        dayLabel.setText(shown);
        weekCursor.add(Calendar.DAY_OF_WEEK, 1);
    }
}
 
Example 14
Source File: BaseUtilities.java    From netbeans with Apache License 2.0 4 votes vote down vote up
/** Wraps a (possibly multi-line) string and returns the resulting lines.
    * <p>
    * Segments shorter than <code>width</code> are kept as they are; longer
    * ones are cut at boundaries supplied by <code>breakIterator</code>. When
    * not even the first boundary fits, the line is cut at that boundary and
    * therefore exceeds the width.
    *
    * @param original  the original string to wrap
    * @param width     the maximum width of lines; values below 1 are treated as 1
    * @param breakIterator breaks original to chars, words, sentences, depending on what instance you provide.
    * @param removeNewLines if <code>true</code>, the string is trimmed and its newlines are
    *                  replaced by spaces; otherwise it is first split at newlines
    * @return the lines after wrapping
    */
    public static String[] wrapStringToArray(
        String original, int width, BreakIterator breakIterator, boolean removeNewLines
    ) {
        if (original.length() == 0) {
            return new String[] { original };
        }

        final String[] segments;

        if (removeNewLines) {
            // single segment: head/tail trimmed, inner newlines become spaces
            original = trimString(original);
            original = original.replace('\n', ' ');
            segments = new String[] { original };
        } else {
            // one segment per newline-separated token
            StringTokenizer tokenizer = new StringTokenizer(original, "\n"); // NOI18N
            segments = new String[tokenizer.countTokens()];

            for (int k = 0; k < segments.length; k++) {
                segments[k] = tokenizer.nextToken();
            }
        }

        width = Math.max(width, 1);

        if (original.length() <= width) {
            return segments;
        }

        // If every segment is already short enough there is nothing to wrap.
        boolean needsWrapping = false;

        for (String segment : segments) {
            if (segment.length() >= width) {
                needsWrapping = true;
                break;
            }
        }

        if (!needsWrapping) {
            return segments;
        }

        java.util.ArrayList<String> wrapped = new java.util.ArrayList<String>();

        for (String segment : segments) {
            if (segment.length() < width) {
                wrapped.add(segment);
                continue;
            }

            breakIterator.setText(segment);

            int lineStart = 0; // start of the line currently being assembled
            int candidate = breakIterator.next(); // next boundary under consideration
            int lastFit = 0; // last boundary that still fits; 0 means "none yet"

            do {
                // pull in boundaries for as long as the line stays below width
                while ((candidate != BreakIterator.DONE) && ((candidate - lineStart) < width)) {
                    lastFit = candidate;
                    candidate = breakIterator.next();
                }

                if (candidate == BreakIterator.DONE) {
                    // ran out of boundaries: the rest of the segment is the line
                    candidate = segment.length();
                    lastFit = candidate;
                }

                if (lastFit == 0) {
                    // nothing fitted: cut at the candidate even though it is too wide
                    lastFit = candidate;
                }

                wrapped.add(segment.substring(lineStart, lastFit));

                lineStart = lastFit;
                lastFit = 0;
            } while (lineStart < segment.length());
        }

        return wrapped.toArray(new String[wrapped.size()]);
    }
 
Example 15
Source File: PlainText.java    From gcs with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Splits the paragraph text into lines that fit the given width.
 *
 * <p>The text is cut at line-break opportunities. Chunks are appended to
 * the current line until the accumulated width reaches {@code width}; the
 * chunk that causes this starts a new line. If the overflowing chunk ends
 * in whitespace, the width of that last character is disregarded before
 * deciding.
 *
 * @param font the font used for rendering the text.
 * @param fontSize the fontSize used for rendering the text.
 * @param width the width of the box holding the content.
 * @return the individual lines.
 * @throws IOException propagated from the font and line width computations
 */
List<Line> getLines(PDFont font, float fontSize, float width) throws IOException
{
    final float scale = fontSize/FONTSCALE;

    BreakIterator breaker = BreakIterator.getLineInstance();
    breaker.setText(textContent);

    List<Line> finished = new ArrayList<Line>();
    Line pending = new Line();
    float usedWidth = 0;

    int from = breaker.first();
    for (int to = breaker.next(); to != BreakIterator.DONE; from = to, to = breaker.next())
    {
        final String chunk = textContent.substring(from, to);
        final float chunkWidth = font.getStringWidth(chunk) * scale;

        usedWidth += chunkWidth;

        // an overflowing chunk may still fit once its trailing whitespace is ignored
        final String lastChar = chunk.substring(chunk.length()-1);
        if (usedWidth >= width && Character.isWhitespace(lastChar.charAt(0)))
        {
            usedWidth -= font.getStringWidth(lastChar) * scale;
        }

        if (usedWidth >= width)
        {
            // close the pending line; the chunk opens the next one
            pending.setWidth(pending.calculateWidth(font, fontSize));
            finished.add(pending);
            pending = new Line();
            usedWidth = font.getStringWidth(chunk) * scale;
        }

        AttributedString attributes = new AttributedString(chunk);
        attributes.addAttribute(TextAttribute.WIDTH, chunkWidth);
        Word entry = new Word(chunk);
        entry.setAttributes(attributes);
        pending.addWord(entry);
    }

    // the line still being assembled is always emitted
    pending.setWidth(pending.calculateWidth(font, fontSize));
    finished.add(pending);
    return finished;
}
 
Example 16
Source File: BreakIteratorTest.java    From TencentKona-8 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Regression test for bug 4638433: checks word and line breaking around
 * characters whose presence or category changed with Unicode 3.0.0, plus
 * the Thai-specific line break rules.
 */
public void TestLineBreakBasedOnUnicode3_0_0()
{
    int boundary;

    // Word breaking over Latin Extended-B letters; 0x0218-0x0233 were added
    // with Unicode 3.0.0. The whole run must be a single word.
    final BreakIterator wordIter = BreakIterator.getWordInstance(Locale.US);
    wordIter.setText("\u0216\u0217\u0218\u0219\u021A");
    wordIter.first();
    boundary = wordIter.next();
    if (boundary != 5) {
        errln("Word break failure: failed to stop at 5 and bounded at " + boundary);
    }

    final BreakIterator lineIter = BreakIterator.getLineInstance(Locale.US);

    // <Three(Nd)><Two(Nd)><Low Double Prime Quotation Mark(Pe)><One(Nd)>
    // The quotation mark changed category from Ps to Pe after Unicode 2.1.
    lineIter.setText("32\u301f1");
    lineIter.first();
    boundary = lineIter.next();
    if (boundary != 3) {
        errln("Line break failure: failed to skip before \\u301F(Pe) at 3 and bounded at " + boundary);
    }

    // Mongolian <Letter A(Lo)><Todo Soft Hyphen(Pd)><Letter E(Lo)>,
    // added with Unicode 3.0.0.
    lineIter.setText("\u1820\u1806\u1821");
    lineIter.first();
    boundary = lineIter.next();
    if (boundary != 2) {
        errln("Mongolian line break failure: failed to skip position before \\u1806(Pd) at 2 and bounded at " + boundary);
    }

    // Khmer <ZERO(Nd)><Currency Symbol(Sc)><ONE(Nd)>, added with
    // Unicode 3.0.0. Two consecutive boundaries are verified here.
    lineIter.setText("\u17E0\u17DB\u17E1");
    lineIter.first();
    boundary = lineIter.next();
    if (boundary != 1) {
        errln("Khmer line break failure: failed to stop before \\u17DB(Sc) at 1 and bounded at " + boundary);
    }
    boundary = lineIter.next();
    if (boundary != 3) {
        errln("Khmer line break failure: failed to skip position after \\u17DB(Sc) at 3 and bounded at " + boundary);
    }

    // Ogham <Letter UR(Lo)><Space Mark(Zs)><Letter OR(Lo)>, added with
    // Unicode 3.0.0.
    lineIter.setText("\u1692\u1680\u1696");
    lineIter.first();
    boundary = lineIter.next();
    if (boundary != 2) {
        errln("Ogham line break failure: failed to skip postion before \\u1680(Zs) at 2 and bounded at " + boundary);
    }

    // Confirm changes in BreakIteratorRules_th.java have been reflected.
    final BreakIterator thaiLineIter = BreakIterator.getLineInstance(new Locale("th", ""));

    // Thai <Seven(Nd)>
    //      <Left Double Quotation Mark(Pi)>
    //      <Five(Nd)>
    //      <Right Double Quotation Mark(Pf)>
    //      <Three(Nd)>
    thaiLineIter.setText("\u0E57\u201C\u0E55\u201D\u0E53");
    thaiLineIter.first();
    boundary = thaiLineIter.next();
    if (boundary != 1) {
        errln("Thai line break failure: failed to stop before \\u201C(Pi) at 1 and bounded at " + boundary);
    }
    boundary = thaiLineIter.next();
    if (boundary != 4) {
        errln("Thai line break failure: failed to stop after \\u201D(Pf) at 4 and bounded at " + boundary);
    }
}
 
Example 17
Source File: Utils.java    From moa with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Splits a string into lines once they have reached "columns" characters.
  * <p>
  * The input is first split at newlines. Each part is then assembled word
  * by word; a line is emitted when, before adding the next word, it already
  * holds at least "columns" characters. A single punctuation or space
  * character arriving at that moment is still attached to the line being
  * emitted. Empty lines are never emitted.
  *
  * @param s		the string to process
  * @param columns	the width in columns
  * @return		the resulting lines
  */
 public static String[] breakUp(String s, int columns) {
   final String punctuation = " .,;:!?'\"";
   Vector<String> result = new Vector<String>();

   for (String text : s.split("\n")) {
     BreakIterator boundary = BreakIterator.getWordInstance();
     boundary.setText(text);

     StringBuilder line = new StringBuilder();
     int from = boundary.first();
     for (int to = boundary.next(); to != BreakIterator.DONE; from = to, to = boundary.next()) {
       String word = text.substring(from, to);

       if (line.length() >= columns) {
         // keep a lone punctuation mark on the line it terminates
         boolean closesLine = (word.length() == 1)
             && (punctuation.indexOf(word.charAt(0)) > -1);
         if (closesLine) {
           line.append(word);
           word = "";
         }
         result.add(line.toString());
         line.setLength(0);
       }

       line.append(word);
     }

     if (line.length() > 0) {
       result.add(line.toString());
     }
   }

   return result.toArray(new String[result.size()]);
 }
 
Example 18
Source File: BreakIteratorTest.java    From dragonwell8_jdk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks the invariant that a break always follows LF, PS and LS.
 *
 * <p>For every combination of a line separator and two characters from
 * {@code testChars}, the three-character string
 * "test char, separator, test char" is handed to {@code tb}, and an error is
 * logged unless a boundary is reported at offset 2. Combinations involving
 * CONTROL or FORMAT characters are skipped. The test gives up after 75
 * errors.
 *
 * @param tb        the break iterator under test
 * @param testChars characters to place around the separator
 */
private void doBreakInvariantTest(BreakIterator tb, String testChars)
{
    StringBuffer work = new StringBuffer("aaa");
    int errorCount = 0;

    // a break should always occur after CR (unless followed by LF), LF, PS, and LS.
    // CR is left out of the list for now; put it back in front of LF when
    // new BI code is added.
    final String breaks = "\n\u2029\u2028";

    for (int b = 0; b < breaks.length(); b++) {
        final char separator = breaks.charAt(b);
        work.setCharAt(1, separator);

        for (int j = 0; j < testChars.length(); j++) {
            work.setCharAt(0, testChars.charAt(j));

            for (int k = 0; k < testChars.length(); k++) {
                final char trailing = testChars.charAt(k);

                // CR LF stays together, so that pair is not checked
                if (separator == '\r' && trailing == '\n') {
                    continue;
                }

                // CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored
                // for breaking purposes as per UTR14
                final int separatorType = Character.getType(separator);
                final int trailingType = Character.getType(trailing);
                final boolean ignorable =
                        separatorType == Character.CONTROL || separatorType == Character.FORMAT
                        || trailingType == Character.CONTROL || trailingType == Character.FORMAT;
                if (ignorable) {
                    continue;
                }

                work.setCharAt(2, trailing);
                tb.setText(work.toString());

                // walk all boundaries and remember whether offset 2 is one of them
                boolean seen2 = false;
                int at = tb.first();
                while (at != BreakIterator.DONE) {
                    if (at == 2) {
                        seen2 = true;
                    }
                    at = tb.next();
                }
                if (seen2) {
                    continue;
                }

                errln("No break between U+" + Integer.toHexString((int)(work.charAt(1)))
                            + " and U+" + Integer.toHexString((int)(work.charAt(2))));
                errorCount++;
                if (errorCount >= 75) {
                    return;
                }
            }
        }
    }
}
 
Example 19
Source File: BreakIteratorTest.java    From openjdk-jdk9 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks two "never break here" invariants of {@code tb}.
 *
 * <p>First, no boundary may be reported between CR and LF, whatever
 * characters from {@code testChars} surround the pair. Second, no boundary
 * may be reported directly before a non-spacing or enclosing mark unless the
 * preceding character is a line terminator. Either phase stops the whole
 * test once 75 errors have been logged.
 *
 * @param tb        the break iterator under test
 * @param testChars characters used to build the test strings
 */
private void doOtherInvariantTest(BreakIterator tb, String testChars)
{
    StringBuffer work = new StringBuffer("a\r\na");
    int errorCount = 0;
    final int charCount = testChars.length();

    // a break should never occur between CR and LF
    for (int i = 0; i < charCount; i++) {
        work.setCharAt(0, testChars.charAt(i));
        for (int j = 0; j < charCount; j++) {
            work.setCharAt(3, testChars.charAt(j));
            tb.setText(work.toString());

            int at = tb.first();
            while (at != BreakIterator.DONE) {
                if (at == 2) {
                    errln("Break between CR and LF in string U+" + Integer.toHexString(
                            (int)(work.charAt(0))) + ", U+d U+a U+" + Integer.toHexString(
                            (int)(work.charAt(3))));
                    errorCount++;
                    if (errorCount >= 75) {
                        return;
                    }
                }
                at = tb.next();
            }
        }
    }

    // a break should never occur before a non-spacing mark, unless it's preceded
    // by a line terminator
    work.setLength(0);
    work.append("aaaa");
    for (int i = 0; i < charCount; i++) {
        final char lead = testChars.charAt(i);
        final boolean terminator = lead == '\n' || lead == '\r' || lead == '\u2029'
                || lead == '\u2028' || lead == '\u0003';
        if (terminator) {
            continue;
        }
        work.setCharAt(1, lead);

        for (int j = 0; j < charCount; j++) {
            final char mark = testChars.charAt(j);
            final int markType = Character.getType(mark);
            if (markType != Character.NON_SPACING_MARK
                    && markType != Character.ENCLOSING_MARK) {
                continue;
            }
            work.setCharAt(2, mark);

            // CONTROL (Cc) and FORMAT (Cf) Characters are to be ignored
            // for breaking purposes as per UTR14
            final int leadType = Character.getType(work.charAt(1));
            if (leadType == Character.CONTROL || leadType == Character.FORMAT
                    || markType == Character.CONTROL || markType == Character.FORMAT) {
                continue;
            }

            tb.setText(work.toString());
            int at = tb.first();
            while (at != BreakIterator.DONE) {
                if (at == 2) {
                    errln("Break between U+" + Integer.toHexString((int)(work.charAt(1)))
                            + " and U+" + Integer.toHexString((int)(work.charAt(2))));
                    errorCount++;
                    if (errorCount >= 75) {
                        return;
                    }
                }
                at = tb.next();
            }
        }
    }
}
 
Example 20
Source File: StemmingLemaEx.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 4 votes vote down vote up
/**
 * Demo entry point: prints Porter stems for a handful of words and then
 * splits a sample paragraph into sentences in four different ways (plain
 * regex split, an annotated regex pattern, {@link BreakIterator} and an
 * OpenNLP sentence detector model loaded from disk).
 *
 * @param args ignored
 */
public static void main(String args[]){
    String words[] = {"bank", "banking", "banks", "banker", "banked", 
"bankart"};
    PorterStemmer ps = new PorterStemmer();
    for(String w : words){
        String stem = ps.stem(w);
        System.out.println("Word : " + w + " Stem : " + stem);
    }
    String paragraph = "When determining the end of sentences "
        + "we need to consider several factors. Sentences may end with "
        + "exclamation marks! Or possibly questions marks? Within "
        + "sentences we may find numbers like 3.14159, abbreviations "
        + "such as found in Mr. Smith, and possibly ellipses either "
        + "within a sentence …, or at the end of a sentence…";
    // Naive approach: cut at every sentence-ending punctuation character.
    String simple = "[.?!]";
    String[] splitString = (paragraph.split(simple));
    for (String string : splitString) {
        System.out.println(string);
    }
    System.out.println("-------------Using Pattern and Matcher-------------");
    // Pattern.COMMENTS lets the regex carry its own inline documentation.
    Pattern sentencePattern = Pattern.compile(
        "# Match a sentence ending in punctuation or EOS.\n"
        + "[^.!?\\s]    # First char is non-punct, non-ws\n"
        + "[^.!?]*      # Greedily consume up to punctuation.\n"
        + "(?:          # Group for unrolling the loop.\n"
        + "  [.!?]      # (special) inner punctuation ok if\n"
        + "  (?!['\"]?\\s|$)  # not followed by ws or EOS.\n"
        + "  [^.!?]*    # Greedily consume up to punctuation.\n"
        + ")*           # Zero or more (special normal*)\n"
        + "[.!?]?       # Optional ending punctuation.\n"
        + "['\"]?       # Optional closing quote.\n"
        + "(?=\\s|$)",
        Pattern.MULTILINE | Pattern.COMMENTS);
    Matcher matcher = sentencePattern.matcher(paragraph);
    while (matcher.find()) {
        System.out.println(matcher.group());
    }
    System.out.println("-------------Using BreakIterator-------------");
    BreakIterator si = BreakIterator.getSentenceInstance();
    si.setText(paragraph);
    int boundary = si.first();
    while(boundary!=BreakIterator.DONE){
        int begin = boundary;
        System.out.println(boundary + " - ");
        boundary = si.next();
        int end = boundary;
        if(end == BreakIterator.DONE){
            break;
        }
        System.out.println(boundary + " [ " + paragraph.substring(begin,end) + " ] ");
    }
    System.out.println("-------------Using SentenceDetectorME-------------");
    // try-with-resources closes the model stream even when loading the
    // model or detecting sentences fails; previously it was never closed.
    try (InputStream is = new FileInputStream(new File("/home/ashish/Downloads/" + "en-sent.bin"))) {
        SentenceModel sm = new SentenceModel(is);
        SentenceDetectorME detector = new SentenceDetectorME(sm);
        String sentences [] = detector.sentDetect(paragraph);
        for(String s : sentences){
            System.out.println(s);
        }
    }
    catch(IOException e){
        System.out.println("Error Detected" + e);
        e.printStackTrace();
    }
}