Java Code Examples for java.text.BreakIterator#getSentenceInstance()
The following examples show how to use
java.text.BreakIterator#getSentenceInstance().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SimpleGroovyDoc.java From groovy with Apache License 2.0 | 6 votes |
public static String calculateFirstSentence(String raw) { // remove all the * from beginning of lines String text = raw.replaceAll("(?m)^\\s*\\*", "").trim(); // assume a <p> paragraph tag signifies end of sentence text = text.replaceFirst("(?ms)<p>.*", "").trim(); // assume completely blank line signifies end of sentence text = text.replaceFirst("(?ms)\\n\\s*\\n.*", "").trim(); // assume @tag signifies end of sentence text = text.replaceFirst("(?ms)\\n\\s*@(see|param|throws|return|author|since|exception|version|deprecated|todo)\\s.*", "").trim(); // Comment Summary using first sentence (Locale sensitive) BreakIterator boundary = BreakIterator.getSentenceInstance(Locale.getDefault()); // todo - allow locale to be passed in boundary.setText(text); int start = boundary.first(); int end = boundary.next(); if (start > -1 && end > -1) { // need to abbreviate this comment for the summary text = text.substring(start, end); } return text; }
Example 2
Source File: DocCommentParser.java From hottub with GNU General Public License v2.0 | 6 votes |
/**
 * Creates a parser for a single documentation comment.
 *
 * <p>A locale-specific sentence {@link BreakIterator} is only installed when
 * the "breakIterator" option is set or the effective locale's language is
 * not English; otherwise {@code sentenceBreaker} is left unassigned here.
 *
 * @param fac        factory supplying names, tree maker, locale and options
 * @param diagSource diagnostic source stored on this parser
 * @param comment    the comment stored on this parser
 */
DocCommentParser(ParserFactory fac, DiagnosticSource diagSource, Comment comment) {
    this.fac = fac;
    this.diagSource = diagSource;
    this.comment = comment;
    names = fac.names;
    m = fac.docTreeMaker;

    // Fall back to the JVM default when the factory carries no locale.
    final Locale effectiveLocale = (fac.locale != null) ? fac.locale : Locale.getDefault();
    final boolean forcedByOption = fac.options.isSet("breakIterator");
    final boolean nonEnglish =
            !Locale.ENGLISH.getLanguage().equals(effectiveLocale.getLanguage());
    if (forcedByOption || nonEnglish) {
        sentenceBreaker = BreakIterator.getSentenceInstance(effectiveLocale);
    }

    initTagParsers();
}
Example 3
Source File: ImportExportServiceImpl.java From webanno with Apache License 2.0 | 6 votes |
/**
 * Walks the sentence boundaries of the CAS document text (US locale rules)
 * and indexes one sentence annotation per non-empty span.
 *
 * <p>Each raw span is first handed to {@code trim}, which may adjust the
 * span in place (presumably stripping surrounding whitespace; see that
 * helper), and is skipped when {@code isEmpty} reports it as empty.
 *
 * @param aCas the CAS whose document text is segmented and which receives
 *             the created sentences
 */
public static void splitSentences(CAS aCas) {
    BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(Locale.US);
    sentenceIterator.setText(aCas.getDocumentText());

    int begin = sentenceIterator.first();
    for (int end = sentenceIterator.next();
            end != BreakIterator.DONE;
            end = sentenceIterator.next()) {
        int[] span = { begin, end };
        trim(aCas.getDocumentText(), span);
        if (!isEmpty(span[0], span[1])) {
            aCas.addFsToIndexes(createSentence(aCas, span[0], span[1]));
        }
        // The next sentence starts where this one ended.
        begin = end;
    }
}
Example 4
Source File: AccessibleHTML.java From Bytecoder with Apache License 2.0 | 5 votes |
/**
 * Returns the segment around <code>index</code> that covers either the
 * word or the sentence, as selected by <code>part</code>.
 *
 * <p>On success the segment obtained from
 * <code>getParagraphElementText(index)</code> is narrowed in place: its
 * <code>offset</code> and <code>count</code> delimit the word/sentence in
 * the array and its <code>modelOffset</code> is shifted by the same amount.
 *
 * @param part  <code>AccessibleText.WORD</code> or
 *              <code>AccessibleText.SENTENCE</code>
 * @param index the model index to look up
 * @return the narrowed segment, or null if no paragraph text is available,
 *         <code>part</code> is any other value, or no valid boundaries are
 *         found inside the segment
 * @throws BadLocationException declared by this method; presumably raised
 *         by the paragraph lookup
 */
private IndexedSegment getSegmentAt(int part, int index)
        throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    breaker.setText(seg);

    // One past the last array position that belongs to this segment.
    final int limit = seg.offset + seg.count;

    // Translate the model index into an array position before searching.
    final int end = breaker.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > limit) {
        return null;
    }

    final int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= limit) {
        return null;
    }

    // Update modelOffset first: it depends on the old seg.offset.
    seg.modelOffset += begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 5
Source File: DocLocale.java From openjdk-jdk8u with GNU General Public License v2.0 | 5 votes |
/**
 * Creates the locale holder, resolving the locale through
 * {@code getLocale()} and building the collator and sentence breaker
 * for it.
 *
 * <p>When a locale is resolved it is also installed as the JVM default;
 * when none is resolved, {@code docenv.exit()} is invoked.
 *
 * @param docenv           the doc environment stored on this instance
 * @param localeName       the locale name stored on this instance
 * @param useBreakIterator flag stored on this instance
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;

    this.locale = getLocale();
    if (this.locale != null) {
        Locale.setDefault(this.locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }

    this.collator = Collator.getInstance(this.locale);
    this.sentenceBreaker = BreakIterator.getSentenceInstance(this.locale);
}
Example 6
Source File: AccessibleHTML.java From hottub with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the segment around <code>index</code> that covers either the
 * word or the sentence, as selected by <code>part</code>.
 *
 * <p>On success the segment obtained from
 * <code>getParagraphElementText(index)</code> is narrowed in place: its
 * <code>offset</code> and <code>count</code> delimit the word/sentence in
 * the array and its <code>modelOffset</code> is shifted by the same amount.
 *
 * @param part  <code>AccessibleText.WORD</code> or
 *              <code>AccessibleText.SENTENCE</code>
 * @param index the model index to look up
 * @return the narrowed segment, or null if no paragraph text is available,
 *         <code>part</code> is any other value, or no valid boundaries are
 *         found inside the segment
 * @throws BadLocationException declared by this method; presumably raised
 *         by the paragraph lookup
 */
private IndexedSegment getSegmentAt(int part, int index)
        throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    breaker.setText(seg);

    // One past the last array position that belongs to this segment.
    final int limit = seg.offset + seg.count;

    // Translate the model index into an array position before searching.
    final int end = breaker.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > limit) {
        return null;
    }

    final int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= limit) {
        return null;
    }

    // Update modelOffset first: it depends on the old seg.offset.
    seg.modelOffset += begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 7
Source File: TestCustomSeparatorBreakIterator.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that a {@code CustomSeparatorBreakIterator} built with a random
 * separator reports the same breaks as the ROOT-locale sentence iterator
 * on slices taken from the start of several short strings.
 */
public void testSliceEnd() throws Exception {
    BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
    BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());

    // One-character slices taken at offset 0 of increasingly long inputs.
    for (String text : new String[] { "a000", "ab000", "abc000" }) {
        assertSameBreaks(text, 0, 1, expected, actual);
    }
    // Empty slice.
    assertSameBreaks("000", 0, 0, expected, actual);
}
Example 8
Source File: LocalizedBundleInfo.java From netbeans with Apache License 2.0 | 5 votes |
private static String[] splitBySentence(String text) { List<String> sentences = new ArrayList<String>(); // Use Locale.US since the customizer is setting the default (US) locale text only: BreakIterator it = BreakIterator.getSentenceInstance(Locale.US); it.setText(text); int start = it.first(); int end; while ((end = it.next()) != BreakIterator.DONE) { sentences.add(text.substring(start, end)); start = end; } return sentences.toArray(new String[sentences.size()]); }
Example 9
Source File: AccessibleHTML.java From JDKSourceCode1.8 with MIT License | 5 votes |
/**
 * Returns the segment around <code>index</code> that covers either the
 * word or the sentence, as selected by <code>part</code>.
 *
 * <p>On success the segment obtained from
 * <code>getParagraphElementText(index)</code> is narrowed in place: its
 * <code>offset</code> and <code>count</code> delimit the word/sentence in
 * the array and its <code>modelOffset</code> is shifted by the same amount.
 *
 * @param part  <code>AccessibleText.WORD</code> or
 *              <code>AccessibleText.SENTENCE</code>
 * @param index the model index to look up
 * @return the narrowed segment, or null if no paragraph text is available,
 *         <code>part</code> is any other value, or no valid boundaries are
 *         found inside the segment
 * @throws BadLocationException declared by this method; presumably raised
 *         by the paragraph lookup
 */
private IndexedSegment getSegmentAt(int part, int index)
        throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    breaker.setText(seg);

    // One past the last array position that belongs to this segment.
    final int limit = seg.offset + seg.count;

    // Translate the model index into an array position before searching.
    final int end = breaker.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > limit) {
        return null;
    }

    final int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= limit) {
        return null;
    }

    // Update modelOffset first: it depends on the old seg.offset.
    seg.modelOffset += begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 10
Source File: AccessibleHTML.java From jdk8u_jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the segment around <code>index</code> that covers either the
 * word or the sentence, as selected by <code>part</code>.
 *
 * <p>On success the segment obtained from
 * <code>getParagraphElementText(index)</code> is narrowed in place: its
 * <code>offset</code> and <code>count</code> delimit the word/sentence in
 * the array and its <code>modelOffset</code> is shifted by the same amount.
 *
 * @param part  <code>AccessibleText.WORD</code> or
 *              <code>AccessibleText.SENTENCE</code>
 * @param index the model index to look up
 * @return the narrowed segment, or null if no paragraph text is available,
 *         <code>part</code> is any other value, or no valid boundaries are
 *         found inside the segment
 * @throws BadLocationException declared by this method; presumably raised
 *         by the paragraph lookup
 */
private IndexedSegment getSegmentAt(int part, int index)
        throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    breaker.setText(seg);

    // One past the last array position that belongs to this segment.
    final int limit = seg.offset + seg.count;

    // Translate the model index into an array position before searching.
    final int end = breaker.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > limit) {
        return null;
    }

    final int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= limit) {
        return null;
    }

    // Update modelOffset first: it depends on the old seg.offset.
    seg.modelOffset += begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 11
Source File: AccessibleHTML.java From jdk8u-jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the segment around <code>index</code> that covers either the
 * word or the sentence, as selected by <code>part</code>.
 *
 * <p>On success the segment obtained from
 * <code>getParagraphElementText(index)</code> is narrowed in place: its
 * <code>offset</code> and <code>count</code> delimit the word/sentence in
 * the array and its <code>modelOffset</code> is shifted by the same amount.
 *
 * @param part  <code>AccessibleText.WORD</code> or
 *              <code>AccessibleText.SENTENCE</code>
 * @param index the model index to look up
 * @return the narrowed segment, or null if no paragraph text is available,
 *         <code>part</code> is any other value, or no valid boundaries are
 *         found inside the segment
 * @throws BadLocationException declared by this method; presumably raised
 *         by the paragraph lookup
 */
private IndexedSegment getSegmentAt(int part, int index)
        throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    breaker.setText(seg);

    // One past the last array position that belongs to this segment.
    final int limit = seg.offset + seg.count;

    // Translate the model index into an array position before searching.
    final int end = breaker.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > limit) {
        return null;
    }

    final int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= limit) {
        return null;
    }

    // Update modelOffset first: it depends on the old seg.offset.
    seg.modelOffset += begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 12
Source File: AccessibleHTML.java From jdk8u-dev-jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the segment around <code>index</code> that covers either the
 * word or the sentence, as selected by <code>part</code>.
 *
 * <p>On success the segment obtained from
 * <code>getParagraphElementText(index)</code> is narrowed in place: its
 * <code>offset</code> and <code>count</code> delimit the word/sentence in
 * the array and its <code>modelOffset</code> is shifted by the same amount.
 *
 * @param part  <code>AccessibleText.WORD</code> or
 *              <code>AccessibleText.SENTENCE</code>
 * @param index the model index to look up
 * @return the narrowed segment, or null if no paragraph text is available,
 *         <code>part</code> is any other value, or no valid boundaries are
 *         found inside the segment
 * @throws BadLocationException declared by this method; presumably raised
 *         by the paragraph lookup
 */
private IndexedSegment getSegmentAt(int part, int index)
        throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    breaker.setText(seg);

    // One past the last array position that belongs to this segment.
    final int limit = seg.offset + seg.count;

    // Translate the model index into an array position before searching.
    final int end = breaker.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > limit) {
        return null;
    }

    final int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= limit) {
        return null;
    }

    // Update modelOffset first: it depends on the old seg.offset.
    seg.modelOffset += begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 13
Source File: SpellCheckIterator.java From xds-ide with Eclipse Public License 1.0 | 5 votes |
/**
 * Creates a new spell check iterator.
 *
 * <p>The region's text is read once from the document; if it cannot be
 * read for any reason, the iterator operates on empty content. All
 * sentence boundaries of the content are precomputed into
 * <code>fSentenceBreaks</code>.
 *
 * @param document the document containing the specified partition
 * @param region the region to spell check
 * @param locale the locale to use for spell checking
 * @param breakIterator the break-iterator
 */
public SpellCheckIterator(IDocument document, IRegion region, Locale locale, BreakIterator breakIterator) {
    fOffset= region.getOffset();
    fWordIterator= breakIterator;
    fDelimiter= TextUtilities.getDefaultLineDelimiter(document);

    String content;
    try {
        content= document.get(region.getOffset(), region.getLength());
    } catch (Exception exception) {
        // Best effort: an unreadable region is treated as empty text.
        content= ""; //$NON-NLS-1$
    }
    fContent= content;

    fWordIterator.setText(content);
    fPredecessor= fWordIterator.first();
    fSuccessor= fWordIterator.next();

    final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
    iterator.setText(content);

    // Start from the iterator's current position right after setText.
    int offset= iterator.current();
    while (offset != BreakIterator.DONE) {
        // Integer.valueOf replaces the deprecated Integer(int) constructor.
        fSentenceBreaks.add(Integer.valueOf(offset));
        offset= iterator.next();
    }
}
Example 14
Source File: DocLocale.java From TencentKona-8 with GNU General Public License v2.0 | 5 votes |
/**
 * Creates the locale holder, resolving the locale through
 * {@code getLocale()} and building the collator and sentence breaker
 * for it.
 *
 * <p>When a locale is resolved it is also installed as the JVM default;
 * when none is resolved, {@code docenv.exit()} is invoked.
 *
 * @param docenv           the doc environment stored on this instance
 * @param localeName       the locale name stored on this instance
 * @param useBreakIterator flag stored on this instance
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;

    this.locale = getLocale();
    if (this.locale != null) {
        Locale.setDefault(this.locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }

    this.collator = Collator.getInstance(this.locale);
    this.sentenceBreaker = BreakIterator.getSentenceInstance(this.locale);
}
Example 15
Source File: BreakIteratorTest.java From jdk8u_jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Sets up one default-locale break iterator of each kind (sentence, line,
 * word, character) for use by the tests.
 */
public BreakIteratorTest() {
    this.sentenceBreak = BreakIterator.getSentenceInstance();
    this.lineBreak = BreakIterator.getLineInstance();
    this.wordBreak = BreakIterator.getWordInstance();
    this.characterBreak = BreakIterator.getCharacterInstance();
}
Example 16
Source File: UnifiedSolrHighlighter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Maps a break iterator type name to a {@link BreakIterator} for the
 * specified locale.
 *
 * @param type   one of "SENTENCE", "LINE", "WORD" or "CHARACTER"
 *               (case-sensitive); {@code null} selects the sentence iterator
 * @param locale the locale the iterator is created for
 * @return a new break iterator of the requested kind
 * @throws IllegalArgumentException if {@code type} is any other value
 */
protected BreakIterator parseBreakIterator(String type, Locale locale) {
    // A missing type defaults to sentence breaking.
    if (type == null) {
        return BreakIterator.getSentenceInstance(locale);
    }
    switch (type) {
        case "SENTENCE":
            return BreakIterator.getSentenceInstance(locale);
        case "LINE":
            return BreakIterator.getLineInstance(locale);
        case "WORD":
            return BreakIterator.getWordInstance(locale);
        case "CHARACTER":
            return BreakIterator.getCharacterInstance(locale);
        default:
            throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type);
    }
}
Example 17
Source File: PropertiesDocReader.java From jasperreports with GNU Lesser General Public License v3.0 | 4 votes |
public void writeDefaultMessages() { Properties defaultMessages = new Properties(); BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.US); for (CompiledPropertyMetadata prop : properties.getProperties()) { String descriptionMessage = PropertyMetadataConstants.PROPERTY_DESCRIPTION_PREFIX + prop.getName(); if (propertyMessages == null || !propertyMessages.containsKey(descriptionMessage)) { Element docNode = propertyDocNodes.get(prop.getName()); if (docNode != null) { String docText = docNode.getTextContent(); sentenceBreaks.setText(docText); int first = sentenceBreaks.first(); int next = sentenceBreaks.next(); String firstSentence = docText.substring(first, next); firstSentence = PATTERN_LEADING_WHITE_SPACE.matcher(firstSentence).replaceAll(""); firstSentence = PATTERN_TRAILING_WHITE_SPACE.matcher(firstSentence).replaceAll(""); defaultMessages.setProperty(descriptionMessage, firstSentence); } } } if (!defaultMessages.isEmpty()) { try { FileObject res = environment.getFiler().createResource(StandardLocation.CLASS_OUTPUT, "", properties.getMessagesName() + PropertyMetadataConstants.MESSAGES_DEFAULTS_SUFFIX, (javax.lang.model.element.Element[]) null); try (OutputStream out = res.openOutputStream()) { //TODO lucianc preserve order defaultMessages.store(out, null); } } catch (IOException e) { throw new RuntimeException(e); } } }
Example 18
Source File: BreakIteratorTest.java From TencentKona-8 with GNU General Public License v2.0 | 4 votes |
/**
 * Runs the shared invariant checks against a default-locale sentence
 * break iterator, using the canned test characters extended with extra
 * punctuation and other characters.
 */
public void TestSentenceInvariants() {
    String sample = cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff";
    doOtherInvariantTest(BreakIterator.getSentenceInstance(), sample);
}
Example 19
Source File: TestSegmentingTokenizerBase.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Creates the tokenizer, passing the superclass the factory returned by {@code newAttributeFactory()} and a {@code Locale.ROOT} sentence break iterator. */
public SentenceAndWordTokenizer() { super(newAttributeFactory(), BreakIterator.getSentenceInstance(Locale.ROOT)); }
Example 20
Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0 | 4 votes |
/**
 * Runs the shared invariant checks against a default-locale sentence
 * break iterator, using the canned test characters extended with extra
 * punctuation and other characters.
 */
public void TestSentenceInvariants() {
    String sample = cannedTestChars + ".,\u3001\u3002\u3041\u3042\u3043\ufeff";
    doOtherInvariantTest(BreakIterator.getSentenceInstance(), sample);
}