Java Code Examples for java.util.regex.MatchResult#start()

The following examples show how to use java.util.regex.MatchResult#start(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HeidelTimeOpenNLP.java    From newsleak with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Checks whether every part-of-speech constraint listed in a rule holds for a match.
 *
 * <p>Constraints are read from {@code posConstraint} using the pattern
 * {@code group\(([0-9]+)\):(.*?):}. For each one, the span of the referenced capture group
 * of {@code m} (shifted by the sentence begin) is handed to {@code getPosFromMatchResult},
 * and the value that comes back has to match the constraint's expression as a whole.
 *
 * @param s
 *            sentence whose begin offset is added to the group offsets
 * @param posConstraint
 *            constraint string of the rule
 * @param m
 *            match result whose capture groups the constraints refer to
 * @param jcas
 *            CAS object passed through to the POS lookup
 * @return {@code false} as soon as one constraint is violated, {@code true} otherwise
 *         (including the case that no constraint is found)
 */
public boolean checkPosConstraint(Sentence s, String posConstraint, MatchResult m, JCas jcas) {
	Pattern constraintPattern = Pattern.compile("group\\(([0-9]+)\\):(.*?):");
	for (MatchResult constraint : Toolbox.findMatches(constraintPattern, posConstraint)) {
		int group = Integer.parseInt(constraint.group(1));
		int sentenceBegin = s.getBegin();
		int spanBegin = sentenceBegin + m.start(group);
		int spanEnd = sentenceBegin + m.end(group);
		String expectedPos = constraint.group(2);
		String actualPos = getPosFromMatchResult(spanBegin, spanEnd, s, jcas);
		if (!actualPos.matches(expectedPos)) {
			// first violated constraint decides the outcome
			return false;
		}
		Logger.printDetail("POS CONSTRAINT IS VALID: pos should be " + expectedPos + " and is " + actualPos);
	}
	return true;
}
 
Example 2
Source File: CallbackMatcher.java    From EDDI with Apache License 2.0 6 votes vote down vote up
/**
 * Replaces each match of this matcher's pattern with the text supplied by a callback.
 *
 * <p>Matching runs over the unmodified input while the edits go into a separate builder,
 * so a running shift records how far earlier replacements have moved later positions.
 *
 * @param charSequence input to search
 * @param callback asked for a replacement per match; a {@code null} answer leaves that match as is
 * @return the input with all replacements applied
 * @throws CallbackMatcherException presumably raised by the callback; this method does not
 *         throw it itself
 */
public String replaceMatches(CharSequence charSequence, Callback callback) throws CallbackMatcherException {
    final StringBuilder output = new StringBuilder(charSequence);
    final Matcher matcher = this.pattern.matcher(charSequence);

    // net length difference introduced by the replacements applied so far
    int shift = 0;
    while (matcher.find()) {
        final MatchResult match = matcher.toMatchResult();
        final String substitute = callback.foundMatch(match);
        if (substitute != null) {
            final int from = match.start();
            final int to = match.end();
            output.replace(shift + from, shift + to, substitute);
            shift += substitute.length() - (to - from);
        }
    }

    return output.toString();
}
 
Example 3
Source File: FormatterPreviewUtils.java    From APICloud-Studio with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Fills numbered placeholders in a text with entries of an array.
 * <p>
 * Every match of {@code SUBSTITUTION_PATTERN} is read as a number once its first and last
 * character are stripped (the original note describes the placeholders as {0}...{n}).
 * Numbers outside the array bounds leave the placeholder untouched.<br>
 * (Note - this is used instead of NLS.bind() because that one does not cope well with code blocks)
 * 
 * @param content
 *            the text containing the placeholders
 * @param substitutions
 *            replacement values, addressed by placeholder number
 * @return A string, substituted with the array's content.
 */
private static String substitute(String content, String[] substitutions)
{
	StringBuilder result = new StringBuilder(content);
	// how far the replacements made so far have shifted positions in 'result'
	int shift = 0;
	for (Matcher matcher = SUBSTITUTION_PATTERN.matcher(content); matcher.find();)
	{
		MatchResult match = matcher.toMatchResult();
		int from = match.start();
		int to = match.end();
		int slot = Integer.parseInt(content.substring(from + 1, to - 1));
		if (slot < 0 || slot >= substitutions.length)
		{
			continue;
		}
		String replacement = substitutions[slot];
		result.replace(shift + from, shift + to, replacement);
		shift += replacement.length() - (to - from);
	}
	return result.toString();
}
 
Example 4
Source File: RegExpPrototype.java    From es6draft with MIT License 6 votes vote down vote up
/**
 * Search implementation for a {@link RegExpObject} that avoids saving and restoring the
 * "lastIndex" property: where a write to it would otherwise be needed, only
 * {@code RegExpThrowIfLastIndexNonWritable} is invoked.
 *
 * @param cx
 *            the execution context
 * @param rx
 *            the regular expression object
 * @param s
 *            the string to search
 * @return the start index of the match, or {@code -1} if {@code matchResultOrNull} returned
 *         {@code null}
 */
private static Object RegExpSearch(ExecutionContext cx, RegExpObject rx, String s) {
    // Directly throw TypeErrors instead of saving and restoring the "lastIndex" property.
    Object previousLastIndex = rx.getLastIndex().getValue();
    boolean lastIndexIsZero = SameValue(previousLastIndex, 0);
    if (!lastIndexIsZero) {
        // presumably stands in for resetting a non-zero lastIndex to 0 before matching
        RegExpThrowIfLastIndexNonWritable(cx, rx);
    }
    /* steps 1-3 (not applicable) */
    /* steps 4-7 */
    boolean sticky = rx.isSet(RegExpObject.Flags.Sticky);
    boolean global = rx.isSet(RegExpObject.Flags.Global);
    // the match is requested with a fixed index argument of 0, independent of lastIndex
    MatchResult result = matchResultOrNull(cx, rx, s, 0, sticky, true);
    if (lastIndexIsZero && (global || sticky)) {
        // Emulate the lastIndex update from RegExpBuiltinExec.
        RegExpThrowIfLastIndexNonWritable(cx, rx);
    }
    /* step 8 */
    if (result == null) {
        return -1;
    }
    /* step 9 */
    return result.start();
}
 
Example 5
Source File: FindReplaceDialog.java    From nextreports-designer with Apache License 2.0 6 votes vote down vote up
/**
 * Searches again from the start index of the previous search and reports whether the
 * match found there differs from the previous one (extended or reduced at the same
 * position).
 *
 * <p>Returns {@code false} right away when the regex string of {@code pattern} equals
 * {@code lastRegex}. Otherwise the document text from {@code start} to the end is
 * searched; a match only counts if it begins at offset 0 of that text and its matched
 * string differs from the one of {@code lastMatchResult}. In that case the search state
 * is updated through {@code updateStateAfterFound}.
 *
 * @param pattern the pattern to search with
 * @param start document offset at which the searched text begins
 * @return {@code true} if a different match was found at {@code start}, {@code false} if
 *         there is no match or it is the same
 */
private boolean foundExtendedMatch(Pattern pattern, int start) {
    final String regex = pattern.pattern();
    if (regex.equals(lastRegex)) {
        return false;
    }

    // load the document tail beginning at 'start' into the shared segment
    final int remaining = target.getDocument().getLength() - start;
    try {
        target.getDocument().getText(start, remaining, segment);
    } catch (BadLocationException e) {
        e.printStackTrace();
    }

    final MatchResult candidate = getMatchResult(pattern.matcher(segment.toString()), true);
    if (candidate == null || candidate.start() != 0) {
        return false;
    }
    if (lastMatchResult.group().equals(candidate.group())) {
        // same text as last time, nothing new to report
        return false;
    }

    updateStateAfterFound(candidate, start);
    return true;
}
 
Example 6
Source File: DustAnnotator.java    From Intellij-Dust with MIT License 6 votes vote down vote up
/**
 * Annotates a PSI element: open tags are passed to {@code checkMatchingCloseTag}, and in
 * comment elements of at least 8 characters every {@code TODO} run (up to the end of its
 * line) receives an info annotation with the {@code DustSyntaxHighlighter.TODO} attributes.
 */
@Override
public void annotate(@NotNull final PsiElement element, @NotNull AnnotationHolder holder) {
  if (element instanceof DustOpenTag) {
    checkMatchingCloseTag((DustOpenTag) element, holder);
  }

  if (element.getNode().getElementType() != DustTypes.COMMENT) {
    return;
  }

  String text = element.getText();
  if (text.length() < 8) {
    return;
  }

  // the last two characters of the comment are excluded from the search
  String searched = text.substring(0, text.length() - 2);
  Matcher todo = Pattern.compile("TODO[^\n]*").matcher(searched);

  // match offsets are relative to the element, annotations need absolute offsets
  int base = element.getTextRange().getStartOffset();
  while (todo.find()) {
    MatchResult hit = todo.toMatchResult();
    TextRange range = new TextRange(base + hit.start(), base + hit.end());
    holder.createInfoAnnotation(range, null).setTextAttributes(DustSyntaxHighlighter.TODO);
  }
}
 
Example 7
Source File: ElasticGazetteerService.java    From CogStack-Pipeline with Apache License 2.0 5 votes vote down vote up
/**
 * Masks matched regions of a document with {@code X} characters.
 *
 * <p>Each region {@code [start, end)} of a match result is overwritten by as many
 * {@code X} as the region is long, so the offsets of the remaining text stay the same.
 *
 * @param results match results whose start/end offsets refer to {@code document}
 * @param document the text to mask
 * @return a copy of {@code document} with every matched region replaced by {@code X}s
 */
private String replaceStrings(List<MatchResult> results, String document) {
    StringBuilder masked = new StringBuilder(document);
    for (MatchResult match : results) {
        int from = match.start();
        int to = match.end();

        // one 'X' per matched character
        StringBuilder filler = new StringBuilder();
        int remaining = to - from;
        while (remaining-- > 0) {
            filler.append('X');
        }

        masked.replace(from, to, filler.toString());
    }
    return masked.toString();
}
 
Example 8
Source File: CommentScanner.java    From ipst with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Determines where scanning of a line starts when the previous line ended inside a quoted
 * string or a block comment.
 *
 * <p>If the closing token is found, the text in front of it (if any) is reported to the
 * event handler, the corresponding "inside" flag is cleared and the position after the
 * closing token is returned. If it is not found, or no flag is set, the result is 0 and
 * the flags stay as they are.
 *
 * @param line the line being scanned
 * @param quote matcher for quotes on this line
 * @param blockCommentEnd matcher for block comment ends on this line
 * @return index at which regular scanning of the line continues
 */
int skipStart(String line, Matcher quote, Matcher blockCommentEnd) {
    MatchResult closing;
    if (isInsideString) {
        closing = find(quote, 0);
    } else if (isInsideBlockComment) {
        closing = find(blockCommentEnd, 0);
    } else {
        closing = null;
    }

    if (closing == null) {
        return 0;
    }

    // only report a leading fragment when the closing token is not at the very start
    boolean hasLeadingText = closing.start() > 0;
    if (isInsideBlockComment) {
        if (hasLeadingText) {
            eventHandler.onBlockComment(line.substring(0, closing.start()), false, true);
        }
        isInsideBlockComment = false;
    } else if (isInsideString) {
        if (hasLeadingText) {
            eventHandler.onQuoted(line.substring(0, quoteStart(closing)), false, true);
        }
        isInsideString = false;
    }
    return closing.end();
}
 
Example 9
Source File: HeidelTimeOpenNLP.java    From newsleak with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks the characters directly around a match inside the sentence text.
 *
 * <p>The match is rejected when it is preceded by a digit and a dot, preceded by a word
 * character, {@code $} or {@code +}, followed by a word character or {@code °}, or
 * followed by a dot or comma and a digit.
 * 
 * @param r
 *            MatchResult
 * @param s
 *            Respective sentence
 * @return whether or not the MatchResult is a clean one
 */
public static Boolean checkInfrontBehind(MatchResult r, Sentence s) {
	final int begin = r.start();
	final String text = s.getCoveredText();

	// get rid of expressions such as "1999" in 53453.1999
	if (begin > 1 && text.substring(begin - 2, begin).matches("\\d\\.")) {
		return false;
	}

	// get rid of expressions with a character or symbol ($+) directly in front
	if (begin > 0) {
		String before = text.substring(begin - 1, begin);
		if (before.matches("[\\w\\$\\+]") && !before.matches("\\(")) {
			return false;
		}
	}

	final int end = r.end();
	final int length = text.length();
	if (end < length) {
		// a word character or degree sign directly behind the expression
		String after = text.substring(end, end + 1);
		if (after.matches("[°\\w]") && !after.matches("\\)")) {
			return false;
		}
		// a decimal or thousands separator followed by a digit, e.g. "1999" in 1999.5
		if (end + 1 < length && text.substring(end, end + 2).matches("[\\.,]\\d")) {
			return false;
		}
	}
	return true;
}
 
Example 10
Source File: RegExpPrototype.java    From es6draft with MIT License 5 votes vote down vote up
/**
 * 21.2.5.9 RegExp.prototype[ @@search ] ( string )
 * <p>
 * Default RegExp objects (as decided by {@code isDefaultRegExpObjectForExec}) are handled by
 * {@code RegExpSearch}; all other objects go through the generic steps, which read and
 * write the "lastIndex" property via {@code Get}/{@code Set}.
 * 
 * @param cx
 *            the execution context
 * @param thisValue
 *            the function this-value
 * @param string
 *            the string
 * @return the string index of the first match, or {@code -1} if there is no match
 */
@Function(name = "[Symbol.search]", symbol = BuiltinSymbol.search, arity = 1)
public static Object search(ExecutionContext cx, Object thisValue, Object string) {
    /* step 2 */
    if (!Type.isObject(thisValue)) {
        throw newTypeError(cx, Messages.Key.NotObjectType);
    }
    /* step 1 */
    ScriptObject rx = Type.objectValue(thisValue);
    /* step 3 */
    String s = ToFlatString(cx, string);
    /* steps 4-9 (fast path) */
    if (isDefaultRegExpObjectForExec(cx, rx)) {
        return RegExpSearch(cx, (RegExpObject) rx, s);
    }
    /* step 4 */
    Object previousLastIndex = Get(cx, rx, "lastIndex");
    /* step 5 */
    // only write "lastIndex" when it is not already 0
    if (!SameValue(previousLastIndex, 0)) {
        Set(cx, rx, "lastIndex", 0, true);
    }
    /* step 6 */
    MatchResult result = matchResultOrNull(cx, rx, s, true);
    /* step 7 */
    // put the previous "lastIndex" back, but only if the value differs after matching
    Object currentLastIndex = Get(cx, rx, "lastIndex");
    if (!SameValue(currentLastIndex, previousLastIndex)) {
        Set(cx, rx, "lastIndex", previousLastIndex, true);
    }
    /* step 8 */
    if (result == null) {
        return -1;
    }
    /* step 9 */
    if (result instanceof ScriptObjectMatchResult) {
        // Extract wrapped script object to ensure no ToInteger conversion takes place
        ScriptObject object = ((ScriptObjectMatchResult) result).object;
        return Get(cx, object, "index");
    }
    return result.start();
}
 
Example 11
Source File: FindReplaceDialog.java    From nextreports-designer with Apache License 2.0 5 votes vote down vote up
/**
 * Selects a found match in the target component and stores it as the last search state.
 *
 * @param matchResult the match, with offsets relative to the searched text
 * @param offset document offset of the searched text, added to the match offsets
 * @return the document offset at which the match starts
 */
private int updateStateAfterFound(MatchResult matchResult, int offset) {
    final int selectionEnd = offset + matchResult.end();
    final int selectionStart = offset + matchResult.start();

    target.select(selectionStart, selectionEnd);
    target.getCaret().setSelectionVisible(true);

    // remember this hit for the next search
    lastFoundIndex = selectionStart;
    lastMatchResult = matchResult;
    // NOTE(review): the cast only works if the MatchResult is a Matcher instance;
    // verify what getMatchResult() hands out on the targeted Java version.
    lastRegex = ((Matcher) lastMatchResult).pattern().pattern();

    return selectionStart;
}
 
Example 12
Source File: StreamRegexMatcher.java    From windup with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Reports a regex match, together with its line and column, to the listener and then
 * delegates to the superclass implementation.
 *
 * <p>The position is derived from the factory's current line/column plus the buffer text
 * between {@code firstCharIndex} and the start of the match: every line break
 * ({@code \r\n}, {@code \r} or {@code \n}) in that text advances the line, and the
 * column is counted from the last line break (or added to the current column if there
 * is none).
 */
@Override
public MatchProcessorResult process(StringBuilder characterBuffer, int firstModifiableCharacterInBuffer, MatchResult matchResult)
{
    long startLine = lineColumnAwareModificationFactory.getCurrentLine();
    long startColumn = lineColumnAwareModificationFactory.getCurrentColumn();
    // NOTE(review): the range starts at the field firstCharIndex, not at the parameter
    String skipped = characterBuffer.substring(firstCharIndex, matchResult.start());

    int lineBreaks = 0;
    int lastBreakEnd = 0;
    for (Matcher breaks = Pattern.compile("\r\n|\r|\n").matcher(skipped); breaks.find();)
    {
        lineBreaks++;
        lastBreakEnd = breaks.end();
    }

    long lineNumber = startLine + lineBreaks;
    long columnNumber = (lineBreaks == 0)
            ? startColumn + skipped.length() // still on the starting line
            : skipped.length() - lastBreakEnd; // length of last line in 'skipped'

    listener.regexMatched(new StreamRegexMatchedEvent(matchResult.group(), lineNumber, columnNumber));
    return super.process(characterBuffer, firstModifiableCharacterInBuffer, matchResult);
}
 
Example 13
Source File: CommentScanner.java    From ipst with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Scans one line and reports its parts (plain text, quoted strings, block comments, line
 * comment) to the event handler. The flags {@code isInsideString} and
 * {@code isInsideBlockComment} carry state from one line to the next.
 *
 * @param line the line to scan
 */
void scan(String line) {
    // We will allow comment characters inside a constant string
    // We will also allow quoted strings inside a comment
    // We will emit block comments that span multiple lines as a sequence of comment blocks, one for every line
    // Same for multiple line quoted strings

    Matcher quote = quoteRegex.matcher(line);
    Matcher lineComment = lineCommentRegex.matcher(line);
    Matcher blockCommentStart = blockCommentStartRegex.matcher(line);
    Matcher blockCommentEnd = blockCommentEndRegex.matcher(line);

    // Look for the line starting point, taking into account special situations where
    // we are inside a string, line really begins at first occurrence of quote character (if it occurs in the line)
    // we are inside a block comment, line really begins at first occurrence of block comment end (if it occurs in the line)
    int p = skipStart(line, quote, blockCommentEnd);

    // If after skipping the start we continue inside a block comment emit whole line as a block comment and finish
    // Similar if we are still (totally) inside a string, emit whole line as a string and finish scan
    if (isInsideBlockComment) {
        eventHandler.onBlockComment(line, false, false);
        return;
    }
    if (isInsideString) {
        eventHandler.onQuoted(line, false, false);
        return;
    }

    // Go across the line
    while (p < line.length()) {
        MatchResult q;
        MatchResult bcs;
        MatchResult lc;

        // From current position find first of occurrence of {string start, block comment start, line comment start}
        q = find(quote, p);
        bcs = find(blockCommentStart, p);
        lc = find(lineComment, p);

        // Check what comes first
        // NOTE(review): the comparisons are strict, so when two candidates start at the same index
        // neither of them is selected
        boolean isQuote = q != null && (bcs == null || q.start() < bcs.start()) && (lc == null || q.start() < lc.start());
        boolean isBlockComment = bcs != null && (q == null || bcs.start() < q.start()) && (lc == null || bcs.start() < lc.start());
        boolean isLineComment = lc != null && (q == null || lc.start() < q.start()) && (bcs == null || lc.start() < bcs.start());

        if (isQuote) {
            // Emit as text from last pointer to here
            if (q.start() > 0) {
                eventHandler.onText(line.substring(p, quoteStart(q)));
            }
            p = skipQuoted(line, quote, q);
        } else if (isBlockComment) {
            // Emit as text from last pointer to here
            if (bcs.start() > 0) {
                eventHandler.onText(line.substring(p, bcs.start()));
            }
            p = skipBlockComment(line, blockCommentEnd, bcs);
        } else if (isLineComment) {
            // Emit as text from last pointer to here, then the comment; nothing else follows on this line
            if (lc.start() > 0) {
                eventHandler.onText(line.substring(p, lc.start()));
            }
            // The comment text is whatever follows the matched line comment marker
            if (lc.end() < line.length()) {
                eventHandler.onLineComment(line.substring(lc.end()));
            }
            break;
        } else {
            // Emit the rest of the line as text and finish
            eventHandler.onText(line.substring(p));
            break;
        }
    }
}
 
Example 14
Source File: CommentScanner.java    From ipst with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Returns the position of the quote character within a quote match.
 *
 * @param q a quote match
 * @return 0 for a match at the very start, otherwise the match start plus one
 */
int quoteStart(MatchResult q) {
    int matchStart = q.start();
    if (matchStart > 0) {
        // a match beyond the start must have been matched by a non-quote + quote,
        // so the quote itself sits one character further
        return matchStart + 1;
    }
    return 0;
}
 
Example 15
Source File: HeidelTimeOpenNLP.java    From newsleak with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Check token boundaries using token information.
 *
 * <p>A match is accepted when it is as long as the sentence, when it has a blank on both
 * sides, or when, while walking over the tokens of the sentence, both its begin and its
 * end have been found to be fine. A side is fine if it coincides with a token boundary or
 * if the neighbouring character is one of ".", "/", "–", "-".
 * 
 * @param r
 *            MatchResult
 * @param s
 *            respective Sentence
 * @param jcas
 *            current CAS object
 * @return whether or not the MatchResult is a clean one
 */
public static Boolean checkTokenBoundaries(MatchResult r, Sentence s, JCas jcas) {
	final int matchEnd = r.end();
	final int matchBegin = r.start();
	final int sentenceBegin = s.getBegin();

	// whole expression is marked as a sentence
	if ((matchEnd - matchBegin) == (s.getEnd() - sentenceBegin)) {
		return true;
	}

	// the characters right in front of and behind the match; null at the sentence borders
	final String text = s.getCoveredText();
	final String before = (matchBegin > 0) ? text.substring(matchBegin - 1, matchBegin) : null;
	final String after = (matchEnd < text.length()) ? text.substring(matchEnd, matchEnd + 1) : null;

	// blanks on both sides: no need to consult the tokens
	if (" ".equals(before) && " ".equals(after)) {
		return true;
	}

	// Tokenizer does not split number from some symbols (".", "/", "-", "–"),
	// e.g., "...12 August-24 August..." or "... in 1990. New Sentence ..."
	final boolean symbolBefore = ".".equals(before) || "/".equals(before) || "–".equals(before)
			|| "-".equals(before);
	final boolean symbolAfter = ".".equals(after) || "/".equals(after) || "–".equals(after)
			|| "-".equals(after);

	boolean beginOK = false;
	boolean endOK = false;
	FSIterator iterToken = jcas.getAnnotationIndex(Token.type).subiterator(s);
	while (iterToken.hasNext()) {
		Token t = (Token) iterToken.next();

		if ((matchBegin + sentenceBegin) == t.getBegin() || symbolBefore) {
			beginOK = true;
		}
		if ((matchEnd + sentenceBegin) == t.getEnd() || symbolAfter) {
			endOK = true;
		}

		if (beginOK && endOK) {
			return true;
		}
	}
	return false;
}
 
Example 16
Source File: PercentToBraceConverter.java    From Pydev with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * <p>Create a new {@code PercentConversion} instance.</p>
 * 
 * <p>An instance is built from exactly one specifier match result and is not meant to
 * change afterwards. A format string with several specifiers is therefore expected to be
 * converted with several {@code PercentConversion}s, one per specifier.</p>
 * 
 * <p>The matched text and its span are taken from the match; key, width, precision, flags
 * and conversion come from the token groups extracted from it. A missing key is replaced
 * by the converter's next index (or by an empty string for a literal {@code %%}), and a
 * {@code *} width or precision is replaced by the next index wrapped in braces.</p>
 * 
 * @param aConverter - the enclosing {@code PercentToBraceConverter} instance
 * @param aMatch - a specific {@link java.util.regex.MatchResult MatchResult} that holds 
 *                 information about the matched specifier token.
 * @throws IllegalArgumentException
 *          if {@code aConverter} or {@code aMatch} is {@code null}
 *          
 * @throws IllegalStateException 
 *          if {@code aMatch} is passed before a successful match could be made
 *          it is said to have inconsistent state.
 */
public PercentConversion(PercentToBraceConverter aConverter, MatchResult aMatch)
        throws IllegalArgumentException, IllegalStateException {

    if (aConverter == null) {
        throw new IllegalArgumentException("Converter can't be null!");
    }
    if (aMatch == null) {
        throw new IllegalArgumentException("Match can't be null!");
    }

    source = aMatch.group(0);
    span = new int[] { aMatch.start(), aMatch.end() };

    final Map<String, String> tokens = extractTokenGroups(aMatch);

    // nextIndex() is consumed in the order key, width, precision
    final String explicitKey = tokens.get("Key");
    if (explicitKey != null) {
        key = explicitKey;
    } else if ("%%".equals(source)) {
        key = "";
    } else {
        key = aConverter.nextIndex();
    }

    // TODO: {} representation is hard-wired, could generalize this if needed
    final String widthSpec = tokens.get("Width");
    width = "*".equals(widthSpec) ? String.format("{%s}", aConverter.nextIndex()) : widthSpec;

    final String precisionSpec = tokens.get("Precision");
    precision = "*".equals(precisionSpec) ? String.format("{%s}", aConverter.nextIndex()) : precisionSpec;

    flags = tokens.get("Flags");
    conversion = tokens.get("Conversion");
}