Java Code Examples for it.unimi.dsi.lang.MutableString#append()

The following examples show how to use it.unimi.dsi.lang.MutableString#append(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: BUbiNG   File: MockFetchedResponses.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a mock HTML page for the given URI and writes it to the inspectable buffered input stream.
 *
 * <p>When both the host and the raw path (minus its first character) parse as integers, the body
 * contains ten pseudo-random links to the same host plus one link to a pseudo-random host. The
 * generator is seeded from the (host, page) pair, so the same URI always produces the same links.
 * When either part is not numeric, the body is left without links.
 *
 * @param uri the URI being "fetched"; it is also stored in {@code url}.
 * @throws IOException declared by the signature; presumably raised by the write to the underlying stream.
 */
public void fetch(URI uri) throws IOException {
	this.url = uri;
	MutableString s = new MutableString();
	s.append("<html><head><title>").append(uri).append("</title></head>\n");
	s.append("<body>\n");

	try {
		final int host = Integer.parseInt(uri.getHost());
		final int page = Integer.parseInt(uri.getRawPath().substring(1));
		// Widen before shifting: an int shifted by 32 is shifted by 0 bits, which would
		// collapse the seed to host | page. Mask page so its sign bit cannot leak into the host half.
		final Random random = new Random(((long)host << 32) | (page & 0xFFFFFFFFL));
		for(int i = 0; i < 10; i++)
			s.append("<a href=\"http://").append(host).append('/').append(random.nextInt(10000)).append("\">Link ").append(i).append("</a>\n");
		s.append("<a href=\"http://").append(random.nextInt(1000)).append('/').append(random.nextInt(10000)).append("\">External link ").append("</a>\n");

	}
	catch(NumberFormatException ignored) {
		// Deliberate: a non-numeric host or page simply yields a page without links.
	}
	s.append("</body></html>\n");
	inspectableBufferedInputStream.write(ByteBuffer.wrap(Util.toByteArray(s.toString())));
}
 
Example 2
/**
 * Recursively appends a textual dump of the subtree rooted at {@code n} to {@code result}.
 *
 * <p>Both {@code printPrefix} and {@code path} are mutated in place while descending and are
 * restored to their incoming content before returning, so the caller's buffers are reused
 * across sibling calls.
 *
 * @param n the node to print; if {@code null} nothing is appended.
 * @param printPrefix the prefix written before this node's line; extended for the children.
 * @param result the buffer receiving the dump.
 * @param path the path accumulated from the root down to this node.
 * @param level the depth of {@code n}, printed in parentheses.
 */
private void recToString( final Node n, final MutableString printPrefix, final MutableString result, final MutableString path, final int level ) {
	if ( n == null ) return;
	
	//System.err.println( "Called with prefix " + printPrefix );
	
	result.append( printPrefix ).append( '(' ).append( level ).append( ')' );
	
	if ( n.path != null ) {
		// Extend the accumulated path with this node's own path and print it.
		// NOTE(review): presumably the bit vector renders as one character per bit, since
		// exactly n.pathLength characters are removed again at the end of this method - confirm.
		path.append( LongArrayBitVector.wrap( n.path, n.pathLength ) );
		result.append( " path:" ).append( LongArrayBitVector.wrap( n.path, n.pathLength ) );
	}
	if ( n.word >= 0 ) result.append( " word: " ).append( n.word ).append( " (" ).append( path ).append( ')' );

	result.append( '\n' );
	
	// Left child: one extra path character '0', prefix extended by the six characters "\t0 => ".
	path.append( '0' );
	recToString( n.left, printPrefix.append( '\t' ).append( "0 => " ), result, path, level + 1 );
	// Right child: overwrite the trailing '0' with '1' and the trailing "0 => " with "1 => ".
	path.charAt( path.length() - 1, '1' ); 
	recToString( n.right, printPrefix.replace( printPrefix.length() - 5, printPrefix.length(), "1 => "), result, path, level + 1 );
	// Undo the child-specific path character and the six prefix characters added above.
	path.delete( path.length() - 1, path.length() ); 
	printPrefix.delete( printPrefix.length() - 6, printPrefix.length() );
	
	//System.err.println( "Path now: " + path + " Going to delete from " + ( path.length() - n.pathLength));
	
	// Drop this node's own path contribution.
	// NOTE(review): this runs even when n.path is null; assumes n.pathLength is 0 in that case - confirm.
	path.delete( path.length() - n.pathLength, path.length() );
}
 
Example 3
Source Project: tagme   File: Chars.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Lowercases every character of a word except the first, which is copied unchanged.
 *
 * @param word the word to convert; must not be {@code null}.
 * @return word with the first character untouched and the others turned to lowercase;
 *         the empty string when {@code word} is empty.
 */
public static String capitalizeOnlyFirst(String word)
{
	// Guard: charAt(0) below would throw StringIndexOutOfBoundsException on an empty word.
	if (word.isEmpty()) return word;

	final StringBuilder s = new StringBuilder(word.length());
	s.append(word.charAt(0));
	for(int i=1; i<word.length(); i++)
	{
		// Per-character, locale-independent lowercasing, as in the original implementation.
		s.append(Character.toLowerCase(word.charAt(i)));
	}
	return s.toString();
}
 
Example 4
Source Project: tagme   File: Chars.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Translates a character sequence by looking up every UTF-16 code unit in
 * {@code UTF16toASCII.MAP} and concatenating the looked-up values.
 * @param input the sequence to convert.
 * @return a new mutable string holding the converted sequence.
 */
public static MutableString toNormalizedASCII(CharSequence input)
{
	final int length = input.length();
	final MutableString ascii = new MutableString(length+1);
	int index = 0;
	while (index < length)
	{
		// The code unit itself is the index into the translation table.
		final int unit = (int)input.charAt(index);
		ascii.append(UTF16toASCII.MAP[unit]);
		index++;
	}
	return ascii;
}
 
Example 5
Source Project: tagme   File: AnnotatedText.java    License: Apache License 2.0 5 votes vote down vote up
/**Creates an annotated text object by draining a reader into {@code original}.
 * The reader is consumed until it reports end of stream; {@code length} determines the
 * initial capacity of the text and the size of the transfer buffer (both {@code length+1}).
 * @param reader where to read the data from.
 * @param length the expected length of the text, used to size the buffers.
 * @throws IOException if an error occurred while reading from the reader.
 */
public AnnotatedText(Reader reader, int length) throws IOException
{
	final char[] chunk = new char[length+1];
	original = new MutableString(length+1);
	for (;;)
	{
		final int count = reader.read(chunk, 0, length+1);
		if (count < 0) break; // end of stream
		original.append(chunk, 0, count);
	}
}
 
Example 6
Source Project: tagme   File: WikipediaAnchorParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Strips punctuation from an anchor: only runs of kept characters survive, and consecutive
 * runs are joined by a single space. Letters and digits are always kept.
 *
 * @param input the anchor text to clean.
 * @param ignoreChars extra characters (neither digits nor letters) that are kept as well; may be null.
 * @param ignoreSequences if true, a non-whitespace character equal to one of its neighbours
 *        (e.g. each '!' in '!!!') is kept too.
 * @return A new MutableString
 */
public static MutableString removePunctuations(MutableString input, String ignoreChars, boolean ignoreSequences)
{
	final int length = input.length();
	final char[] chars = input.array();
	final MutableString cleaned = new MutableString(length);

	int pos = 0;
	int runStart = 0;
	while (pos < length)
	{
		// Advance over the current run of kept characters.
		while (pos < length && isKeptAnchorChar(chars, pos, length, ignoreChars, ignoreSequences))
			pos++;

		if (pos > runStart) {
			if (cleaned.length() > 0) cleaned.append(' ');
			cleaned.append(chars, runStart, pos-runStart);
		}

		// Skip the characters to drop; the next run starts right after them.
		while (pos < length && !isKeptAnchorChar(chars, pos, length, ignoreChars, ignoreSequences)) {
			pos++;
			runStart = pos;
		}
	}
	return cleaned;
}

/** Tells whether the character at {@code pos} survives punctuation removal. */
private static boolean isKeptAnchorChar(char[] chars, int pos, int length, String ignoreChars, boolean ignoreSequences)
{
	final char c = chars[pos];
	if (Character.isLetter(c) || Character.isDigit(c)) return true;
	if (ignoreChars != null && ignoreChars.indexOf(c) >= 0) return true;
	if (!ignoreSequences || Character.isWhitespace(c)) return false;
	// Part of a repeated sequence: same character immediately before or after.
	return (pos > 0 && chars[pos-1] == c) || (pos < length-1 && chars[pos+1] == c);
}
 
Example 7
Source Project: tagme   File: WikipediaAnchorParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Manage dots, removing them if they are part of an abbreviation, or replacing them with
 * whitespaces if they are the last char of a word.
 *
 * <p>The input is split into runs of characters that are neither dots nor whitespace. Every run
 * is copied to the result; a single space is inserted before a run only when the separator
 * that preceded it was classified as a word boundary (see the inline comments).
 *
 * @param input the text to process; it is read through its backing array and not modified here.
 * @return a new MutableString without dots, with word boundaries rendered as single spaces.
 */
public static MutableString removeDots(MutableString input)
{
	int len = input.length();
	char[] array = input.array();
	MutableString res = new MutableString(len);

	// True when the separator just skipped should be rendered as a space.
	boolean isLastDot = false;
	// i scans the input; last marks the start of the run currently being collected.
	int i=0, last=0;
	while(i<len)
	{
		// Collect a run of characters that are neither dots nor whitespace.
		while(i<len && array[i]!='.' && !Character.isWhitespace(array[i]))
			i++;

		if (i>last) {
			if (isLastDot && res.length() > 0) res.append(' ');
			res.append(array, last, i-last);
		}

		isLastDot = false;
		// Skip the separator (dots and whitespace), deciding whether it is a word boundary.
		while(i<len && (array[i]=='.' || Character.isWhitespace(array[i])))
		{
			// Boundary if the separator contains whitespace, or a dot whose character two
			// positions ahead is a regular character, or a dot in the second-to-last position
			// whose character two positions back is a regular character.
			// NOTE(review): presumably this is what tells "U.S.A." (dots dropped) apart from
			// "end.Next" (dot turned into a space) - confirm against the callers.
			if (Character.isWhitespace(array[i]) ||
					(i<len-2 && array[i+2]!='.' && !Character.isWhitespace(array[i+2])) ||
					(i==len-2 && i>1 && array[i-2]!='.' && !Character.isWhitespace(array[i-2]))
					)
				isLastDot = true;
			i++;
			last=i;
		}
	}
	return res;


}
 
Example 8
Source Project: tagme   File: SQLWikiParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads characters from the reader up to, and consuming, the next space, newline or end of stream.
 *
 * @param r the reader to consume.
 * @return the characters read before the delimiter; empty if a delimiter came first.
 * @throws IOException if reading from {@code r} fails.
 */
static String readToken(InputStreamReader r) throws IOException
{
    final MutableString token = new MutableString();
    for (int ch = r.read(); ch != -1 && ch != '\n' && ch != ' '; ch = r.read())
    {
        token.append((char)ch);
    }
    return token.toString();
}
 
Example 9
Source Project: tagme   File: ExternalSortUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Renders a float array as {@code [v0,v1,...]}, formatting each value with
 * {@code String.format} and the given number of fractional digits.
 *
 * @param data the values to render.
 * @param precision the number of digits after the decimal separator.
 * @return the rendered sequence.
 */
public static CharSequence formatFloatArray(float[] data, int precision){
	final MutableString out = new MutableString(data.length*2+1);
	final String pattern = "%."+precision+"f";
	out.append('[');
	boolean first = true;
	for (float value : data) {
		if (!first) out.append(',');
		first = false;
		out.append(String.format(pattern, value));
	}
	out.append(']');
	return out;
}
 
Example 10
Source Project: tagme   File: ExternalSortUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Renders a double array as {@code [v0,v1,...]}, formatting each value with
 * {@code String.format} and the given number of fractional digits.
 *
 * @param data the values to render.
 * @param precision the number of digits after the decimal separator.
 * @return the rendered sequence.
 */
public static CharSequence formatDoubleArray(double[] data, int precision){
	final MutableString out = new MutableString(data.length*2+1);
	final String pattern = "%."+precision+"f";
	out.append('[');
	boolean first = true;
	for (double value : data) {
		if (!first) out.append(',');
		first = false;
		out.append(String.format(pattern, value));
	}
	out.append(']');
	return out;
}
 
Example 11
/**
 * Appends to {@code s} the term of rank {@code index} in the tree rooted at {@code root}.
 *
 * <p>At every node the terms are counted in this order: the left subtree, the node itself (when
 * it is a word), the middle subtree, and finally the right subtree. {@code index} is decremented
 * by the number of terms skipped until the requested one is reached.
 *
 * <p>NOTE(review): there is no bounds check; an out-of-range {@code index} presumably ends up
 * dereferencing a null {@code e.right} - confirm with the callers.
 *
 * @param index the rank of the requested term, relative to the whole tree.
 * @param s the string the term is appended to.
 * @return the result of {@code compact()} invoked on {@code s} after the last append.
 */
protected MutableString getTerm( int index, final MutableString s ) {
	Node e = root;
			
	for( ;; ) {
		
		if ( e.left != null ) {
			if ( index < e.left.numNodes ) {
				// Going left (and, below, right) keeps the node path minus its last element.
				// NOTE(review): presumably the last element is the branching character - confirm.
				s.append( e.path, 0, e.path.length - 1 );
				e = e.left;
				continue;
			}
			
			// The requested term is not in the left subtree: skip all of its terms.
			index -= e.left.numNodes;
		}
		
		if ( e.isWord ) {
			// The node itself is the next term in order.
			if ( index == 0 ) return s.append( e.path ).compact();
			index--;
		}
		
		
		if ( e.middle != null ) {
			if ( index < e.middle.numNodes ) {
				// Going down the middle keeps the whole node path.
				s.append( e.path );
				e = e.middle;
				continue;
			}

			index -= e.middle.numNodes;
		}
		
		// Only the right subtree is left.
		s.append( e.path, 0, e.path.length - 1 );
		e = e.right;
	}
}
 
Example 12
/**
 * Delegates the read to {@code ibs} and then prints a binary dump of {@code bits} to {@code pw}.
 * The dump starts with " {", is resized to {@code len} characters and is closed by "}".
 *
 * @param bits the array the bits are read into.
 * @param len the length passed on to the underlying stream.
 * @throws IOException declared by the signature; the delegate read is the visible I/O call.
 */
@Override
public void read( byte[] bits, int len ) throws IOException {
	ibs.read( bits, len );
	final MutableString dump = new MutableString( " {" );
	for( final byte b : bits ) {
		dump.append( DebugOutputBitStream.byte2Binary( b ) );
	}
	pw.print( dump.length( len ).append( "}" ) );
}
 
Example 13
/**
 * Renders the eight low-order bits of an integer as '0'/'1' characters, most significant bit first.
 *
 * @param x the value to render; only its eight low-order bits are used, so a sign-extended
 *        negative byte is rendered correctly.
 * @return a new mutable string of eight binary digits.
 */
static MutableString byte2Binary( int x ) {
	MutableString s = new MutableString();
	for( int i = 0 ; i < 8; i++ ) {
		// Mask instead of x % 2: the remainder is -1 for negative odd values, which would
		// produce '/' ('0' - 1) rather than '1'.
		s.append( (char)( '0' + ( x & 1 ) ) );
		x >>= 1;
	}
	// Bits were emitted least significant first.
	return s.reverse();
}
 
Example 14
/**
 * Renders the {@code len} low-order bits of a long as '0'/'1' characters, most significant bit first.
 *
 * <p>All 64 bits are emitted least significant first, the string is then resized to
 * {@code len} characters and reversed.
 *
 * @param x the value to render; negative values are handled bit by bit.
 * @param len the number of bits to keep.
 *        NOTE(review): values above 64 rely on how {@code MutableString.length(int)} pads - confirm.
 * @return a new mutable string of {@code len} characters.
 */
static MutableString int2Binary( long x, final int len ) {
	MutableString s = new MutableString();
	for( int i = 0 ; i < 64; i++ ) {
		// Mask instead of x % 2: the remainder is -1 for negative odd values, which would
		// produce '/' ('0' - 1) rather than '1'.
		s.append( (char)( '0' + ( x & 1 ) ) );
		x >>= 1;
	}
	return s.length( len ).reverse();
}
 
Example 15
/**
 * Prints a binary dump of {@code bits} to {@code pw} and then delegates the write to {@code obs}.
 * The dump starts with " {", is resized to {@code len} characters and is closed by "}".
 *
 * @param bits the bytes to write.
 * @param len the length passed on to the underlying stream; must fit in an int.
 * @return the value returned by the underlying stream.
 * @throws IllegalArgumentException if {@code len} exceeds {@code Integer.MAX_VALUE}.
 * @throws IOException declared by the signature; the delegate write is the visible I/O call.
 */
public long write( final byte bits[], final long len ) throws IOException {
	if ( len > Integer.MAX_VALUE ) throw new IllegalArgumentException();
	final MutableString dump = new MutableString( " {" );
	for( final byte b : bits ) {
		dump.append( byte2Binary( b ) );
	}
	pw.print( dump.length( (int)len ).append( "}" ) );
	return obs.write( bits, len );
}
 
Example 16
/**
 * Extracts the next word and the non-word that follows it.
 *
 * <p>Both output strings are emptied first. A word is a maximal run of characters accepted by
 * {@code isWordConstituent}; the non-word is the maximal run of rejected characters after it.
 * Either may come out empty.
 *
 * <p>NOTE(review): presumably {@code noMoreCharacters()} refills {@code buffer} and resets
 * {@code pos}/{@code avail} when the buffer is exhausted, returning true only at end of
 * input - confirm against its definition.
 *
 * @param word filled with the next word.
 * @param nonWord filled with the non-word following the word.
 * @return false if no characters were available on entry, true otherwise.
 * @throws IOException declared by the signature; presumably raised while refilling the buffer.
 */
public boolean next( final MutableString word, final MutableString nonWord ) throws IOException {
	int i;
	final char buffer[] = this.buffer;

	if ( noMoreCharacters() ) return false;

	word.length( 0 );
	nonWord.length( 0 );

	// Collect the word, possibly across several buffer loads.
	for(;;) {
		// Count the word constituents available from the current position.
		for( i = 0; i < avail && isWordConstituent( buffer[ pos + i ] ); i++ );

		word.append( buffer, pos, i  );
		pos += i; 
		avail -= i;
		
		// Stop if a non-constituent is waiting in the buffer or the input is over;
		// otherwise the word may continue in the next buffer load.
		if ( avail > 0 || noMoreCharacters() ) break;
	}
	
	// The input ended right after the word: the non-word stays empty.
	if ( noMoreCharacters() ) return true;

	// Collect the non-word with the same scheme and the opposite test.
	for(;;) {
		for( i = 0; i < avail && ! isWordConstituent( buffer[ pos + i ] ); i++ );

		nonWord.append( buffer, pos, i  );
		pos += i; 
		avail -= i;

		if ( avail > 0 || noMoreCharacters() ) return true;
	}
}
 
Example 17
/**
 * Appends {@code str} together with {@code substr} to the buffer, or nothing at all when
 * {@code str} is null or empty.
 *
 * @param b Buffer to append to.
 * @param str String to append if neither null nor empty.
 * @param substr Text placed after <code>str</code> when <code>suffix</code> is true, before it otherwise.
 * @param suffix True if <code>substr</code> is a suffix.
 */
private void appendNonNull(MutableString b, String str, String substr,
        boolean suffix) {
    if (str == null || str.length() == 0) {
        return;
    }
    if (suffix) {
        b.append(str);
        b.append(substr);
    } else {
        b.append(substr);
        b.append(str);
    }
}
 
Example 18
Source Project: tagme   File: WikiTextExtractor.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Flattens wiki-style markup into plain sentences, processing the input line by line.
 *
 * <p>Each non-empty trimmed line is handled according to its first character: headings ('='),
 * list/indent markers ('*', '#', ':', ';'), table/template lines ('{', '|', skipped), lines
 * starting with '.' or '-', and everything else. Kept lines are appended to the result followed
 * by a separator, adding a '.' when the text does not already end with one of ":.;,-".
 *
 * @param input the text to process.
 * @param onlyAbstract if true, processing stops as soon as more than {@code MIN_ABSTRACT_CHARS}
 *        characters have been collected (checked before each new line).
 * @return the collected text with its last character removed
 *         (presumably the trailing separator space - confirm).
 */
public MutableString removeStructure(MutableString input, boolean onlyAbstract)
	{
		
		MutableString buffer = new MutableString(1024);
		FastBufferedReader tokenizer = new FastBufferedReader(input);
		
		MutableString text = new MutableString(2048);
		// Characters after which no extra '.' is added.
		String punts = ":.;,-";
		
		try {
			while(tokenizer.readLine(buffer) != null)
			{
				// Enough text for an abstract: drop the last character and stop.
				if (text.length() > MIN_ABSTRACT_CHARS && onlyAbstract){
					text.deleteCharAt(text.length()-1);
					return text;					
				}
				
//				MutableString linestr = new MutableString(buffer.trim());
				// NOTE(review): presumably trim() works in place and returns buffer itself, so
				// linestr aliases the read buffer - confirm.
				MutableString linestr = buffer.trim();
				if (linestr.length() == 0) continue;
				
				int start;
				int end;
				String chars;
				char[] line = linestr.array();
				int line_len = linestr.length();
				
				char first = linestr.charAt(0);
				switch (first)
				{
				case '=':{
					// Heading: strip the '=' and spaces on both sides and end it with ". ".
					chars = " =";
					for(start=0; start <line_len && chars.indexOf(line[start])>=0; start++);
					for(end=line_len-1; end >= 0  && chars.indexOf(line[end])>=0; end--);
					
					// Titles of a single character (start == end) are dropped as well.
					if (start < end){
						text.append(linestr.subSequence(start, end+1));
						text.append(". ");
					}
					break;
				}
					
				case '*':
				case '#':
				case ':':
				case ';':{
					
					// List item or indented line: strip the leading markers and spaces.
					chars = "*#:; ";
					for(start=0; start<line_len && chars.indexOf(line[start])>=0 ; start++);
					
					// Kept only if at least two characters remain.
					if (start < line_len-1){
						text.append(linestr.subSequence(start, linestr.length()));
						if (punts.indexOf(text.lastChar())<0)
							text.append('.');
						text.append(' ');
					}
					
					break;
				}
				case '{':
				case '|':
					// Lines starting with '{' or '|' are skipped entirely.
					break;
				case '.':
				case '-':{
					// NOTE(review): presumably delete(char[]) removes every '.' and '-' from the
					// line, not just the leading ones - confirm.
					linestr.delete(new char[]{'.','-'});
					if (linestr.length() > 0){
						text.append(linestr);
						if (punts.indexOf(text.lastChar())<0)
							text.append('.');
						text.append(' ');
					}
					break;
				}
				default:{
					// Lines ending with '}' are skipped.
					if (linestr.lastChar() == '}')
						break;
					text.append(linestr);
					if (punts.indexOf(text.lastChar())<0)
						text.append('.');
					text.append(' ');
				}
				}
			}
		// An I/O error is swallowed here: whatever has been collected so far is returned.
		} catch (IOException e) {}
		if (text.length()>0) text.deleteCharAt(text.length()-1);
		return text;
	}
 
Example 19
Source Project: tagme   File: DisambiguationDebugger.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a textual report of the votes collected for every annotation.
 *
 * <p>For each annotation in {@code votes} the report shows a header line, then either a
 * "PRUNED" marker or one entry per candidate page: pruned pages, pages with zero votes, or the
 * vote sum followed by the positive votes cast by the other annotations.
 *
 * @param input the annotated text used to recover the original text of each annotation.
 * @param lang the language used to create the {@code TopicSearcher} that resolves page titles.
 * @return the report.
 * @throws RuntimeException wrapping any {@code IOException} raised while building the report.
 */
public String toString(AnnotatedText input, String lang)
{
	try {
		TopicSearcher topic = new TopicSearcher(lang);
		MutableString report = new MutableString();

		for (Annotation a : votes.keySet())
		{
			report.append('\n');
			report.append(String.format("Annotation [%s] (lp %.4f - links %d) > %s\n", input.getOriginalText(a), a.getAnchor().lp(), a.getAnchor().links(),
					(a.isDisambiguated()? topic.getTitle(a.getTopic()) : "N/A")));

			if (a.isIgnored() || a.isPruned())
			{
				// Discarded annotations get no per-page detail and no trailing blank lines.
				report.append("  PRUNED\n");
			}
			else
			{
				for (int page_idx=0; page_idx<a.anchor.ambiguity(); page_idx++)
				{
					if (votes.get(a)[page_idx].status < 0)
					{
						report.append(String.format("  Page [%d - %s] pruned: %d\n", a.anchor.pageByIndex(page_idx), topic.getTitle(a.anchor.pageByIndex(page_idx)), votes.get(a)[page_idx].status));
					}
					else
					{
						Votes v = votes.get(a)[page_idx];
						if (v.sum == 0)
						{
							report.append(String.format("  Page [%d - %s] zero-votes: %d\n", a.anchor.pageByIndex(page_idx), topic.getTitle(a.anchor.pageByIndex(page_idx)), votes.get(a)[page_idx].status));
						}
						else
						{
							report.append(String.format("  Page [%d - %s] %f\n", a.anchor.pageByIndex(page_idx), topic.getTitle(a.anchor.pageByIndex(page_idx)), v.sum));

							for (Annotation b : v.keySet())
							{
								report.append(String.format("    Anchor [%s] (lp %.4f - links %d)\n", input.getOriginalText(b), b.getAnchor().lp(), b.getAnchor().links()));
								float[] b_votes = v.get(b);
								for (int idxb=0; idxb<b_votes.length; idxb++)
								{
									// Only strictly positive votes are listed.
									if (!(b_votes[idxb]>0)) continue;
									report.append(String.format("      Vote Page [%d - %s]: %f\n", b.anchor.pageByIndex(idxb), topic.getTitle(b.anchor.pageByIndex(idxb)), b_votes[idxb]));
								}
							}
						}
					}
				}
				report.append("\n\n");
			}
		}

		return report.toString();
	} catch (IOException ioe) {
		throw new RuntimeException(ioe);
	}

}
 
Example 20
/** Reads a line into the given mutable string.
 *
 * <P>The next line of input (defined as in {@link java.io.BufferedReader#readLine()})
 * will be stored into <code>s</code>. Note that if <code>s</code> is 
 * not {@linkplain it.unimi.dsi.lang.MutableString loose}
 * this method will be quite inefficient.
 *
 * <P>A line is terminated by LF, CR, or CR immediately followed by LF; the terminator is
 * consumed but not stored. A line may span several buffer loads.
 *
 * @param s a mutable string that will be used to store the next line (which could be empty).
 * @return <code>s</code>, or <code>null</code> if the end of file was found, in which
 * case <code>s</code> is unchanged.
 * @throws IOException declared by the signature; presumably raised while refilling the buffer.
 */

public MutableString readLine( final MutableString s ) throws IOException {
	char c = 0;
	int i;

	if ( noMoreCharacters() ) return null;

	s.length( 0 );

	for(;;) {
		// Count the characters before the next line terminator in the current buffer load;
		// when the scan stops early, c holds the terminator found.
		for( i = 0; i < avail && ( c = buffer[ pos + i ] ) != '\n' && c != '\r' ; i++ );

		s.append( buffer, pos, i  );
		pos += i; 
		avail -= i;

		// Characters left in the buffer: the scan stopped at a terminator.
		if ( avail > 0 ) {
			if ( c == '\n' ) { // LF only.
				pos++;
				avail--;
			}
			else { // c == '\r'
				pos++;
				avail--;
				if ( avail > 0 ) {
					if ( buffer[ pos ] == '\n' ) { // CR/LF with LF already in the buffer.
						pos ++;
						avail--;
					}
				}
				else { // We must search for the LF.
					if ( noMoreCharacters() ) return s;
					// NOTE(review): reading buffer[ 0 ] assumes the refill performed by
					// noMoreCharacters() reset pos to 0 - confirm.
					if ( buffer[ 0 ] == '\n' ) {
						pos++;
						avail--;
					}
				}
			}
			return s;
		}
		// Buffer exhausted without a terminator: end of input ends the line, otherwise keep
		// scanning the next buffer load.
		else if ( noMoreCharacters() ) return s;
	}
}