it.unimi.dsi.lang.MutableString Java Examples

The following examples show how to use it.unimi.dsi.lang.MutableString. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: InternedMutableStringSet.java    From database with GNU General Public License v2.0 6 votes vote down vote up
/** Returns an interned, canonical copy contained in this set of the specified mutable string.
*
* <p>The semantics of this method is essentially the same as that of
* {@link java.util.Collection#add(Object)}, but
* this method will return the element
* equal to <code>s</code> currently in this set. The returned object will
* <em>never</em> be <code>s</code> itself: when <code>s</code> is
* not in this set, a new <code>Term</code> built from <code>s</code>
* is stored and returned instead.
*
* <p>The purpose of this method is similar to that of {@link String#intern()},
* but obviously here the user has much greater control.
*
* <p>The insertion and rehashing logic below duplicates that of <code>add()</code>
* and must be kept aligned with it.
*
* @param s the mutable string that must be interned.
* @return the <code>Term</code> equal to <code>s</code> stored in this set.
*/

public Term intern( final MutableString s ) {
	// Duplicate code from add()--keep in line!
	final int i = findInsertionPoint( s );
	// A negative result encodes the slot of an already-present key as -( slot + 1 ).
	if ( i < 0 ) return (Term)(key[ -( i + 1 ) ]);

	// Claim slot i: keep the count of FREE slots up to date before marking it OCCUPIED.
	if ( state[ i ] == FREE ) free--;
	state[ i ] = OCCUPIED;
	// The set stores its own Term built from s, never s itself.
	final Term t = (Term)( key[ i ] = new Term( s ) );

	if ( ++count >= maxFill ) {
		int newP = Math.min( p + growthFactor(), PRIMES.length - 1 );
		// Just to be sure that size changes when p is very small.
		while( PRIMES[ newP ] == PRIMES[ p ] ) newP++;
		rehash( newP ); // Table too filled, let's rehash
	}
	// No FREE slot is left: rehash at the same size index.
	// NOTE(review): presumably this reclaims slots that are neither FREE nor OCCUPIED -- confirm against rehash().
	if ( free == 0 ) rehash( p );
	return t;
}
 
Example #2
Source File: MockFetchedResponses.java    From BUbiNG with Apache License 2.0 6 votes vote down vote up
/** Builds a mock HTML page for the given URI and writes it to the response buffer.
 *
 * <p>When the host and the path (minus its leading slash) of <code>uri</code> are both
 * decimal integers, the page contains ten links to pages of the same host and one
 * link to another host, all generated by a pseudorandom generator seeded with the
 * (host, page) pair, so the same URI always yields the same page. Otherwise the
 * page contains no links.
 *
 * @param uri the URI to be (mock-)fetched.
 * @throws IOException if writing the page to the underlying buffer fails.
 */
public void fetch(URI uri) throws IOException {
	this.url = uri;
	final MutableString s = new MutableString();
	s.append("<html><head><title>").append(uri).append("</title></head>\n");
	s.append("<body>\n");

	try {
		final int host = Integer.parseInt(uri.getHost());
		final int page = Integer.parseInt(uri.getRawPath().substring(1));
		// The shift must happen on a long: shifting an int by 32 uses only the five
		// low-order bits of the distance (i.e., 0), which would leave host unchanged
		// and make the seed just host | page. The mask keeps page in the lower 32 bits.
		final Random random = new Random((long)host << 32 | (page & 0xFFFFFFFFL));
		for(int i = 0; i < 10; i++)
			s.append("<a href=\"http://").append(host).append('/').append(random.nextInt(10000)).append("\">Link ").append(i).append("</a>\n");
		s.append("<a href=\"http://").append(random.nextInt(1000)).append('/').append(random.nextInt(10000)).append("\">External link ").append("</a>\n");

	}
	catch(NumberFormatException ignored) {
		// Host or path is not numeric: deliberately emit a page without links.
	}
	s.append("</body></html>\n");
	inspectableBufferedInputStream.write(ByteBuffer.wrap(Util.toByteArray(s.toString())));
}
 
Example #3
Source File: ImmutableBinaryTrie.java    From database with GNU General Public License v2.0 6 votes vote down vote up
/** Recursively appends to <code>result</code> a textual dump of the subtrie rooted at <code>n</code>.
 *
 * <p>Both <code>printPrefix</code> and <code>path</code> are shared scratch buffers: they are
 * extended before each recursive call and restored before returning, so on exit
 * they have the same content they had on entry.
 *
 * @param n the root of the subtrie to dump, or <code>null</code> (in which case nothing happens).
 * @param printPrefix the indentation/branch prefix printed before this node.
 * @param result the string accumulating the dump.
 * @param path the textual path accumulated from the root to this node.
 * @param level the depth of <code>n</code>, printed in parentheses.
 */
private void recToString( final Node n, final MutableString printPrefix, final MutableString result, final MutableString path, final int level ) {
	if ( n == null ) return;
	
	//System.err.println( "Called with prefix " + printPrefix );
	
	// Node header: the prefix followed by the depth in parentheses.
	result.append( printPrefix ).append( '(' ).append( level ).append( ')' );
	
	if ( n.path != null ) {
		// Extend the running path with the path stored in this node, and print the latter.
		path.append( LongArrayBitVector.wrap( n.path, n.pathLength ) );
		result.append( " path:" ).append( LongArrayBitVector.wrap( n.path, n.pathLength ) );
	}
	if ( n.word >= 0 ) result.append( " word: " ).append( n.word ).append( " (" ).append( path ).append( ')' );

	result.append( '\n' );
	
	// Left child: one more path character ('0') and six more prefix characters (tab + "0 => ").
	path.append( '0' );
	recToString( n.left, printPrefix.append( '\t' ).append( "0 => " ), result, path, level + 1 );
	// Right child: flip the last path character to '1' and the last five prefix characters to "1 => ".
	path.charAt( path.length() - 1, '1' ); 
	recToString( n.right, printPrefix.replace( printPrefix.length() - 5, printPrefix.length(), "1 => "), result, path, level + 1 );
	// Undo the branch character and the six prefix characters added above.
	path.delete( path.length() - 1, path.length() ); 
	printPrefix.delete( printPrefix.length() - 6, printPrefix.length() );
	
	//System.err.println( "Path now: " + path + " Going to delete from " + ( path.length() - n.pathLength));
	
	// Undo the node path appended above.
	// NOTE(review): this assumes the wrapped bit vector prints exactly one character per bit
	// and that pathLength is 0 when n.path is null -- confirm.
	path.delete( path.length() - n.pathLength, path.length() );
}
 
Example #4
Source File: FastBufferedReader.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Extracts the next word and the run of non-word characters that follows it.
 *
 * <p>Both arguments are emptied first. Characters accepted by
 * <code>isWordConstituent()</code> are accumulated in <code>word</code>; the following
 * characters that are not accepted are accumulated in <code>nonWord</code>. Either
 * result may be empty.
 *
 * @param word filled with the next (possibly empty) word.
 * @param nonWord filled with the (possibly empty) non-word run following the word.
 * @return false if no characters were left when the method was called, true otherwise.
 * @throws IOException if the underlying reader fails.
 */
public boolean next( final MutableString word, final MutableString nonWord ) throws IOException {
	final char[] chars = this.buffer;

	if ( noMoreCharacters() ) return false;

	word.length( 0 );
	nonWord.length( 0 );

	// Collect word constituents; when the available characters are exhausted,
	// ask for more and keep going until a non-constituent or the end is found.
	do {
		int run = 0;
		while( run < avail && isWordConstituent( chars[ pos + run ] ) ) run++;

		word.append( chars, pos, run );
		pos += run;
		avail -= run;
	} while( avail <= 0 && ! noMoreCharacters() );

	if ( noMoreCharacters() ) return true;

	// Same scan, this time for the characters that are not word constituents.
	do {
		int run = 0;
		while( run < avail && ! isWordConstituent( chars[ pos + run ] ) ) run++;

		nonWord.append( chars, pos, run );
		pos += run;
		avail -= run;
	} while( avail <= 0 && ! noMoreCharacters() );

	return true;
}
 
Example #5
Source File: RuntimeConfiguration.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/** Parses a seed URL, accepting it only if it is well formed and absolute.
 *
 * @param s the textual seed URL.
 * @return the parsed URL, or <code>null</code> (after logging an error) if <code>s</code>
 * is malformed or relative.
 */
private static URI handleSeedURL(final MutableString s) {
	final URI parsed = BURL.parse(s);
	if (parsed == null) {
		LOGGER.error("The seed URL " + s + " is malformed");
		return null;
	}
	if (! parsed.isAbsolute()) {
		LOGGER.error("The seed URL " + s + " is relative");
		return null;
	}
	return parsed;
}
 
Example #6
Source File: ImmutableExternalPrefixMap.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Returns the index of the given term by scanning a single block of the dump stream.
 *
 * @param o the term to look up (must be a {@link CharSequence}).
 * @return the index of the term, or -1 if the term cannot be encoded, has an empty
 * approximated interval, or is not found in the scanned block.
 * @throws RuntimeException wrapping any {@link IOException} raised by the dump stream.
 */
private long getIndex( final Object o ) {
	final CharSequence term = (CharSequence)o;
	ensureStream();
	// A term containing characters unknown to the prefix coder cannot be in the map.
	if ( ! isEncodable( term ) ) return -1;

	/* Every string extending term follows term, so term can only appear in the
	 * left block of the approximated interval computed for term itself. */
	final Interval approximation = intervalApproximator.getApproximatedInterval( term );
	if ( approximation == Intervals.EMPTY_INTERVAL ) return -1;

	try {
		dumpStream.position( blockOffset[ approximation.left ] * blockSize );
		dumpStream.readBits( 0 );
		iteratorIsUsable = false;

		final MutableString current = new MutableString();
		final int firstIndex = blockStart[ approximation.left ];
		final int lastIndex = blockStart[ approximation.left + 1 ];
		// Negative means "first string of the block": it has no stored prefix length.
		int prefixLength = -1;

		/* Decode the block string by string, stopping when the block is exhausted. */
		for( int index = firstIndex; index < lastIndex; index++ ) {
			if ( prefixLength < 0 ) prefixLength = 0;
			else prefixLength = dumpStream.readUnary();
			final int suffixLength = dumpStream.readUnary();

			// Keep the shared prefix and overwrite the rest with the decoded suffix.
			current.delete( prefixLength, current.length() );
			current.length( prefixLength + suffixLength );
			for( int k = 0; k < suffixLength; k++ ) current.charAt( prefixLength + k, symbol2char[ decoder.decode( dumpStream ) ] );

			if ( current.equals( term ) ) return index;
		}

		return -1;
	}
	catch ( IOException e ) {
		throw new RuntimeException( e );
	}
}
 
Example #7
Source File: RuntimeConfiguration.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/** Adds a (or a set of) new IPv4 to the black list; the IPv4 can be specified directly or it can be a file (prefixed by
 *  <code>file:</code>) containing one address per line (empty lines are skipped).
 *
 * <p>The file, if any, is read using the ISO-8859-1 encoding and is closed before this method returns,
 * whether or not reading succeeds.
 *
 * @param spec the specification (an IP address, or a file name prefixed by <code>file:</code>); an
 * empty specification is ignored.
 * @throws ConfigurationException propagated from the parsing of an address.
 * @throws FileNotFoundException if the specified file cannot be opened.
 */
public void addBlackListedIPv4(final String spec) throws ConfigurationException, FileNotFoundException {
	if (spec.length() == 0) return; // Skip empty specs
	if (! spec.startsWith("file:")) {
		blackListedIPv4Addresses.add(handleIPv4(spec));
		return;
	}

	// Keep a handle on the raw stream so that the file descriptor is always released.
	final FileInputStream fileInputStream = new FileInputStream(spec.substring(5));
	try {
		final LineIterator lineIterator = new LineIterator(new FastBufferedReader(new InputStreamReader(fileInputStream, Charsets.ISO_8859_1)));
		while (lineIterator.hasNext()) {
			final MutableString line = lineIterator.next();
			if (line.length() > 0) blackListedIPv4Addresses.add(handleIPv4(line.toString()));
		}
	}
	finally {
		try {
			fileInputStream.close();
		}
		catch (java.io.IOException ignored) {
			// The stream was only read: a failure on close cannot lose data, and the
			// declared exceptions of this method do not include IOException.
		}
	}
}
 
Example #8
Source File: ShiftAddXorSignedStringMap.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Command-line entry point: reads a list of strings and a previously serialised function,
 * builds a {@link ShiftAddXorSignedStringMap} over them and serialises it.
 *
 * @param arg the command-line arguments (see the JSAP declaration below).
 */
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws NoSuchMethodException, IOException, JSAPException, ClassNotFoundException {

	final SimpleJSAP jsap = new SimpleJSAP( ShiftAddXorSignedStringMap.class.getName(), "Builds a shift-add-xor signed string map by reading a newline-separated list of strings and a function built on the same list of strings.",
			new Parameter[] {
		new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read strings." ),
		new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding." ),
		new Switch( "zipped", 'z', "zipped", "The string list is compressed in gzip format." ),
		new FlaggedOption( "width", JSAP.INTEGER_PARSER, Integer.toString( Integer.SIZE ), JSAP.NOT_REQUIRED, 'w', "width", "The signature width in bits." ),
		new UnflaggedOption( "function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the function to be signed." ),
		new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the resulting serialised signed string map." ),
		new UnflaggedOption( "stringFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "Read strings from this file instead of standard input." ),
	});

	final JSAPResult options = jsap.parse( arg );
	// Help or error messages have already been shown to the user.
	if ( jsap.messagePrinted() ) return;

	final int ioBufferSize = options.getInt( "bufferSize" );
	final String functionFilename = options.getString( "function" );
	final String mapFilename = options.getString( "map" );
	final String stringFilename = options.getString( "stringFile" );
	final Charset charset = (Charset)options.getObject( "encoding" );
	final int signatureWidth = options.getInt( "width" );
	final boolean gzipped = options.getBoolean( "zipped" );

	// Strings come from the given file, if any, or from standard input.
	final InputStream rawInput;
	if ( stringFilename == null ) rawInput = System.in;
	else rawInput = new FileInputStream( stringFilename );

	final InputStream decodedInput;
	if ( gzipped ) decodedInput = new GZIPInputStream( rawInput );
	else decodedInput = rawInput;

	final Iterator<MutableString> lines = new LineIterator( new FastBufferedReader( new InputStreamReader( decodedInput, charset ), ioBufferSize ) );
	final Object2LongFunction<CharSequence> function = (Object2LongFunction<CharSequence>)BinIO.loadObject( functionFilename );
	LOGGER.info( "Signing..." );
	BinIO.storeObject( new ShiftAddXorSignedStringMap( lines, function, signatureWidth ), mapFilename );
	LOGGER.info( "Completed." );
}
 
Example #9
Source File: FileLinesCollection.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Returns all lines of the file wrapped by this file-lines collection.
 *
 * <p>Each line returned by the iterator is copied before being stored in the result.
 *
 * @return a list containing a copy of every line, in iteration order.
 */

public ObjectList<MutableString> allLines() {
	final ObjectArrayList<MutableString> lines = new ObjectArrayList<MutableString>();
	final Iterator<MutableString> lineIterator = iterator();
	while( lineIterator.hasNext() ) {
		final MutableString line = lineIterator.next();
		lines.add( line.copy() );
	}
	return lines;
}
 
Example #10
Source File: AnnotatedText.java    From tagme with Apache License 2.0 5 votes vote down vote up
/** Creates an annotated text from a given string.
 *
 * <p>The whole string is stored in a new {@link MutableString}. Note that the skipping of
 * leading characters that are neither letters nor digits is currently disabled (the
 * code that did it is commented out below), so no character is removed.
 *
 * @param text the text to be annotated.
 */
public AnnotatedText(String text){
	/* Disabled: skipping of leading characters that are neither letters nor digits.
	while(removedLeadingChars<text.length() && !Character.isLetterOrDigit(text.charAt(removedLeadingChars)))
		removedLeadingChars++;
	this.original = new MutableString(text.substring(removedLeadingChars,text.length()));
	*/
	this.original=new MutableString(text);
}
 
Example #11
Source File: TextPattern.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Creates a new {@link TextPattern} object that can be used to search for the given pattern.
 *
 * @param pattern the constant pattern to search for.
 * @param flags a bit mask that may include {@link #CASE_INSENSITIVE} and {@link #UNICODE_CASE}.
 */
public TextPattern( final CharSequence pattern, final int flags ) {
	this.pattern = new char[ pattern.length() ];
	MutableString.getChars( pattern, 0, this.pattern.length, this.pattern, 0 );
	caseSensitive = ( flags & CASE_INSENSITIVE ) == 0;
	asciiCase = ( flags & UNICODE_CASE ) == 0;
	if ( ! caseSensitive ) {
		int i = this.pattern.length;
		if ( asciiCase ) while( i-- != 0 ) this.pattern[ i ] = asciiToLowerCase( this.pattern[ i ] );
		else while( i-- != 0 ) this.pattern[ i ] = unicodeToLowerCase( this.pattern[ i ] );
	}
	compile();
}
 
Example #12
Source File: LiterallySignedStringMap.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Creates a new shift-add-xor signed string map using a given hash map.
 * 
 * @param function a function mapping each string in <code>list</code> to its ordinal position.
 * @param list a list of strings.
 */

public LiterallySignedStringMap( final Object2LongFunction<? extends CharSequence> function, final ObjectList<? extends MutableString> list ) {
	this.function = function;
	this.list = list;
	size = list.size();
	for( int i = 0; i < size; i++ ) if ( function.getLong( list.get( i ) ) != i ) throw new IllegalArgumentException( "Function and list do not agree" );
	defRetValue = -1;
}
 
Example #13
Source File: DebugInputBitStream.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Reads bits from the underlying stream and prints them, in binary and between braces, on the debug writer.
 *
 * @param bits the array that will receive the bits read.
 * @param len the number of bits to read.
 * @throws IOException if the underlying stream fails.
 */
@Override
public void read( byte[] bits, int len ) throws IOException {
	ibs.read( bits, len );
	final MutableString s = new MutableString( " {" );
	final int prefixLength = s.length();
	for( int i = 0; i < bits.length; i++ ) s.append( DebugOutputBitStream.byte2Binary( bits[ i ] ) );
	// Keep exactly len binary digits: the truncation point must account for the
	// opening " {", otherwise the last two bits read are dropped from the dump.
	pw.print( s.length( prefixLength + len ).append( "}" ) );
}
 
Example #14
Source File: BulletParserTest.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Checks how entities terminated by the end of the array (no semicolon) are scanned:
 * an unknown name is never accepted, a known one only in loose mode. */
public void testScanEntityAtEndOfArray() {
	final VisibleBulletParser parser = new VisibleBulletParser();
	final boolean strict = false, loose = true;

	final char[] unknownEntity = "&test".toCharArray();
	assertEquals( -1, parser.scanEntity( unknownEntity, 0, unknownEntity.length, strict, new MutableString() ) );
	assertEquals( -1, parser.scanEntity( unknownEntity, 0, unknownEntity.length, loose, new MutableString() ) );

	final char[] knownEntity = "&apos".toCharArray();
	assertEquals( -1, parser.scanEntity( knownEntity, 0, knownEntity.length, strict, new MutableString() ) );
	assertEquals( 5, parser.scanEntity( knownEntity, 0, knownEntity.length, loose, new MutableString() ) );
}
 
Example #15
Source File: Chars.java    From tagme with Apache License 2.0 5 votes vote down vote up
/** Converts a sequence of UTF-16 characters to ASCII.
 *
 * <p>Every character of the input is replaced by the corresponding entry of
 * <code>UTF16toASCII.MAP</code>; the input is left untouched.
 * @param input the sequence to convert.
 * @return a new mutable string containing the converted sequence.
 */
public static MutableString toNormalizedASCII(CharSequence input)
{
	final int length = input.length();
	final MutableString normalized = new MutableString(length+1);
	int pos = 0;
	while (pos < length) {
		final char original = input.charAt(pos);
		normalized.append(UTF16toASCII.MAP[original]);
		pos++;
	}
	return normalized;
}
 
Example #16
Source File: Chars.java    From tagme with Apache License 2.0 5 votes vote down vote up
/** Converts, in place, a mutable string from UTF-16 to ASCII.
 *
 * <p>Every character of the string is replaced by the corresponding entry of
 * <code>UTF16toASCII.MAP</code>, writing directly into the backing array.
 * @param input the string to convert; it is modified by this method.
 */
public static void normalizeASCII(MutableString input)
{
	final char[] chars = input.array();
	final int len = input.length();
	for(int i=0; i<len; i++)
		chars[i] = UTF16toASCII.MAP[chars[i]];
	// The backing array was modified behind the string's back: tell the string, so
	// that a hash code cached by a compact string is not returned for the new content.
	input.changed();
}
 
Example #17
Source File: WikipediaAnchorParser.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Removes punctuation from an anchor, keeping only runs of letters, digits and
 * explicitly ignored characters; the kept runs are joined by a single space.
 *
 * @param input the anchor text (not modified).
 * @param ignoreChars a set of characters (no digits, no letters) that must be kept, or null.
 * @param ignoreSequences if true, a non-whitespace character equal to one of its neighbours
 * (e.g., each character of '!!!') is kept too.
 * @return A new MutableString
 */
public static MutableString removePunctuations(MutableString input, String ignoreChars, boolean ignoreSequences)
{
	final int len = input.length();
	final char[] array = input.array();
	final MutableString norm = new MutableString(len);

	int pos = 0;
	int runStart = 0;
	while (pos < len)
	{
		// Extend the current run of characters to keep...
		while (pos < len && isAnchorCharacter(array, pos, len, ignoreChars, ignoreSequences)) pos++;

		// ...and emit it, separated from the previous run by one space.
		if (pos > runStart) {
			if (norm.length() > 0) norm.append(' ');
			norm.append(array, runStart, pos - runStart);
		}

		// Skip the characters to drop; the next run starts right after them.
		while (pos < len && !isAnchorCharacter(array, pos, len, ignoreChars, ignoreSequences)) {
			pos++;
			runStart = pos;
		}
	}
	return norm;
}

/** Tells whether the character in position <code>i</code> must be kept by {@link #removePunctuations}. */
private static boolean isAnchorCharacter(char[] array, int i, int len, String ignoreChars, boolean ignoreSequences)
{
	final char c = array[i];
	if (Character.isLetter(c) || Character.isDigit(c)) return true;
	if (ignoreChars != null && ignoreChars.indexOf(c) >= 0) return true;
	if (!ignoreSequences || Character.isWhitespace(c)) return false;
	// Part of a sequence: equal to the previous or to the next character.
	return (i > 0 && array[i-1] == c) || (i < len-1 && array[i+1] == c);
}
 
Example #18
Source File: WikiTextExtractor.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the links of a disambiguation page from its cleaned text.
 *
 * <p>Only list items are considered, i.e., lines that (once trimmed) start with one or more
 * asterisks. For each such line the leading asterisks are removed and the first link found
 * in the remaining text, if any, is added to the result.
 *
 * @param cleanText the cleaned text of the page.
 * @return the list of extracted links, in order of appearance (possibly empty).
 */
public List<WikiLink> extractDisambiguationLinks(MutableString cleanText)
{
	final FastBufferedReader tokenizer = new FastBufferedReader(cleanText);
	final MutableString buffer = new MutableString(1024);
	final List<WikiLink> links = new ArrayList<WikiLink>();

	try {
		while(tokenizer.readLine(buffer) != null)
		{
			buffer.trim();
			if (buffer.length() == 0 || buffer.charAt(0) != '*') continue;

			// Find the end of the run of leading asterisks (the list-item marker)...
			int start = 1;
			while (start<buffer.length() && buffer.charAt(start)=='*') start++;
			// ...and strip the marker, keeping the item text. (Deleting from start to the
			// end of the line would keep only the asterisks, so no link could be found.)
			buffer.delete(0, start).trim();

			if (buffer.length() == 0) continue;

			final List<WikiLink> lineLinks = extractLinkFromCleanedLine(buffer);
			if (lineLinks.size()>0) links.add(lineLinks.get(0));
		}
	} catch (IOException ignored){
		// Deliberately ignored, as in the original code: whatever was extracted so far is returned.
	}

	return links;
}
 
Example #19
Source File: SQLWikiParser.java    From tagme with Apache License 2.0 5 votes vote down vote up
/** Reads characters up to (and consuming) the next space, newline or end of stream.
 *
 * @param r the reader to read from.
 * @return the characters read, delimiter excluded (possibly the empty string).
 * @throws IOException if the reader fails.
 */
static String readToken(InputStreamReader r) throws IOException
{
    final MutableString token = new MutableString();
    for (int ch = r.read(); ch != ' ' && ch != '\n' && ch != -1; ch = r.read())
        token.append((char)ch);
    return token.toString();
}
 
Example #20
Source File: BuildRepetitionSet.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/** Reads tab-separated lines (store index, store position, URL) from standard input and
 * records every line whose URL is equal to the URL of the previous line.
 *
 * <p>For each such line the value <code>storeIndex &lt;&lt; 48 | storePosition</code> is added to a
 * set, and the line is echoed on standard output; at the end the set is serialised to the
 * file named by the first argument.
 *
 * @param arg the command-line arguments: the name of the output file.
 */
public static void main(String[] arg) throws IOException {
	if (arg.length == 0) {
		System.err.println("Usage: " + BuildRepetitionSet.class.getSimpleName() + " REPETITIONSET");
		System.exit(1);
	}

	final FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII));
	final MutableString line = new MutableString();
	final LongOpenHashSet repeatedSet = new LongOpenHashSet();
	final String outputFilename = arg[0];
	final ProgressLogger progress = new ProgressLogger();
	progress.itemsName = "lines";

	MutableString previousUrl = new MutableString();
	progress.start("Reading... ");
	while(reader.readLine(line) != null) {
		// Fields: storeIndex <TAB> storePosition <TAB> url
		final int firstTab = line.indexOf('\t');
		final int secondTab = line.indexOf('\t', firstTab + 1);
		final MutableString url = line.substring(secondTab + 1);

		if (url.equals(previousUrl)) {
			final char[] backing = line.array();
			final int storeIndex = Integer.parseInt(new String(backing, 0, firstTab));
			final long storePosition = Long.parseLong(new String(backing, firstTab + 1, secondTab - firstTab - 1));
			// The store index goes in the upper 16 bits, the position in the lower 48.
			repeatedSet.add((long)storeIndex << 48 | storePosition);
			System.out.println(storeIndex + "\t" + storePosition + "\t" + url);
		}

		previousUrl = url;
		progress.lightUpdate();
	}

	progress.done();

	reader.close();
	BinIO.storeObject(repeatedSet, outputFilename);
}
 
Example #21
Source File: WikiTextExtractor.java    From tagme with Apache License 2.0 5 votes vote down vote up
/** Cleans a page, collecting the links met while cleaning.
 *
 * @param input the text of the page.
 * @param links a list to which the links caught during cleaning are added.
 * @return the result of <code>removeStructure()</code> applied to the cleaned text.
 */
public MutableString extractPageAndLink(MutableString input, List<WikiLink> links)
{
	final LinkCatcher collector = new LinkCatcher();
	final MutableString withoutMarkup = clean(input, collector);
	// The catcher has been filled as a side effect of clean().
	links.addAll(collector.links);
	return removeStructure(withoutMarkup, false);
}
 
Example #22
Source File: Element.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Creates a new element with an empty content model.
 *
 * <p>The name is copied into a new {@link MutableString}, and its length is cached.
 *
 * @param name the name of the type of the new element.
 * @param breaksFlow true if this elements breaks the flow.
 * @param isSimple true if this element is simple.
 * @param isImplicit true if this element has implicit closure.
 */
public Element( final CharSequence name, final boolean breaksFlow, final boolean isSimple, final boolean isImplicit ) {
	// Identity
	this.name = new MutableString( name );
	this.nameLength = name.length();

	// Structural flags
	this.breaksFlow = breaksFlow;
	this.isSimple = isSimple;
	this.isImplicit = isImplicit;

	// Content model, initially empty
	this.contentModel = new ReferenceLinkedOpenHashSet<Element>( Hash.DEFAULT_INITIAL_SIZE, 0.5f );
}
 
Example #23
Source File: DebugOutputBitStream.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Returns the eight-digit binary representation of the lowest byte of the argument.
 *
 * <p>Only the eight low-order bits of <code>x</code> are used, so the result is made of
 * '0' and '1' characters even for negative arguments (e.g., a sign-extended <code>byte</code>).
 *
 * @param x an integer.
 * @return a new mutable string of eight binary digits, most significant bit first.
 */
static MutableString byte2Binary( int x ) {
	final MutableString s = new MutableString( 8 );
	// Extract bits with a mask: x % 2 is -1 for negative odd values, which would
	// produce the character before '0' instead of '1'.
	for( int bit = 7; bit >= 0; bit-- ) s.append( (char)( '0' + ( ( x >>> bit ) & 1 ) ) );
	return s;
}
 
Example #24
Source File: BulletParser.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Searches for the end of an entity.
 * 
 * <P>This method will search for the end of an entity starting at the given offset (the offset
 * must correspond to the ampersand). On success, the decoded character is stored in
 * <code>lastEntity</code>.
 * 
 * <P>Real-world HTML pages often contain hundreds of misplaced ampersands, due to the
 * unfortunate idea of using the ampersand as query separator (<em>please</em> use the comma
 * in new code!). All such ampersand should be specified as <samp>&amp;amp;</samp>. 
 * If named entities are delimited using a transition
 * from alphabetical to non-alphabetical characters, we can easily get false positives. If the parameter
 * <code>loose</code> is false, named entities can be delimited only by whitespace or by a semicolon.
 * 
 * @param a a character array containing the entity.
 * @param offset the offset at which the entity starts (the offset must point at the ampersand).
 * @param length an upper bound to the number of characters scanned from <code>offset</code>.
 * @param loose if true, named entities can be terminated by any character that is neither a letter nor a digit
 * (or by the end of the scanned region), instead of just whitespace or semicolon.
 * @param entity a support mutable string used to look up the entity name.
 * @return the position immediately after the entity (including the terminating semicolon, if present),
 * or -1 if no entity was found.
 */
protected int scanEntity( final char[] a, final int offset, final int length, final boolean loose, final MutableString entity ) {

	int i, c = 0;
	String tmpEntity;

	// At least the ampersand and one more character are needed.
	if ( length < 2 ) return -1;
	
	if ( a[ offset + 1 ] == '#' ) {
		// Numeric entity: &#xHHHH (hexadecimal) or &#DDDD (decimal).
		if ( length > 2 && a[ offset + 2 ] == 'x' ) {
			for( i = 3; i < length && i < MAX_HEX_ENTITY_LENGTH && Character.digit( a[ i + offset ], HEXADECIMAL ) != -1; i++ );
			tmpEntity =  new String( a, offset + 3, i - 3 );
			// i == 3 means that no digit was found; c stays 0 and the entity is rejected below.
			if ( i != 3 ) c = Integer.parseInt( tmpEntity, HEXADECIMAL );
		}
		else {
			for( i = 2; i < length && i < MAX_DEC_ENTITY_LENGTH && Character.isDigit( a[ i + offset ] ); i++ );
			tmpEntity = new String( a, offset + 2, i - 2 );
			// i == 2 means that no digit was found; c stays 0 and the entity is rejected below.
			if ( i != 2 ) c = Integer.parseInt( tmpEntity );
		}
		
		if ( c > 0 && c < MAX_ENTITY_VALUE ) {
			lastEntity = (char)c;
			// The terminating semicolon is optional, but it is consumed if present.
			if ( i < length && a[ i + offset ] == ';' ) i++;
			return i + offset;
		}
	}
	else {
		if ( Character.isLetter( a[ offset + 1 ] ) ) {
			// Named entity: a letter followed by letters or digits.
			for( i = 2; i < length && Character.isLetterOrDigit( a[ offset + i ] ); i++ );
			// Note that i starts from 2, so the test i != 1 is always true. The name is accepted
			// if it is suitably delimited (see loose) and entity2Char() returns a nonzero character for it.
			if ( i != 1 && ( loose || ( i < length && ( Character.isWhitespace( a[ offset + i ] ) || a[ offset + i ] == ';' ) ) ) && ( lastEntity = entity2Char( entity.length( 0 ).append( a, offset + 1, i - 1 ) ) ) != 0 ) {
				// The terminating semicolon is optional, but it is consumed if present.
				if ( i < length && a[ i + offset ] == ';' ) i++;
				return i + offset;
			}
		}
	}

	return -1;
}
 
Example #25
Source File: SpamTextProcessor.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/** Counts the given character, lower-cased, as a one-character term.
 *
 * <p>If the term is known to the ontology (i.e., its index is not -1), the corresponding
 * counter is incremented, saturating at {@link Short#MAX_VALUE}.
 *
 * @param c the character to process.
 * @return this object.
 */
@Override
public Appendable append(char c) throws IOException {
	final MutableString term = new MutableString().append(Character.toLowerCase(c));
	final short index = (short)termSetOnthology.getLong(term);
	if (index == -1) return this;

	final short oldValue = termCount.get(index);
	// Saturate rather than overflow.
	if (oldValue < Short.MAX_VALUE) termCount.put(index, (short)(oldValue + 1));
	return this;
}
 
Example #26
Source File: ExternalSortUtils.java    From tagme with Apache License 2.0 5 votes vote down vote up
/** Formats an array of floats as a bracketed, comma-separated list (e.g., <code>[1.50,2.25]</code>).
 *
 * <p>Values are formatted independently of the default locale, so the decimal separator is
 * always a dot and can never be confused with the comma separating the values.
 *
 * @param data the values to format.
 * @param precision the number of digits after the decimal separator.
 * @return the formatted list.
 */
public static CharSequence formatFloatArray(float[] data, int precision){
	// The format is the same for all values: build it once.
	final String format = "%."+precision+"f";
	final MutableString buf = new MutableString(data.length*2+1);
	buf.append('[');
	for(int i=0;i<data.length; i++){
		if (i>0) buf.append(',');
		buf.append(String.format(java.util.Locale.ROOT, format, data[i]));
	}
	buf.append(']');
	return buf;
}
 
Example #27
Source File: ExternalSortUtils.java    From tagme with Apache License 2.0 5 votes vote down vote up
/** Formats an array of doubles as a bracketed, comma-separated list (e.g., <code>[1.50,2.25]</code>).
 *
 * <p>Values are formatted independently of the default locale, so the decimal separator is
 * always a dot and can never be confused with the comma separating the values.
 *
 * @param data the values to format.
 * @param precision the number of digits after the decimal separator.
 * @return the formatted list.
 */
public static CharSequence formatDoubleArray(double[] data, int precision){
	// The format is the same for all values: build it once.
	final String format = "%."+precision+"f";
	final MutableString buf = new MutableString(data.length*2+1);
	buf.append('[');
	for(int i=0;i<data.length; i++){
		if (i>0) buf.append(',');
		buf.append(String.format(java.util.Locale.ROOT, format, data[i]));
	}
	buf.append(']');
	return buf;
}
 
Example #28
Source File: WellFormedXmlFactory.java    From database with GNU General Public License v2.0 5 votes vote down vote up
/** Returns the element with the given name, creating and registering it if necessary.
 *
 * @param name the name of an element.
 * @return the element registered under <code>name</code>.
 */
public Element getElement( final MutableString name ) {
	final Element known = name2Element.get(name);
	if ( known != null ) return known;

	final Element created = new Element(name);
	// The key is the name held by the element, not the caller's (mutable) argument.
	name2Element.put(created.name, created );
	return created;
}
 
Example #29
Source File: KnowledgeBase.java    From fasten with Apache License 2.0 5 votes vote down vote up
/** Initializes the kryo instance used for serialization.
 *
 * <p>A new {@link Kryo} is created and the classes handled by this knowledge base are
 * registered with it; some of them with an explicit serializer.
 */
private void initKryo() {
	kryo = new Kryo();
	// NOTE(review): Kryo presumably assigns class IDs in registration order, so reordering
	// or removing registrations may break the reading of existing data -- confirm before changing.
	kryo.register(BVGraph.class, new BVGraphSerializer(kryo));
	kryo.register(byte[].class);
	kryo.register(InputBitStream.class);
	kryo.register(NullInputStream.class);
	// Serialized through standard Java serialization.
	kryo.register(EliasFanoMonotoneLongBigList.class, new JavaSerializer());
	// Serialized field by field.
	kryo.register(MutableString.class, new FieldSerializer<>(kryo, MutableString.class));
	kryo.register(Properties.class);
	kryo.register(long[].class);
	kryo.register(Long2IntOpenHashMap.class);
}
 
Example #30
Source File: SpamTextProcessor.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/** Consumes all words from the reader, counting those that belong to the ontology.
 *
 * <p>Each word is lower-cased in place and looked up; if it is known (i.e., its index is
 * not -1), its counter is incremented, saturating at {@link Short#MAX_VALUE}.
 *
 * @throws IOException if the underlying reader fails.
 */
private void process() throws IOException {
	final MutableString word = new MutableString();
	final MutableString nonWord = new MutableString();

	while (fbr.next(word, nonWord)) {
		final short index = (short)termSetOnthology.getLong(word.toLowerCase());
		if (index == -1) continue;

		final short oldValue = termCount.get(index);
		if (oldValue >= Short.MAX_VALUE) continue; // Saturate rather than overflow.
		termCount.put(index, (short)(oldValue + 1));
	}
}