Java Code Examples for it.unimi.dsi.lang.MutableString

The following examples show how to use it.unimi.dsi.lang.MutableString. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/** Returns an interned, canonical copy contained in this set of the specified mutable string.
*
* <p>The semantics of this method is essentially the same as that of
* {@link java.util.Collection#add(Object)}, but 
* this method will return the stored element
* equal to <code>s</code> currently in this set. The returned object will
* <em>never</em> be <code>s</code> itself: when <code>s</code> is
* not in this set, a new {@link Term} built from <code>s</code>
* is stored (and returned) instead.
*
* <p>The purpose of this method is similar to that of {@link String#intern()},
* but obviously here the user has much greater control.
*
* @param s the mutable string that must be interned.
* @return the {@link Term} equal to <code>s</code> stored in this set.
*/

public Term intern( final MutableString s ) {
	// Duplicate code from add()--keep in line!
	final int i = findInsertionPoint( s );
	// A negative result encodes the slot of an already-stored equal key as -( slot + 1 ).
	if ( i < 0 ) return (Term)(key[ -( i + 1 ) ]);

	// Claiming a slot that was still FREE reduces the count of free slots.
	if ( state[ i ] == FREE ) free--;
	state[ i ] = OCCUPIED;
	// Store a fresh Term built from s, never s itself.
	final Term t = (Term)( key[ i ] = new Term( s ) );

	if ( ++count >= maxFill ) {
		int newP = Math.min( p + growthFactor(), PRIMES.length - 1 );
		// Just to be sure that size changes when p is very small.
		while( PRIMES[ newP ] == PRIMES[ p ] ) newP++;
		rehash( newP ); // Table too filled, let's rehash
	}
	// No FREE slot is left: rehash at the current size index.
	// NOTE(review): presumably this reclaims slots that are neither FREE nor OCCUPIED -- confirm against rehash().
	if ( free == 0 ) rehash( p );
	return t;
}
 
Example 2
Source Project: BUbiNG   Source File: MockFetchedResponses.java    License: Apache License 2.0 6 votes vote down vote up
/** Fabricates a small HTML page for the given URI and writes it to the response buffer.
 *
 * <p>When the host and the path (minus its leading character) both parse as integers,
 * the page contains ten links to the same host plus one link to another host; the
 * link targets come from a pseudorandom generator seeded by the (host, page) pair, so
 * the same URI always produces the same page. Otherwise the body is left empty.
 *
 * @param uri the URI being "fetched".
 * @throws IOException if writing the page to the response buffer fails.
 */
public void fetch(URI uri) throws IOException {
	this.url = uri;
	MutableString s = new MutableString();
	s.append("<html><head><title>").append(uri).append("</title></head>\n");
	s.append("<body>\n");

	try {
		final int host = Integer.parseInt(uri.getHost());
		final int page = Integer.parseInt(uri.getRawPath().substring(1));
		// Widen before shifting: on an int the shift distance is taken modulo 32, so
		// "host << 32" is just "host" and the seed would collapse to host | page.
		// The mask keeps a negative page from sign-extending over the host bits.
		final Random random = new Random(((long)host << 32) | (page & 0xFFFFFFFFL));
		for(int i = 0; i < 10; i++)
			s.append("<a href=\"http://").append(host).append('/').append(random.nextInt(10000)).append("\">Link ").append(i).append("</a>\n");
		s.append("<a href=\"http://").append(random.nextInt(1000)).append('/').append(random.nextInt(10000)).append("\">External link ").append("</a>\n");

	}
	catch(NumberFormatException e) {
		// Deliberate best effort: a non-numeric host or path just yields a page without links.
	}
	s.append("</body></html>\n");
	inspectableBufferedInputStream.write(ByteBuffer.wrap(Util.toByteArray(s.toString())));
}
 
Example 3
Source Project: database   Source File: ImmutableBinaryTrie.java    License: GNU General Public License v2.0 6 votes vote down vote up
/** Recursively appends a textual dump of the subtrie rooted at <code>n</code> to <code>result</code>.
 *
 * <p><code>printPrefix</code> and <code>path</code> are shared scratch buffers: this method
 * extends them before recursing and removes exactly what it added before returning.
 *
 * @param n the node to dump; a <code>null</code> node produces no output.
 * @param printPrefix the prefix printed before this node's line.
 * @param result the buffer receiving the dump.
 * @param path the path accumulated from the root down to (and, after the first step, including) this node.
 * @param level the depth printed in parentheses for this node.
 */
private void recToString( final Node n, final MutableString printPrefix, final MutableString result, final MutableString path, final int level ) {
	if ( n == null ) return;
	
	//System.err.println( "Called with prefix " + printPrefix );
	
	result.append( printPrefix ).append( '(' ).append( level ).append( ')' );
	
	if ( n.path != null ) {
		// Extend the accumulated path with this node's own path, and print it.
		path.append( LongArrayBitVector.wrap( n.path, n.pathLength ) );
		result.append( " path:" ).append( LongArrayBitVector.wrap( n.path, n.pathLength ) );
	}
	if ( n.word >= 0 ) result.append( " word: " ).append( n.word ).append( " (" ).append( path ).append( ')' );

	result.append( '\n' );
	
	// Left child: one extra path character '0'; the prefix grows by 6 chars ('\t' plus "0 => ").
	path.append( '0' );
	recToString( n.left, printPrefix.append( '\t' ).append( "0 => " ), result, path, level + 1 );
	// Right child: flip the last path character to '1' and swap the last 5 prefix chars for "1 => ".
	path.charAt( path.length() - 1, '1' ); 
	recToString( n.right, printPrefix.replace( printPrefix.length() - 5, printPrefix.length(), "1 => "), result, path, level + 1 );
	// Undo the branch character and the 6 prefix characters added above.
	path.delete( path.length() - 1, path.length() ); 
	printPrefix.delete( printPrefix.length() - 6, printPrefix.length() );
	
	//System.err.println( "Path now: " + path + " Going to delete from " + ( path.length() - n.pathLength));
	
	// Undo this node's own path.
	// NOTE(review): this runs even when n.path is null; presumably n.pathLength is 0 in that case -- confirm.
	path.delete( path.length() - n.pathLength, path.length() );
}
 
Example 4
Source Project: fasten   Source File: KnowledgeBase.java    License: Apache License 2.0 5 votes vote down vote up
/** Initializes the kryo instance used for serialization.
 *
 * <p>Creates a fresh {@link Kryo} and registers every class this knowledge base serializes.
 * Three classes get an explicit serializer; the others use whatever Kryo picks by default.
 * NOTE(review): Kryo presumably assigns class IDs in registration order, so the order of
 * these calls should be treated as part of the on-disk format -- confirm before reordering.
 */
private void initKryo() {
	kryo = new Kryo();
	// Custom serializer for graphs.
	kryo.register(BVGraph.class, new BVGraphSerializer(kryo));
	kryo.register(byte[].class);
	kryo.register(InputBitStream.class);
	kryo.register(NullInputStream.class);
	// Falls back to standard Java serialization for this class.
	kryo.register(EliasFanoMonotoneLongBigList.class, new JavaSerializer());
	// Field-by-field serialization of mutable strings.
	kryo.register(MutableString.class, new FieldSerializer<>(kryo, MutableString.class));
	kryo.register(Properties.class);
	kryo.register(long[].class);
	kryo.register(Long2IntOpenHashMap.class);
}
 
Example 5
Source Project: tagme   Source File: Chars.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Lowercases every character of a word except the first one, which is copied untouched.
 *
 * <p>Characters are lowercased one at a time with {@link Character#toLowerCase(char)},
 * so the result always has the same length as the input.
 *
 * @param word the word to convert; an empty word is returned unchanged.
 * @return word with the first character untouched and the others turned to lowercase.
 */
public static String capitalizeOnlyFirst(String word)
{
	// Nothing to keep or lowercase; also avoids charAt(0) failing on an empty string.
	if (word.isEmpty()) return word;

	// The buffer never escapes this method, so a plain StringBuilder is enough.
	final StringBuilder s = new StringBuilder(word.length());
	s.append(word.charAt(0));
	for(int i=1; i<word.length(); i++)
		s.append(Character.toLowerCase(word.charAt(i)));
	return s.toString();
}
 
Example 6
Source Project: tagme   Source File: Chars.java    License: Apache License 2.0 5 votes vote down vote up
/** Converts a string from UTF-16 to ASCII, in place.
 *
 * <p>Each of the first <code>input.length()</code> characters of the backing array is
 * replaced by the entry of {@link UTF16toASCII#MAP} indexed by that character.
 *
 * @param input the string to convert; its content is overwritten.
 */
public static void normalizeASCII(MutableString input)
{
	final char[] chars = input.array();
	final int len = input.length();
	for(int i=0; i<len; i++)
		chars[i] = UTF16toASCII.MAP[chars[i]];
	// The backing array was edited behind the string's back: a compact MutableString
	// caches its hash code, so tell it the content changed.
	input.changed();
}
 
Example 7
Source Project: tagme   Source File: Chars.java    License: Apache License 2.0 5 votes vote down vote up
/** Builds the ASCII-normalized version of a UTF-16 character sequence.
 *
 * <p>Every input character is looked up in {@link UTF16toASCII#MAP} and the mapped
 * value is appended to a new mutable string; the input is not modified.
 *
 * @param input the sequence to convert.
 * @return a new mutable string holding the converted sequence.
 */
public static MutableString toNormalizedASCII(CharSequence input)
{
	final int length = input.length();
	final MutableString result = new MutableString(length+1);
	int pos = 0;
	while (pos < length) {
		final char source = input.charAt(pos++);
		result.append(UTF16toASCII.MAP[source]);
	}
	return result;
}
 
Example 8
Source Project: tagme   Source File: AnnotatedText.java    License: Apache License 2.0 5 votes vote down vote up
/** Creates an annotated text object by draining a reader.
 *
 * <p>The reader is consumed until end of stream, in chunks of <code>length + 1</code>
 * characters; <code>length</code> sizes the chunk buffer and the initial capacity of
 * the stored text, it does not cap how much is read.
 *
 * @param reader where to read the data from.
 * @param length the expected length of the text, used to size the buffers.
 * @throws IOException if an error occurred while reading from the reader.
 */
public AnnotatedText(Reader reader, int length) throws IOException
{
	final int chunkSize = length+1;
	original = new MutableString(chunkSize);
	final char[] chunk = new char[chunkSize];
	for (int count = reader.read(chunk, 0, chunkSize); count >= 0; count = reader.read(chunk, 0, chunkSize))
		original.append(chunk, 0, count);
}
 
Example 9
Source Project: tagme   Source File: AnnotatedText.java    License: Apache License 2.0 5 votes vote down vote up
/** Creates an annotated text from a given string, storing a copy of the whole string.
 *
 * <p>NOTE(review): the text used to be stripped of all leading chars that are not letters
 * nor digits; that logic is currently commented out in the body, so nothing is skipped.
 *
 * @param text the text to annotate.
 */
public AnnotatedText(String text){
	// Disabled: skipping of leading non-alphanumeric characters.
	/*while(removedLeadingChars<text.length() && !Character.isLetterOrDigit(text.charAt(removedLeadingChars)))
		removedLeadingChars++;
	this.original = new MutableString(text.substring(removedLeadingChars,text.length()));
	*/
	this.original=new MutableString(text);
}
 
Example 10
Source Project: database   Source File: DebugInputBitStream.java    License: GNU General Public License v2.0 5 votes vote down vote up
/** Reads bits from the underlying stream and logs them, in binary, to the debug writer.
 *
 * <p>The logged text has the form <code> {…}</code>, with the first <code>len</code>
 * binary digits of <code>bits</code> between the braces.
 *
 * @param bits the array receiving the bits.
 * @param len the number of bits to read.
 * @throws IOException if the underlying stream fails.
 */
@Override
public void read( byte[] bits, int len ) throws IOException {
	ibs.read( bits, len );
	MutableString s = new MutableString( " {" );
	final int prefixLength = s.length();
	for( int i = 0; i < bits.length; i++ ) s.append( DebugOutputBitStream.byte2Binary( bits[ i ] ) );
	// Keep the prefix plus the first len digits. The prefix must be counted, otherwise the
	// last two digits are cut off; the min() avoids padding when fewer digits are available.
	pw.print( s.length( Math.min( s.length(), prefixLength + len ) ).append( "}" ) );
}
 
Example 11
Source Project: database   Source File: TextPattern.java    License: GNU General Public License v2.0 5 votes vote down vote up
/** Creates a new {@link TextPattern} object that can be used to search for the given pattern.
 *
 * @param pattern the constant pattern to search for.
 * @param flags a bit mask that may include {@link #CASE_INSENSITIVE} and {@link #UNICODE_CASE}.
 */
public TextPattern( final CharSequence pattern, final int flags ) {
	this.pattern = new char[ pattern.length() ];
	MutableString.getChars( pattern, 0, this.pattern.length, this.pattern, 0 );
	caseSensitive = ( flags & CASE_INSENSITIVE ) == 0;
	asciiCase = ( flags & UNICODE_CASE ) == 0;
	if ( ! caseSensitive ) {
		int i = this.pattern.length;
		if ( asciiCase ) while( i-- != 0 ) this.pattern[ i ] = asciiToLowerCase( this.pattern[ i ] );
		else while( i-- != 0 ) this.pattern[ i ] = unicodeToLowerCase( this.pattern[ i ] );
	}
	compile();
}
 
Example 12
Source Project: tagme   Source File: WikipediaAnchorParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Strips punctuation from an anchor: only runs of "kept" characters survive, and
 * consecutive runs are joined by a single space.
 *
 * <p>A character is kept when it is a letter or a digit, when it occurs in
 * <code>ignoreChars</code>, or, if <code>ignoreSequences</code> is set, when it is a
 * non-whitespace character equal to its left or right neighbour (e.g. '!!!').
 *
 * @param input the anchor text; it is not modified.
 * @param ignoreChars a set of characters (no digits, no letters) that are kept as well; may be null.
 * @param ignoreSequences if true, sequences of the same character are not removed.
 * @return a new MutableString with the cleaned text.
 */
public static MutableString removePunctuations(MutableString input, String ignoreChars, boolean ignoreSequences)
{
	final int length = input.length();
	final char[] chars = input.array();
	final MutableString cleaned = new MutableString(length);

	int pos = 0;
	int runStart = 0;
	while (pos < length)
	{
		// Advance to the end of the current run of kept characters.
		while (pos < length && isKeptAnchorChar(chars, length, pos, ignoreChars, ignoreSequences))
			pos++;

		if (pos > runStart) {
			if (cleaned.length() > 0) cleaned.append(' ');
			cleaned.append(chars, runStart, pos - runStart);
		}

		// Skip the characters to drop; the next run starts right after them.
		while (pos < length && !isKeptAnchorChar(chars, length, pos, ignoreChars, ignoreSequences)) {
			pos++;
			runStart = pos;
		}
	}
	return cleaned;
}

/** Tells whether the character at <code>pos</code> must be kept by {@link #removePunctuations}. */
private static boolean isKeptAnchorChar(char[] chars, int length, int pos, String ignoreChars, boolean ignoreSequences)
{
	final char c = chars[pos];
	if (Character.isLetter(c) || Character.isDigit(c)) return true;
	if (ignoreChars != null && ignoreChars.indexOf(c) >= 0) return true;
	if (!ignoreSequences || Character.isWhitespace(c)) return false;
	final boolean sameAsPrevious = pos > 0 && chars[pos - 1] == c;
	final boolean sameAsNext = pos < length - 1 && chars[pos + 1] == c;
	return sameAsPrevious || sameAsNext;
}
 
Example 13
Source Project: tagme   Source File: WikipediaAnchorParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Manages dots, removing them if they are part of an abbreviation, or replacing them with
 * whitespace if they are the last char of a word.
 *
 * <p>The input is split into runs of characters that are neither dots nor whitespace,
 * separated by runs of dots and/or whitespace. Every separator run is dropped; a single
 * space is emitted in its place only when the run was flagged as a word boundary (see the
 * inline comments) and some text has already been emitted.
 *
 * @param input the text to process; it is not modified.
 * @return a new MutableString holding the text without dots.
 */
public static MutableString removeDots(MutableString input)
{
	int len = input.length();
	char[] array = input.array();
	MutableString res = new MutableString(len);

	// True when the separator run just skipped must be rendered as a space.
	boolean isLastDot = false;
	int i=0, last=0;
	while(i<len)
	{
		// Advance to the end of the current run of ordinary characters.
		while(i<len && array[i]!='.' && !Character.isWhitespace(array[i]))
			i++;

		if (i>last) {
			if (isLastDot && res.length() > 0) res.append(' ');
			res.append(array, last, i-last);
		}

		isLastDot = false;
		// Skip the separator run, deciding whether it marks a word boundary.
		while(i<len && (array[i]=='.' || Character.isWhitespace(array[i])))
		{
			// Boundary if: the separator is whitespace; or the char two positions ahead is
			// neither a dot nor whitespace; or we are at the second-to-last position and
			// the char two positions back is neither a dot nor whitespace.
			if (Character.isWhitespace(array[i]) ||
					(i<len-2 && array[i+2]!='.' && !Character.isWhitespace(array[i+2])) ||
					(i==len-2 && i>1 && array[i-2]!='.' && !Character.isWhitespace(array[i-2]))
					)
				isLastDot = true;
			i++;
			last=i;
		}
	}
	return res;


}
 
Example 14
/** Returns a list iterator over the strings of this list, starting at a given index.
 *
 * <p>The iterator delegates to the underlying front-coded list (byte-based when
 * <code>utf8</code> is set, char-based otherwise) and wraps each decoded element in a
 * {@link MutableString}.
 *
 * @param k the index of the first element returned by <code>next()</code>.
 * @return a list iterator over this list.
 */
public ObjectListIterator<MutableString> listIterator( final int k ) { return new AbstractObjectListIterator<MutableString>() {
		ObjectListIterator<?> i = utf8 ? byteFrontCodedList.listIterator( k ) : charFrontCodedList.listIterator( k );
		
		public boolean hasNext() { return i.hasNext(); }
		public boolean hasPrevious() { return i.hasPrevious(); }
		public MutableString next() { return  MutableString.wrap( utf8 ? byte2Char( (byte[])i.next(), null ) : (char[])i.next() ); }
		// Both encodings must step backwards here; the UTF-8 branch used to call next().
		public MutableString previous() { return MutableString.wrap( utf8 ? byte2Char( (byte[])i.previous(), null ) : (char[])i.previous() ); }
		public int nextIndex() { return i.nextIndex(); }
		public int previousIndex() { return i.previousIndex(); }
	};
}
 
Example 15
Source Project: tagme   Source File: DisambiguationWIDs.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Collects the IDs of the pages that belong to a disambiguation category.
 *
 * <p>Scans the category-links dump for the current language: every row whose cleaned,
 * lowercased target category title matches the disambiguation-category pattern
 * contributes its source page ID to the result.
 *
 * @return the set of matching page IDs, trimmed to size.
 * @throws IOException if the dump cannot be opened or read.
 */
@Override
protected IntSet parseSet() throws IOException
{
	final Pattern pattern = WikiPatterns.getPattern(lang, Type.DISAMB_CAT);
	final IntOpenHashSet ids = new IntOpenHashSet();
	SQLWikiParser parserCatLinks = new SQLWikiParser(log)
	{
		@Override
		public boolean compute(ArrayList<String> values) throws IOException
		{
			MutableString cat = new MutableString(values.get(SQLWikiParser.CATLINKS_TITLE_TO));
			cat = cleanPageName(cat).toLowerCase();
			if (pattern.matcher(cat).matches())
			{
				ids.add(Integer.parseInt(values.get(SQLWikiParser.CATLINKS_ID_FROM)));
				return true;
			}
			return false;
		}
	};
	File catLinks = WikipediaFiles.CAT_LINKS.getSourceFile(lang);
	InputStreamReader inCatLinks = new InputStreamReader(new FileInputStream(catLinks), Charset.forName("UTF-8"));
	try {
		parserCatLinks.compute(inCatLinks);
	}
	finally {
		// Release the file handle even when parsing fails midway.
		inCatLinks.close();
	}
	
	ids.trim();
	
	return ids;
}
 
Example 16
Source Project: tagme   Source File: SQLWikiParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads characters from the reader up to, and consuming, the next space, newline or
 * end of stream, and returns what was read before that delimiter.
 *
 * @param r the reader to consume.
 * @return the token read, possibly empty.
 * @throws IOException if reading fails.
 */
static String readToken(InputStreamReader r) throws IOException
{
    final MutableString token = new MutableString();
    for (int ch = r.read(); ch != ' ' && ch != '\n' && ch != -1; ch = r.read())
        token.append((char)ch);
    return token.toString();
}
 
Example 17
Source Project: database   Source File: FileLinesCollection.java    License: GNU General Public License v2.0 5 votes vote down vote up
/** Returns all lines of the file wrapped by this file-lines collection.
 *
 * <p>Each line handed out by the iterator is copied before being stored.
 * NOTE(review): presumably the iterator reuses a single mutable string -- confirm.
 *
 * @return a new list holding a copy of every line.
 */

public ObjectList<MutableString> allLines() {
	final ObjectArrayList<MutableString> lines = new ObjectArrayList<MutableString>();
	final Iterator<MutableString> cursor = iterator();
	while( cursor.hasNext() ) {
		final MutableString line = cursor.next();
		lines.add( line.copy() );
	}
	return lines;
}
 
Example 18
/** Command-line entry point: builds a shift-add-xor signed string map and stores it.
 *
 * <p>Strings are read line by line from a file or from standard input (optionally
 * gzip-compressed); the function to be signed is loaded from a serialised object, and
 * the resulting map is serialised to the given file.
 *
 * @param arg the command-line arguments, parsed with JSAP.
 */
@SuppressWarnings("unchecked")
public static void main( final String[] arg ) throws NoSuchMethodException, IOException, JSAPException, ClassNotFoundException {

	final SimpleJSAP jsap = new SimpleJSAP( ShiftAddXorSignedStringMap.class.getName(), "Builds a shift-add-xor signed string map by reading a newline-separated list of strings and a function built on the same list of strings.",
			new Parameter[] {
		new FlaggedOption( "bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b',  "buffer-size", "The size of the I/O buffer used to read strings." ),
		new FlaggedOption( "encoding", ForNameStringParser.getParser( Charset.class ), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The string file encoding." ),
		new Switch( "zipped", 'z', "zipped", "The string list is compressed in gzip format." ),
		new FlaggedOption( "width", JSAP.INTEGER_PARSER, Integer.toString( Integer.SIZE ), JSAP.NOT_REQUIRED, 'w', "width", "The signature width in bits." ),
		new UnflaggedOption( "function", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the function to be signed." ),
		new UnflaggedOption( "map", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename of the resulting serialised signed string map." ),
		new UnflaggedOption( "stringFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "Read strings from this file instead of standard input." ),
	});

	final JSAPResult options = jsap.parse( arg );
	if ( jsap.messagePrinted() ) return;

	// Where the strings come from, and how to decode them.
	final String stringFile = options.getString( "stringFile" );
	final boolean zipped = options.getBoolean( "zipped" );
	final Charset encoding = (Charset)options.getObject( "encoding" );
	final int bufferSize = options.getInt( "bufferSize" );
	// What to sign, how wide, and where to store the result.
	final String functionName = options.getString( "function" );
	final int width = options.getInt( "width" );
	final String mapName = options.getString( "map" );

	InputStream source;
	if ( stringFile == null ) source = System.in;
	else source = new FileInputStream( stringFile );
	if ( zipped ) source = new GZIPInputStream( source );

	final FastBufferedReader lineReader = new FastBufferedReader( new InputStreamReader( source, encoding ), bufferSize );
	final Iterator<MutableString> iterator = new LineIterator( lineReader );
	final Object2LongFunction<CharSequence> function = (Object2LongFunction<CharSequence>)BinIO.loadObject( functionName );

	LOGGER.info( "Signing..." );
	final ShiftAddXorSignedStringMap signedMap = new ShiftAddXorSignedStringMap( iterator, function, width );
	BinIO.storeObject( signedMap, mapName );
	LOGGER.info( "Completed." );
}
 
Example 19
Source Project: tagme   Source File: WikiTextExtractor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the links listed in a cleaned disambiguation page.
 *
 * <p>Only lines that start with <code>'*'</code> are considered. The leading run of
 * asterisks is stripped, the rest of the line is handed to
 * <code>extractLinkFromCleanedLine</code>, and the first link found on each such line
 * (if any) is collected.
 *
 * @param cleanText the cleaned text of the page.
 * @return the collected links, in order of appearance; empty if none.
 */
public List<WikiLink> extractDisambiguationLinks(MutableString cleanText)
	{
		FastBufferedReader tokenizer = new FastBufferedReader(cleanText);
		MutableString buffer = new MutableString(1024);
		List<WikiLink> links = new ArrayList<WikiLink>();
		
		try {
			while(tokenizer.readLine(buffer) != null)
			{
				buffer.trim();
				if (buffer.length() == 0) continue;
				if (buffer.charAt(0) != '*') continue;
				
				// Find the end of the leading run of asterisks...
				int start = 1;
				while (start<buffer.length() && buffer.charAt(start)=='*') start++;
				// ...and drop that run, keeping the item text. Deleting from start to the
				// end instead would leave only the asterisks and never yield a link.
				buffer.delete(0, start).trim();
				
				if (buffer.length() == 0) continue;
//				if (!buffer.startsWith("[[")) continue;
				
				List<WikiLink> lineLinks = extractLinkFromCleanedLine(buffer);
				if (lineLinks.size()>0) links.add(lineLinks.get(0));
			}
		} catch (IOException ignored){
			// Best effort: the reader wraps an in-memory string, so a failure is not
			// expected; whatever was collected so far is returned.
		}
		
		return links;
		
	}
 
Example 20
/** Creates a new shift-add-xor signed string map using a given hash map.
 * 
 * @param function a function mapping each string in <code>list</code> to its ordinal position.
 * @param list a list of strings.
 */

public LiterallySignedStringMap( final Object2LongFunction<? extends CharSequence> function, final ObjectList<? extends MutableString> list ) {
	this.function = function;
	this.list = list;
	size = list.size();
	for( int i = 0; i < size; i++ ) if ( function.getLong( list.get( i ) ) != i ) throw new IllegalArgumentException( "Function and list do not agree" );
	defRetValue = -1;
}
 
Example 21
Source Project: tagme   Source File: WikiTextExtractor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Cleans a page and collects the links met while cleaning.
 *
 * <p>The input goes through <code>clean</code> with a link-catching callback; the
 * caught links are appended to <code>links</code>, and the cleaned text is then passed
 * through <code>removeStructure</code>.
 *
 * @param input the raw page text.
 * @param links the list receiving the links found in the page.
 * @return the result of <code>removeStructure</code> on the cleaned text.
 */
public MutableString extractPageAndLink(MutableString input, List<WikiLink> links)
{
	final LinkCatcher collector = new LinkCatcher();
	final MutableString withoutMarkup = clean(input, collector);
	links.addAll(collector.links);
	final MutableString page = removeStructure(withoutMarkup, false);
	return page;
}
 
Example 22
Source Project: tagme   Source File: ExternalSortUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Formats a float array as <code>[v0,v1,...]</code> with a fixed number of decimals.
 *
 * <p>Numbers are formatted with {@link java.util.Locale#ROOT}, so the decimal separator
 * is always a dot and can never be confused with the comma separating the elements.
 *
 * @param data the values to format.
 * @param precision the number of digits after the decimal point.
 * @return the formatted array.
 */
public static CharSequence formatFloatArray(float[] data, int precision){
	// Loop-invariant: build the format string once.
	final String elementFormat = "%."+precision+"f";
	MutableString buf = new MutableString(data.length*2+1);
	buf.append('[');
	for(int i=0;i<data.length; i++){
		if (i>0) buf.append(',');
		buf.append(String.format(java.util.Locale.ROOT, elementFormat, data[i]));
	}
	buf.append(']');
	return buf;
}
 
Example 23
Source Project: tagme   Source File: ExternalSortUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Formats a double array as <code>[v0,v1,...]</code> with a fixed number of decimals.
 *
 * <p>Numbers are formatted with {@link java.util.Locale#ROOT}, so the decimal separator
 * is always a dot and can never be confused with the comma separating the elements.
 *
 * @param data the values to format.
 * @param precision the number of digits after the decimal point.
 * @return the formatted array.
 */
public static CharSequence formatDoubleArray(double[] data, int precision){
	// Loop-invariant: build the format string once.
	final String elementFormat = "%."+precision+"f";
	MutableString buf = new MutableString(data.length*2+1);
	buf.append('[');
	for(int i=0;i<data.length; i++){
		if (i>0) buf.append(',');
		buf.append(String.format(java.util.Locale.ROOT, elementFormat, data[i]));
	}
	buf.append(']');
	return buf;
}
 
Example 24
Source Project: BUbiNG   Source File: SpamTextProcessor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Consumes all words from the buffered reader and counts the occurrences of those
 * found in the term set.
 *
 * <p>Each word is lowercased in place and looked up; a lookup result of -1 means the
 * word is not tracked. Counters stop growing once they reach {@link Short#MAX_VALUE}.
 *
 * @throws IOException if reading the next word fails.
 */
private void process() throws IOException {
	final MutableString word = new MutableString();
	final MutableString nonWord = new MutableString();
	while (fbr.next(word, nonWord)) {
		final short index = (short)termSetOnthology.getLong(word.toLowerCase());
		if (index == -1) continue;
		final short previous = termCount.get(index);
		if (previous >= Short.MAX_VALUE) continue;
		termCount.put(index, (short)(previous + 1));
	}
}
 
Example 25
Source Project: BUbiNG   Source File: SpamTextProcessor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Treats a single character as a one-character term and counts it if it is tracked.
 *
 * <p>The character is lowercased and looked up in the term set; a result of -1 means it
 * is not tracked. The counter stops growing once it reaches {@link Short#MAX_VALUE}.
 *
 * @param c the character to process.
 * @return this processor.
 */
@Override
public Appendable append(char c) throws IOException {
	final MutableString term = new MutableString();
	term.append(Character.toLowerCase(c));
	final short index = (short)termSetOnthology.getLong(term);
	if (index == -1) return this;

	final short previous = termCount.get(index);
	if (previous < Short.MAX_VALUE) termCount.put(index, (short)(previous + 1));
	return this;
}
 
Example 26
Source Project: BUbiNG   Source File: BuildRepetitionSet.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Command-line entry point: detects consecutive repeated URLs on standard input.
 *
 * <p>Each input line has three tab-separated fields: store index, store position and
 * URL. Whenever a line's URL equals that of the previous line, the line is echoed to
 * standard output and <code>storeIndex &lt;&lt; 48 | storePosition</code> is added to a
 * set, which is finally serialised to the file named by the first argument.
 *
 * @param arg the command-line arguments; <code>arg[0]</code> is the output filename.
 * @throws IOException if reading the input or storing the set fails.
 */
public static void main(String[] arg) throws IOException {
	if (arg.length == 0) {
		System.err.println("Usage: " + BuildRepetitionSet.class.getSimpleName() + " REPETITIONSET");
		System.exit(1);
	}

	final FastBufferedReader input = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII));
	final MutableString line = new MutableString();
	final LongOpenHashSet repeatedSet = new LongOpenHashSet();
	final String outputFilename = arg[0];

	final ProgressLogger progress = new ProgressLogger();
	progress.itemsName = "lines";
	progress.start("Reading... ");

	MutableString previousUrl = new MutableString();
	while (input.readLine(line) != null) {
		final int firstTab = line.indexOf('\t');
		final int secondTab = line.indexOf('\t', firstTab + 1);
		final MutableString url = line.substring(secondTab + 1);

		if (url.equals(previousUrl)) {
			// Same URL as the line before: record and report this occurrence.
			final char[] chars = line.array();
			final int storeIndex = Integer.parseInt(new String(chars, 0, firstTab));
			final long storePosition = Long.parseLong(new String(chars, firstTab + 1, secondTab - firstTab - 1));
			repeatedSet.add((long)storeIndex << 48 | storePosition);
			System.out.println(storeIndex + "\t" + storePosition + "\t" + url);
		}

		previousUrl = url;
		progress.lightUpdate();
	}

	progress.done();

	input.close();
	BinIO.storeObject(repeatedSet, outputFilename);
}
 
Example 27
Source Project: BUbiNG   Source File: RuntimeConfiguration.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses a seed URL, accepting it only if it is well formed and absolute.
 *
 * @param s the seed URL specification.
 * @return the parsed URI, or <code>null</code> (after logging an error) if the
 * specification is malformed or relative.
 */
private static URI handleSeedURL(final MutableString s) {
	final URI parsed = BURL.parse(s);
	if (parsed == null) {
		LOGGER.error("The seed URL " + s + " is malformed");
		return null;
	}
	if (!parsed.isAbsolute()) {
		LOGGER.error("The seed URL " + s + " is relative");
		return null;
	}
	return parsed;
}
 
Example 28
Source Project: BUbiNG   Source File: RuntimeConfiguration.java    License: Apache License 2.0 5 votes vote down vote up
/** Adds a (or a set of) new IPv4 to the black list; the IPv4 can be specified directly or it can be a file (prefixed by
 *  <code>file:</code>).
 *
 * <p>When a file is given, it is read line by line (ISO-8859-1), every non-empty line is
 * handled as an address, and the file is closed when done, even if a line is rejected.
 *
 * @param spec the specification (an IP address, or a file prefixed by <code>file:</code>); an empty specification is ignored.
 * @throws ConfigurationException
 * @throws FileNotFoundException if the specified file cannot be opened.
 */
public void addBlackListedIPv4(final String spec) throws ConfigurationException, FileNotFoundException {
	if (spec.length() == 0) return; // Skip empty specs
	if (spec.startsWith("file:")) {
		final FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(new FileInputStream(spec.substring(5)), Charsets.ISO_8859_1));
		try {
			final LineIterator lineIterator = new LineIterator(reader);
			while (lineIterator.hasNext()) {
				final MutableString line = lineIterator.next();
				if (line.length() > 0) blackListedIPv4Addresses.add(handleIPv4(line.toString()));
			}
		}
		finally {
			// Release the file handle; the stream is read-only, so a failure
			// while closing leaves nothing to recover.
			try {
				reader.close();
			}
			catch (java.io.IOException ignored) {}
		}
	}
	else blackListedIPv4Addresses.add(handleIPv4(spec));
}
 
Example 29
Source Project: BUbiNG   Source File: RuntimeConfiguration.java    License: Apache License 2.0 5 votes vote down vote up
/** Adds a (or a set of) new host to the black list; the host can be specified directly or it can be a file (prefixed by
 *  <code>file:</code>).
 *
 * <p>Hosts are stored as the hash code of their trimmed name. When a file is given, it is
 * read line by line (ISO-8859-1) and closed when done.
 *
 * @param spec the specification (a host, or a file prefixed by <code>file:</code>); an empty specification is ignored.
 * @throws ConfigurationException
 * @throws FileNotFoundException if the specified file cannot be opened.
 */
public void addBlackListedHost(final String spec) throws ConfigurationException, FileNotFoundException 	{
	if (spec.length() == 0) return; // Skip empty specs
	if (spec.startsWith("file:")) {
		final FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(new FileInputStream(spec.substring(5)), Charsets.ISO_8859_1));
		try {
			final LineIterator lineIterator = new LineIterator(reader);
			while (lineIterator.hasNext()) {
				final MutableString line = lineIterator.next();
				blackListedHostHashes.add(line.toString().trim().hashCode());
			}
		}
		finally {
			// Release the file handle; the stream is read-only, so a failure
			// while closing leaves nothing to recover.
			try {
				reader.close();
			}
			catch (java.io.IOException ignored) {}
		}
	}
	else blackListedHostHashes.add(spec.trim().hashCode());
}
 
Example 30
Source Project: BUbiNG   Source File: URLRespectsRobots.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Command-line entry point: checks URLs read from standard input against a robots file.
 *
 * <p>The robots file named by <code>arg[0]</code> is parsed for the user agent
 * <code>arg[1]</code> and the parsed entries are dumped to standard error. Then, for
 * every line of standard input, the result of <code>apply</code> and the parsed URL are
 * printed to standard output, separated by a tab.
 *
 * @param arg the robots filename and the user agent.
 * @throws IOException if reading the robots file or the input fails.
 */
public static void main(String arg[]) throws IOException {
	final char[][] robotsResult = URLRespectsRobots.parseRobotsReader(new FileReader(arg[0]), arg[1]);
	for (int k = 0; k < robotsResult.length; k++) System.err.println(new String(robotsResult[k]));

	final FastBufferedReader in = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII));
	final MutableString line = new MutableString();
	while (in.readLine(line) != null) {
		final URI uri = BURL.parse(line);
		final String report = apply(robotsResult, uri) + "\t" + uri;
		System.out.println(report);
	}
	in.close();

}