Java Code Examples for org.apache.lucene.store.IndexOutput#writeString()

The following examples show how to use org.apache.lucene.store.IndexOutput#writeString(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes header, payload and footer, then calls {@code checkFooter} with a prior exception
 * without having consumed the stream first. The prior exception is expected to be rethrown
 * with a single suppressed exception whose message contains "checksum passed".
 */
public void testCheckFooterValid() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");
  CodecUtil.writeFooter(writer);
  writer.close();

  ChecksumIndexInput reader =
      new BufferedChecksumIndexInput(new ByteBuffersIndexInput(buffer.toDataInput(), "temp"));
  Exception prior = new RuntimeException("fake exception");
  RuntimeException thrown =
      expectThrows(RuntimeException.class, () -> CodecUtil.checkFooter(reader, prior));
  assertEquals("fake exception", thrown.getMessage());

  // The checksum verdict travels as the only suppressed exception.
  Throwable[] attached = thrown.getSuppressed();
  assertEquals(1, attached.length);
  assertTrue(attached[0].getMessage().contains("checksum passed"));
  reader.close();
}
 
Example 2
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Same as the plain "valid footer" case, but the header and payload are consumed first so the
 * input is positioned exactly at the footer when {@code checkFooter} is called. The prior
 * exception is expected to be rethrown with one suppressed "checksum passed" exception.
 */
public void testCheckFooterValidAtFooter() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");
  CodecUtil.writeFooter(writer);
  writer.close();

  ChecksumIndexInput reader =
      new BufferedChecksumIndexInput(new ByteBuffersIndexInput(buffer.toDataInput(), "temp"));

  // Consume everything up to the footer.
  CodecUtil.checkHeader(reader, "FooBar", 5, 5);
  assertEquals("this is the data", reader.readString());

  Exception prior = new RuntimeException("fake exception");
  RuntimeException thrown =
      expectThrows(RuntimeException.class, () -> CodecUtil.checkFooter(reader, prior));
  assertEquals("fake exception", thrown.getMessage());

  Throwable[] attached = thrown.getSuppressed();
  assertEquals(1, attached.length);
  assertTrue(attached[0].getMessage().contains("checksum passed"));
  reader.close();
}
 
Example 3
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one byte beyond the payload before calling {@code checkFooter}. The test expects a
 * {@code CorruptIndexException} mentioning "checksum status indeterminate", carrying the prior
 * exception as its single suppressed exception.
 */
public void testCheckFooterValidPastFooter() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");
  CodecUtil.writeFooter(writer);
  writer.close();

  ChecksumIndexInput reader =
      new BufferedChecksumIndexInput(new ByteBuffersIndexInput(buffer.toDataInput(), "temp"));
  CodecUtil.checkHeader(reader, "FooBar", 5, 5);
  assertEquals("this is the data", reader.readString());

  // Deliberately overshoot by one byte into the footer (this can happen in practice).
  reader.readByte();

  Exception prior = new RuntimeException("fake exception");
  CorruptIndexException thrown =
      expectThrows(CorruptIndexException.class, () -> CodecUtil.checkFooter(reader, prior));
  assertTrue(thrown.getMessage().contains("checksum status indeterminate"));

  Throwable[] attached = thrown.getSuppressed();
  assertEquals(1, attached.length);
  assertEquals("fake exception", attached[0].getMessage());
  reader.close();
}
 
Example 4
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Hand-writes a footer with a bogus checksum value and verifies that {@code checkFooter}
 * reports "checksum failed" via a {@code CorruptIndexException}, with the prior exception
 * attached as its single suppressed exception.
 */
public void testCheckFooterInvalid() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");

  // Manual footer: magic, a zero int, then a checksum that cannot match.
  writer.writeInt(CodecUtil.FOOTER_MAGIC);
  writer.writeInt(0);
  writer.writeLong(1234567);
  writer.close();

  ChecksumIndexInput reader =
      new BufferedChecksumIndexInput(new ByteBuffersIndexInput(buffer.toDataInput(), "temp"));
  CodecUtil.checkHeader(reader, "FooBar", 5, 5);
  assertEquals("this is the data", reader.readString());

  Exception prior = new RuntimeException("fake exception");
  CorruptIndexException thrown =
      expectThrows(CorruptIndexException.class, () -> CodecUtil.checkFooter(reader, prior));
  assertTrue(thrown.getMessage().contains("checksum failed"));

  Throwable[] attached = thrown.getSuppressed();
  assertEquals(1, attached.length);
  assertEquals("fake exception", attached[0].getMessage());
  reader.close();
}
 
Example 5
Source File: Text2Bin.java    From lesk-wsd-dsm with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Convert a WordSpace text matrix to a bin WordSpace file.
 * Text matrix format:
 * - the first line contains the matrix dimensions N
 * - each line contains the word vector information: word d1 d2 ... dN
 *   (fields are separated by a tab character)
 * Usage: Text2Bin text_matrix_file bin_matrix_file
 *
 * The binary file starts with the string "-dimensions" followed by N as an int; every
 * following record is the word as a string followed by each component written as the
 * int bit pattern of its float value.
 *
 * The reader, the directory and the output are each closed in a finally block so none of
 * them leaks when reading or writing fails part-way through.
 *
 * @param args the command line arguments: input text file, output binary file
 */
public static void main(String[] args) {
    try {
        BufferedReader in = new BufferedReader(new FileReader(args[0]));
        try {
            File file = new File(args[1]);
            FSDirectory fs = FSDirectory.open(file.getParentFile());
            try {
                IndexOutput output = fs.createOutput(file.getName());
                try {
                    String header = in.readLine();
                    output.writeString("-dimensions");
                    output.writeInt(Integer.parseInt(header));
                    // readLine() returning null is the reliable end-of-input signal;
                    // ready() only says whether a read would block.
                    String line;
                    while ((line = in.readLine()) != null) {
                        String[] split = line.split("\t");
                        output.writeString(split[0]);
                        for (int i = 1; i < split.length; i++) {
                            output.writeInt(Float.floatToIntBits(Float.parseFloat(split[i])));
                        }
                    }
                } finally {
                    output.close();
                }
            } finally {
                fs.close();
            }
        } finally {
            in.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(Text2Bin.class.getName()).log(Level.SEVERE, null, ex);
    }
}
 
Example 6
Source File: Completion090PostingsFormat.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Obtains the delegate's fields consumer, then creates the suggest file for this segment,
 * writes its codec header followed by the delegate postings format name and the write
 * provider name, and finally asks the write provider for a consumer on that output.
 * If any step after opening the output fails, the output is closed before the exception
 * propagates.
 *
 * @param state the segment write state supplying directory, segment name/suffix and context
 * @throws IOException if the output cannot be created or written
 */
public CompletionFieldsConsumer(SegmentWriteState state) throws IOException {
    this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
    final String fstFileName =
            IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
    IndexOutput fstOutput = null;
    boolean initialized = false;
    try {
        fstOutput = state.directory.createOutput(fstFileName, state.context);
        CodecUtil.writeHeader(fstOutput, CODEC_NAME, SUGGEST_VERSION_CURRENT);
        // Record the delegate postings format by name so it can be looked up at load time
        // without needing an instance in the constructor.
        fstOutput.writeString(delegatePostingsFormat.getName());
        fstOutput.writeString(writeProvider.getName());
        this.suggestFieldsConsumer = writeProvider.consumer(fstOutput);
        initialized = true;
    } finally {
        if (!initialized) {
            IOUtils.closeWhileHandlingException(fstOutput);
        }
    }
}
 
Example 7
Source File: SolrSnapshotMetaDataManager.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the current snapshot name-to-details mapping to a new generation file, syncs it,
 * removes the previous generation's file (if any) and advances the write generation.
 * When writing fails, the partially written file is closed and deleted on a best-effort basis.
 *
 * @throws IOException if the file cannot be created, written, closed or synced
 */
private synchronized void persist() throws IOException {
  final String currentFile = SNAPSHOTS_PREFIX + nextWriteGen;
  final IndexOutput writer = dir.createOutput(currentFile, IOContext.DEFAULT);
  boolean written = false;
  try {
    CodecUtil.writeHeader(writer, CODEC_NAME, VERSION_CURRENT);
    writer.writeVInt(nameToDetailsMapping.size());
    for (Entry<String,SnapshotMetaData> mapping : nameToDetailsMapping.entrySet()) {
      SnapshotMetaData details = mapping.getValue();
      writer.writeString(mapping.getKey());
      writer.writeString(details.getIndexDirPath());
      writer.writeVLong(details.getGenerationNumber());
    }
    written = true;
  } finally {
    if (written) {
      IOUtils.close(writer);
    } else {
      IOUtils.closeWhileHandlingException(writer);
      IOUtils.deleteFilesIgnoringExceptions(dir, currentFile);
    }
  }

  dir.sync(Collections.singletonList(currentFile));

  if (nextWriteGen > 0) {
    // Failure to delete is fine here: most likely the older file never existed.
    final String previousFile = SNAPSHOTS_PREFIX + (nextWriteGen - 1);
    IOUtils.deleteFilesIgnoringExceptions(dir, previousFile);
  }

  nextWriteGen++;
}
 
Example 8
Source File: left_IndexWriter_1.41.java    From gumtree-spoon-ast-diff with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the number of entries in {@code files} followed by each entry (as a string) to
 * "deleteable.new", then renames that file to "deletable".
 *
 * @param files the file names to record; every element is cast to {@code String}
 * @throws IOException if writing or renaming fails
 */
private final void writeDeleteableFiles(Vector files) throws IOException {
  IndexOutput out = directory.createOutput("deleteable.new");
  try {
    out.writeInt(files.size());
    int index = 0;
    while (index < files.size()) {
      out.writeString((String) files.elementAt(index));
      index++;
    }
  } finally {
    out.close();
  }
  directory.renameFile("deleteable.new", "deletable");
}
 
Example 9
Source File: right_IndexWriter_1.42.java    From gumtree-spoon-ast-diff with Apache License 2.0 5 votes vote down vote up
/**
 * Persists the given file names: the entry count is written first, then each name, into
 * "deleteable.new"; once the output is closed the file is renamed to "deletable".
 *
 * @param files the file names to record; every element is cast to {@code String}
 * @throws IOException if writing or renaming fails
 */
private final void writeDeleteableFiles(Vector files) throws IOException {
  IndexOutput pending = directory.createOutput("deleteable.new");
  try {
    pending.writeInt(files.size());
    for (int position = 0; position < files.size(); position++) {
      String fileName = (String) files.elementAt(position);
      pending.writeString(fileName);
    }
  } finally {
    pending.close();
  }
  directory.renameFile("deleteable.new", "deletable");
}
 
Example 10
Source File: Blur022SegmentInfoWriter.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the segment-info file for {@code si} into {@code dir}.
 *
 * <p>The file contains the codec header, the segment version, the document count, the
 * compound-file flag, the diagnostics map, the attributes map and the set of segment files.
 * The attributes written are a sorted copy of the segment's own attributes with the stored
 * fields chunk size and compression mode of this writer added.
 *
 * <p>If writing fails, the output is closed and the partial file is deleted on a best-effort
 * basis; a failure during that cleanup is ignored so that the original exception is the one
 * that reaches the caller.
 *
 * @param dir directory in which the segment-info file is created
 * @param si segment whose metadata is written; the new file name is added to its file set
 * @param fis field infos (not used by this implementation)
 * @param ioContext I/O context used when creating the output
 * @throws IOException if the file cannot be created or written
 */
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(si.name, "", Blur022SegmentInfoFormat.SI_EXTENSION);
  si.addFile(fileName);

  final IndexOutput output = dir.createOutput(fileName, ioContext);

  boolean success = false;
  try {
    CodecUtil.writeHeader(output, Blur022SegmentInfoFormat.CODEC_NAME, Blur022SegmentInfoFormat.VERSION_CURRENT);
    output.writeString(si.getVersion());
    output.writeInt(si.getDocCount());

    output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
    output.writeStringStringMap(si.getDiagnostics());

    // Copy before modifying so the segment's own attribute map is left untouched.
    Map<String, String> attributes = si.attributes();
    TreeMap<String, String> newAttributes = new TreeMap<String, String>();
    if (attributes != null) {
      newAttributes.putAll(attributes);
    }
    newAttributes.put(Blur022StoredFieldsFormat.STORED_FIELDS_FORMAT_CHUNK_SIZE,
        Integer.toString(_compressionChunkSize));
    newAttributes.put(Blur022StoredFieldsFormat.STORED_FIELDS_FORMAT_COMPRESSION_MODE, _compressionMode);
    output.writeStringStringMap(newAttributes);
    output.writeStringSet(si.files());

    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(output);
      try {
        si.dir.deleteFile(fileName);
      } catch (Exception ignored) {
        // Best-effort cleanup: an exception thrown here would replace the write failure
        // that is already propagating, hiding the real cause from the caller.
      }
    } else {
      output.close();
    }
  }
}
 
Example 11
Source File: VectorStoreWriter.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Streams a vector store to a Lucene output: first the header string built from
 * {@code flagConfig}, then, for every object vector, its object's string form followed by
 * the vector itself.
 * The caller owns {@code outputStream} and is responsible for opening and closing it.
 */
public static void writeToIndexOutput(VectorStore objectVectors, FlagConfig flagConfig, IndexOutput outputStream)
    throws IOException {
  // The header describes vector type and dimension for everything that follows.
  final String header = generateHeaderString(flagConfig);
  outputStream.writeString(header);

  for (Enumeration<ObjectVector> remaining = objectVectors.getAllVectors();
       remaining.hasMoreElements(); ) {
    ObjectVector entry = remaining.nextElement();
    String label = entry.getObject().toString();
    outputStream.writeString(label);
    entry.getVector().writeToLuceneStream(outputStream);
  }
  VerbatimLogger.info("finished writing vectors.\n");
}
 
Example 12
Source File: MtasFieldsConsumer.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a prefix for a field the first time it is seen.
 *
 * <p>A new prefix gets the next sequential id for its field (starting at 1), its reference is
 * recorded as the current file pointer of {@code outPrefix}, and the prefix string is then
 * written to {@code outPrefix}. A prefix that is already registered for the field is ignored.
 *
 * @param field
 *          the field
 * @param prefix
 *          the prefix
 * @param outPrefix
 *          the output the prefix is written to
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
private void registerPrefix(String field, String prefix,
    IndexOutput outPrefix) throws IOException {
  // Lazily create the per-field lookup tables.
  if (!prefixReferenceIndex.containsKey(field)) {
    prefixReferenceIndex.put(field, new HashMap<String, Long>());
    prefixIdIndex.put(field, new HashMap<String, Integer>());
  }
  if (prefixReferenceIndex.get(field).containsKey(prefix)) {
    return;
  }
  final int nextId = prefixReferenceIndex.get(field).size() + 1;
  final long position = outPrefix.getFilePointer();
  prefixReferenceIndex.get(field).put(prefix, position);
  prefixIdIndex.get(field).put(prefix, nextId);
  outPrefix.writeString(prefix);
}
 
Example 13
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes an index header (with random id and suffix "xyz") followed by a string, then seeks
 * to {@code indexHeaderLength("FooBar", "xyz")} and checks that the string is read back from
 * exactly that position.
 */
public void testSegmentHeaderLength() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeIndexHeader(writer, "FooBar", 5, StringHelper.randomId(), "xyz");
  writer.writeString("this is the data");
  writer.close();

  IndexInput reader = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  long payloadStart = CodecUtil.indexHeaderLength("FooBar", "xyz");
  reader.seek(payloadStart);
  String payload = reader.readString();
  assertEquals("this is the data", payload);
  reader.close();
}
 
Example 14
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes header, payload and footer, then runs {@code checksumEntireFile} over the result;
 * the test passes when that call completes without throwing.
 */
public void testChecksumEntireFile() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");
  CodecUtil.writeFooter(writer);
  writer.close();

  IndexInput reader = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  CodecUtil.checksumEntireFile(reader);
  reader.close();
}
 
Example 15
Source File: TestCodecUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a codec header followed by a string, then seeks to {@code headerLength("FooBar")}
 * and checks that the string is read back from exactly that position.
 */
public void testHeaderLength() throws Exception {
  ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeHeader(writer, "FooBar", 5);
  writer.writeString("this is the data");
  writer.close();

  IndexInput reader = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  long payloadStart = CodecUtil.headerLength("FooBar");
  reader.seek(payloadStart);
  String payload = reader.readString();
  assertEquals("this is the data", payload);
  reader.close();
}
 
Example 16
Source File: TestFailIfUnreferencedFiles.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an index with one empty document, then writes and syncs an extra file
 * ("_hello.world") that was not created through the index writer, and closes the directory
 * with the unreferenced-files assertion switched on.
 * NOTE(review): presumably the enclosing test harness expects the final close to fail; confirm
 * against the surrounding class.
 */
public void testDummy() throws Exception {
  MockDirectoryWrapper directory = newMockDirectory();
  directory.setAssertNoUnrefencedFilesOnClose(true);

  IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));
  writer.addDocument(new Document());
  writer.close();

  IndexOutput stray = directory.createOutput("_hello.world", IOContext.DEFAULT);
  stray.writeString("i am unreferenced!");
  stray.close();
  directory.sync(Collections.singleton("_hello.world"));
  directory.close();
}
 
Example 17
Source File: IncrementalDocVectors.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Builds one document vector per Lucene document and writes them, one at a time, to the
 * document-vectors file named by the flag configuration.
 *
 * <p>For each document, the term vectors of every configured contents field are walked and
 * the corresponding vectors from {@code termVectorData} are superposed onto the document
 * vector, weighted by local term weight, global term weight and (when enabled) field weight.
 * The document's external id and its normalized vector are then written to the output.
 *
 * <p>The directory and the output stream are managed with try-with-resources so both are
 * released even when reading the index or writing a vector fails part-way through.
 *
 * @throws IOException if the index cannot be read or the vector file cannot be written
 */
private void trainIncrementalDocVectors() throws IOException {
  int numdocs = luceneUtils.getNumDocs();

  // Resolve the target file; a file without a parent lives in the current directory.
  File vectorFile = new File(
      VectorStoreUtils.getStoreFileName(flagConfig.docvectorsfile(), flagConfig));
  String parentPath = vectorFile.getParent();
  if (parentPath == null) {
    parentPath = "";
  }

  try (FSDirectory fsDirectory = FSDirectory.open(FileSystems.getDefault().getPath(parentPath))) {
    // Remove any previous file of the same name before creating the new output.
    java.nio.file.Files.deleteIfExists(vectorFile.toPath());

    try (IndexOutput outputStream = fsDirectory.createOutput(vectorFile.getName(), IOContext.DEFAULT)) {
      VerbatimLogger.info("Writing vectors incrementally to file " + vectorFile + " ... ");

      // Write header giving number of dimension for all vectors.
      outputStream.writeString(VectorStoreWriter.generateHeaderString(flagConfig));

      // Iterate through documents.
      for (int dc = 0; dc < numdocs; dc++) {
        // Output progress counter: every 1000 documents up to 10000, then every 10000.
        if ((dc > 0) && ((dc % 10000 == 0) || (dc < 10000 && dc % 1000 == 0))) {
          VerbatimLogger.info("Processed " + dc + " documents ... ");
        }

        // Get filename and path to be used as document vector ID, defaulting to doc number only if
        // docidfield is not populated.
        String docID = luceneUtils.getExternalDocId(dc);

        Vector docVector = VectorFactory.createZeroVector(flagConfig.vectortype(), flagConfig.dimension());

        for (String fieldName : flagConfig.contentsfields()) {
          Terms terms = luceneUtils.getTermVector(dc, fieldName);

          if (terms == null) {
            VerbatimLogger.fine(
                String.format(
                    "When building document vectors, no term vector for field: '%s' in document '%s'.",
                    fieldName, docID));
            continue;
          }

          TermsEnum termsEnum = terms.iterator();
          BytesRef bytes;
          while ((bytes = termsEnum.next()) != null) {
            Term term = new Term(fieldName, bytes);
            String termString = term.text();
            PostingsEnum docs = termsEnum.postings(null);
            // Advance to the first posting so that freq() can be read.
            docs.nextDoc();
            int freq = docs.freq();

            try {
              Vector termVector = termVectorData.getVector(termString);
              if (termVector != null && termVector.getDimension() > 0) {
                float localweight = luceneUtils.getLocalTermWeight(freq);
                float globalweight = luceneUtils.getGlobalTermWeight(new Term(fieldName, termString));
                float fieldweight = 1;

                if (flagConfig.fieldweight()) {
                  //field weight: 1/sqrt(number of terms in field)
                  fieldweight = (float) (1 / Math.sqrt(terms.size()));
                }

                // Add contribution from this term, excluding terms that
                // are not represented in termVectorData.
                docVector.superpose(termVector, localweight * globalweight * fieldweight, null);
              }
            } catch (NullPointerException npe) {
              // Kept from the original: a missing term surfaces as an NPE here.
              // Don't normally print anything - too much data!
              logger.finest("term " + termString + " not represented");
            }
          }
        }

        if (docVector.isZeroVector()) {
          logger.warning(String.format(
              "Outputting zero vector for document '%s'. This probably means that none of " +
                  "the -contentsfields were populated, or all terms failed the LuceneUtils termsfilter." +
                  " You may want to investigate.",
              docID));
        }

        // All fields in document have been processed. Write out documentID and normalized vector.
        docVector.normalize();
        outputStream.writeString(docID);
        docVector.writeToLuceneStream(outputStream);
      } // Finish iterating through documents.

      VerbatimLogger.info("Finished writing vectors.\n");
    }
  }
}
 
Example 18
Source File: VectorStoreTruncater.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Truncates every vector of an existing vector store to a smaller dimension and writes the
 * result to a new file named {@code <incoming without ".bin">_<newDimension>.bin}.
 *
 * <p>After flag parsing, two arguments must remain: the incoming vector file and the new
 * dimension. Missing or non-numeric arguments print the usage message instead of failing with
 * an uncaught exception. The output directory and stream are closed even when writing fails.
 *
 * @param args command line flags followed by {@code incomingFile.bin newDimensionality}
 */
public static void main(String[] args) {
  final String usage = "Usage: VectorStoreTruncater incomingFile.bin newDimensionality";

  try {
    FlagConfig flagConfig = FlagConfig.getFlagConfig(args);
    VectorStoreRAM objectVectors = new VectorStoreRAM(flagConfig);
    String[] argsRemaining = flagConfig.remainingArgs;
    if (argsRemaining == null || argsRemaining.length < 2) {
      System.out.println(usage);
      return;
    }

    String incomingVecs = argsRemaining[0];
    int newDimension;
    try {
      newDimension = Integer.parseInt(argsRemaining[1]);
    } catch (NumberFormatException e) {
      System.out.println("New dimensionality must be an integer, got: " + argsRemaining[1]);
      System.out.println(usage);
      return;
    }
    objectVectors.initFromFile(incomingVecs);

    if (newDimension > flagConfig.dimension()) {
      System.out.println("Incoming file has dimensionality of " + flagConfig.dimension());
      System.out.println("New dimensionality must be less than incoming vector length, quitting");
      System.exit(0);
    }

    String vectorFileName = incomingVecs.replaceAll("\\.bin", "") + "_" + newDimension + ".bin";
    File vectorFile = new File(vectorFileName);
    String parentPath = vectorFile.getParent();
    if (parentPath == null) {
      parentPath = "";
    }

    try (FSDirectory fsDirectory = FSDirectory.open(FileSystems.getDefault().getPath(parentPath));
         IndexOutput outputStream = fsDirectory.createOutput(vectorFile.getName(), IOContext.DEFAULT)) {
      // The header must describe the truncated dimension, so update the config first.
      flagConfig.setDimension(newDimension);
      outputStream.writeString(VectorStoreWriter.generateHeaderString(flagConfig));
      Enumeration<ObjectVector> vecEnum = objectVectors.getAllVectors();

      // Write each vector, passing the new dimension to the vector's writer.
      while (vecEnum.hasMoreElements()) {
        ObjectVector objectVector = vecEnum.nextElement();
        outputStream.writeString(objectVector.getObject().toString());
        objectVector.getVector().writeToLuceneStream(outputStream, flagConfig.dimension());
      }
    }

    VerbatimLogger.info("wrote " + objectVectors.getNumVectors() + " vectors to file " + vectorFileName);
    VerbatimLogger.info("finished writing vectors.\n");
  } catch (IOException e) {
    e.printStackTrace();
    System.out.println(usage);
  }
}