Java Code Examples for org.apache.lucene.document.FieldType#setStored()

The following examples show how to use org.apache.lucene.document.FieldType#setStored(). They are taken from a range of open source projects; the source file, project, and license for each example are listed above it.
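
Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what setStored(true) means: the field's raw value is kept in the index's stored fields and can be read back from a matching document, independently of whether the field is indexed for searching. The field name "contents", the method name, and the use of ByteBuffersDirectory are illustrative assumptions, and the usual org.apache.lucene.* imports are omitted to match the style of the examples that follow.

static void storedOnlyExample() throws IOException {
  FieldType storedOnly = new FieldType();
  storedOnly.setStored(true);                     // keep the raw value in the stored fields
  storedOnly.setIndexOptions(IndexOptions.NONE);  // do not index it for searching
  storedOnly.freeze();

  try (Directory dir = new ByteBuffersDirectory();
       IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
    Document doc = new Document();
    doc.add(new Field("contents", "only retrievable, not searchable", storedOnly));
    writer.addDocument(doc);
    writer.commit();

    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      // The stored value comes back verbatim from the only document in the index.
      String value = reader.document(0).get("contents");
      System.out.println(value);
    }
  }
}
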
Example 1
Source File: OLuceneSpatialIndexManager.java    From orientdb-lucene with Apache License 2.0
private Document newGeoDocument(OIdentifiable oIdentifiable, Shape shape) {

    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setStored(true);
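    // Note: in this snippet the FieldType configured above is not attached to any field;
    // the fields below are built via OLuceneIndexType.createField, the spatial strategy, and StoredField.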

    Document doc = new Document();

    doc.add(OLuceneIndexType.createField(RID, oIdentifiable.getIdentity().toString(), Field.Store.YES,
        Field.Index.NOT_ANALYZED_NO_NORMS));
    for (IndexableField f : strategy.createIndexableFields(shape)) {
      doc.add(f);
    }

    doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));

    return doc;
  }
 
Example 2
Source File: TestQueryParser.java    From lucene-solr with Apache License 2.0
private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
  Directory ramDir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, analyzer);
  Document doc = new Document();
  FieldType fieldType = new FieldType();
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
  fieldType.setTokenized(true);
  fieldType.setStored(true);
  Field field = new Field(FIELD, content, fieldType);
  doc.add(field);
  writer.addDocument(doc);
  writer.close();
  DirectoryReader ir = DirectoryReader.open(ramDir);
  IndexSearcher is = new IndexSearcher(ir);
    
  long hits = is.count(q);
  ir.close();
  ramDir.close();
  return hits == 1;
}
 
Example 3
Source File: TermDocIterableTest.java    From incubator-retired-blur with Apache License 2.0
private void addDocumentBlock(int id, int count, IndexWriter writer) throws IOException {
  FieldType fieldType = new FieldType();
  fieldType.setIndexed(true);
  fieldType.setOmitNorms(true);
  fieldType.setTokenized(false);
  fieldType.setStored(true);

  FieldType fieldTypeNoIndex = new FieldType();
  fieldTypeNoIndex.setStored(true);
  fieldTypeNoIndex.setIndexed(false);

  for (int i = 0; i < count; i++) {
    Document document = new Document();
    document.add(new Field("id", Integer.toString(id), fieldType));
    document.add(new Field("field", Integer.toString(i), fieldType));
    for (int j = 0; j < 100; j++) {
      document.add(new Field("field" + j, "testing here testing here testing here testing here testing here testing here testing here", fieldTypeNoIndex));
    }
    writer.addDocument(document);
  }
}
 
Example 4
Source File: BaseStoredFieldsFormatTestCase.java    From lucene-solr with Apache License 2.0
public void testIndexedBit() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType onlyStored = new FieldType();
  onlyStored.setStored(true);
  doc.add(new Field("field", "value", onlyStored));
  doc.add(new StringField("field2", "value", Field.Store.YES));
  w.addDocument(doc);
  IndexReader r = maybeWrapWithMergingReader(w.getReader());
  w.close();
  assertEquals(IndexOptions.NONE, r.document(0).getField("field").fieldType().indexOptions());
  assertNotNull(r.document(0).getField("field2").fieldType().indexOptions());
  r.close();
  dir.close();
}
 
Example 5
Source File: TestUnifiedHighlighter.java    From lucene-solr with Apache License 2.0
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.NONE);
  ft.setTokenized(false);
  ft.setStored(true);
  ft.freeze();

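  // Note: "fieldType" below is presumably a member of the enclosing test class (it is not defined
  // in this snippet); the locally configured "ft" is the stored-but-not-indexed type shown above.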
  Field title = new Field("title", "", fieldType);
  Field text = new Field("text", "", fieldType);
  Field category = new Field("category", "", fieldType);

  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);
  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}
 
Example 6
Source File: LuceneIndexer.java    From ontopia with Apache License 2.0
protected FieldType getFieldType(FieldIF field) {
  FieldType type = new FieldType();
  type.setStored(field.isStored());
  type.setTokenized(field.isTokenized());
  type.setIndexOptions(field.isIndexed() ? IndexOptions.DOCS_AND_FREQS : IndexOptions.NONE);
  return type;
}
 
Example 7
Source File: TestPointVectorStrategy.java    From lucene-solr with Apache License 2.0
@Test
public void testFieldOptions() throws IOException, ParseException {
  // It's not stored; test it isn't.
  this.strategy = PointVectorStrategy.newInstance(ctx, getClass().getSimpleName());
  adoc("99", "POINT(-5.0 8.2)");
  commit();
  SearchResults results = executeQuery(new MatchAllDocsQuery(), 1);
  Document document = results.results.get(0).document;
  assertNull("not stored", document.getField(strategy.getFieldName() + PointVectorStrategy.SUFFIX_X));
  assertNull("not stored", document.getField(strategy.getFieldName() + PointVectorStrategy.SUFFIX_Y));
  deleteAll();

  // Now we mark it stored.  We also disable pointvalues...
  FieldType fieldType = new FieldType(PointVectorStrategy.DEFAULT_FIELDTYPE);
  fieldType.setStored(true);
  fieldType.setDimensions(0, 0);//disable point values
  this.strategy = new PointVectorStrategy(ctx, getClass().getSimpleName(), fieldType);
  adoc("99", "POINT(-5.0 8.2)");
  commit();
  results = executeQuery(new MatchAllDocsQuery(), 1);
  document = results.results.get(0).document;
  assertEquals("stored", -5.0, document.getField(strategy.getFieldName() + PointVectorStrategy.SUFFIX_X).numericValue());
  assertEquals("stored", 8.2,  document.getField(strategy.getFieldName() + PointVectorStrategy.SUFFIX_Y).numericValue());

  // Test a query fails without point values
  expectThrows(UnsupportedOperationException.class, () -> {
    SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects, ctx.makeRectangle(-10.0, 10.0, -5.0, 5.0));
    this.strategy.makeQuery(args);
  });
}
 
Example 8
Source File: IndexOptionsDialogFactory.java    From lucene-solr with Apache License 2.0
private void saveOptions() {
  nf.setStored(storedCB.isSelected());
  if (nf.getType().equals(Field.class)) {
    FieldType ftype = (FieldType) nf.getFieldType();
    ftype.setStored(storedCB.isSelected());
    ftype.setTokenized(tokenizedCB.isSelected());
    ftype.setOmitNorms(omitNormsCB.isSelected());
    ftype.setIndexOptions(IndexOptions.valueOf((String) idxOptCombo.getSelectedItem()));
    ftype.setStoreTermVectors(storeTVCB.isSelected());
    ftype.setStoreTermVectorPositions(storeTVPosCB.isSelected());
    ftype.setStoreTermVectorOffsets(storeTVOffCB.isSelected());
    ftype.setStoreTermVectorPayloads(storeTVPayCB.isSelected());
  }
  dialog.dispose();
}
 
Example 9
Source File: BackwardsTermCustomScoreQueryTest.java    From lucene-query-example with Apache License 2.0
Field newField(String name, String value, Store stored) {
	FieldType tagsFieldType = new FieldType();
	tagsFieldType.setStored(stored == Store.YES);
	IndexOptions opts = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
	tagsFieldType.setIndexOptions(opts);
	return new Field(name, value, tagsFieldType);
}
 
Example 10
Source File: BackwardsTermQueryTest.java    From lucene-query-example with Apache License 2.0
Field newField(String name, String value, Store stored) {
	FieldType tagsFieldType = new FieldType();
	tagsFieldType.setStored(stored == Store.YES);
	IndexOptions opts = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
	tagsFieldType.setIndexOptions(opts);
	return new Field(name, value, tagsFieldType);
}
 
Example 11
Source File: Lucene101Test.java    From lucene-query-example with Apache License 2.0
Field newField(String name, String value, Store stored) {
	FieldType tagsFieldType = new FieldType();
	tagsFieldType.setStored(stored == Store.YES);
	IndexOptions opts = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
	tagsFieldType.setIndexOptions(opts);
	return new Field(name, value, tagsFieldType);
}
 
Example 12
Source File: TestBinaryTerms.java    From lucene-solr with Apache License 2.0
public void testBinary() throws IOException {    
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  BytesRef bytes = new BytesRef(2);
  
  for (int i = 0; i < 256; i++) {
    bytes.bytes[0] = (byte) i;
    bytes.bytes[1] = (byte) (255 - i);
    bytes.length = 2;
    Document doc = new Document();
    FieldType customType = new FieldType();
    customType.setStored(true);
    doc.add(newField("id", "" + i, customType));
    doc.add(newStringField("bytes", bytes, Field.Store.NO));
    iw.addDocument(doc);
  }
  
  IndexReader ir = iw.getReader();
  iw.close();
  
  IndexSearcher is = newSearcher(ir);
  
  for (int i = 0; i < 256; i++) {
    bytes.bytes[0] = (byte) i;
    bytes.bytes[1] = (byte) (255 - i);
    bytes.length = 2;
    TopDocs docs = is.search(new TermQuery(new Term("bytes", bytes)), 5);
    assertEquals(1, docs.totalHits.value);
    assertEquals("" + i, is.doc(docs.scoreDocs[0].doc).get("id"));
  }
  
  ir.close();
  dir.close();
}
 
Example 13
Source File: TestDirectoryReader.java    From lucene-solr with Apache License 2.0
static void addDocumentWithDifferentFields(IndexWriter writer) throws IOException
{
  Document doc = new Document();
  
  FieldType customType3 = new FieldType();
  customType3.setStored(true);
  doc.add(newStringField("keyword2", "test1", Field.Store.YES));
  doc.add(newTextField("text2", "test1", Field.Store.YES));
  doc.add(newField("unindexed2", "test1", customType3));
  doc.add(new TextField("unstored2","test1", Field.Store.NO));
  writer.addDocument(doc);
}
 
Example 14
Source File: TestOmitNorms.java    From lucene-solr with Apache License 2.0
/**
 * Tests various combinations of omitNorms=true/false and of the field not existing at all,
 * ensuring that only omitNorms is 'viral'.
 * Internally checks that MultiNorms.norms() is consistent (returns the same bytes)
 * as the fully merged equivalent.
 */
public void testOmitNormsCombos() throws IOException {
  // indexed with norms
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  Field norms = new Field("foo", "a", customType);
  // indexed without norms
  FieldType customType1 = new FieldType(TextField.TYPE_STORED);
  customType1.setOmitNorms(true);
  Field noNorms = new Field("foo", "a", customType1);
  // not indexed, but stored
  FieldType customType2 = new FieldType();
  customType2.setStored(true);
  Field noIndex = new Field("foo", "a", customType2);
  // not indexed but stored, omitNorms is set
  FieldType customType3 = new FieldType();
  customType3.setStored(true);
  customType3.setOmitNorms(true);
  Field noNormsNoIndex = new Field("foo", "a", customType3);
  // neither indexed nor stored (doesn't exist at all; we index a different field instead)
  Field emptyNorms = new Field("bar", "a", customType);
  
  assertNotNull(getNorms("foo", norms, norms));
  assertNull(getNorms("foo", norms, noNorms));
  assertNotNull(getNorms("foo", norms, noIndex));
  assertNotNull(getNorms("foo", norms, noNormsNoIndex));
  assertNotNull(getNorms("foo", norms, emptyNorms));
  assertNull(getNorms("foo", noNorms, noNorms));
  assertNull(getNorms("foo", noNorms, noIndex));
  assertNull(getNorms("foo", noNorms, noNormsNoIndex));
  assertNull(getNorms("foo", noNorms, emptyNorms));
  assertNull(getNorms("foo", noIndex, noIndex));
  assertNull(getNorms("foo", noIndex, noNormsNoIndex));
  assertNull(getNorms("foo", noIndex, emptyNorms));
  assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex));
  assertNull(getNorms("foo", noNormsNoIndex, emptyNorms));
  assertNull(getNorms("foo", emptyNorms, emptyNorms));
}
 
Example 15
Source File: LuceneIndexer.java    From MtgDesktopCompanion with GNU General Public License v3.0
private Document toDocuments(MagicCard mc) {
  Document doc = new Document();

  FieldType fieldType = new FieldType();
  fieldType.setStored(true);
  fieldType.setStoreTermVectors(true);
  fieldType.setTokenized(true);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

  doc.add(new Field("name", mc.getName(), fieldType));

  if (mc.getCost() != null)
    doc.add(new Field("cost", mc.getCost(), fieldType));
  else
    doc.add(new Field("cost", "", fieldType));

  if (mc.getText() != null)
    doc.add(new Field("text", mc.getText(), fieldType));
  else
    doc.add(new Field("text", "", fieldType));

  doc.add(new Field("type", mc.getFullType(), fieldType));
  doc.add(new Field("set", mc.getCurrentSet().getId(), fieldType));
  doc.add(new StoredField("cmc", mc.getCmc()));
  doc.add(new StringField("data", serializer.toJson(mc), Field.Store.YES));

  for (MTGColor color : mc.getColors()) {
    doc.add(new Field("color", color.getCode(), fieldType));
  }

  return doc;
}
 
Example 16
Source File: DocumentsTestBase.java    From lucene-solr with Apache License 2.0
protected void createIndex() throws IOException {
  indexDir = createTempDir();

  Directory dir = newFSDirectory(indexDir);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new StandardAnalyzer());

  FieldType titleType = new FieldType();
  titleType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  titleType.setStored(true);
  titleType.setTokenized(true);
  titleType.setOmitNorms(true);

  FieldType authorType = new FieldType();
  authorType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  authorType.setStored(true);
  authorType.setTokenized(true);
  authorType.setOmitNorms(false);

  FieldType textType = new FieldType();
  textType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  textType.setStored(false);
  textType.setTokenized(true);
  textType.setStoreTermVectors(true);
  textType.setOmitNorms(false);

  FieldType downloadsType = new FieldType();
  downloadsType.setDimensions(1, Integer.BYTES);
  downloadsType.setStored(true);

  Document doc1 = new Document();
  doc1.add(new Field("title", "Pride and Prejudice", titleType));
  doc1.add(new Field("author", "Jane Austen", authorType));
  doc1.add(new Field("text",
      "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.",
      textType));
  doc1.add(new SortedSetDocValuesField("subject", new BytesRef("Fiction")));
  doc1.add(new SortedSetDocValuesField("subject", new BytesRef("Love stories")));
  doc1.add(new Field("downloads", packInt(28533), downloadsType));
  writer.addDocument(doc1);

  Document doc2 = new Document();
  doc2.add(new Field("title", "Alice's Adventures in Wonderland", titleType));
  doc2.add(new Field("author", "Lewis Carroll", authorType));
  doc2.add(new Field("text", "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, ‘and what is the use of a book,’ thought Alice ‘without pictures or conversations?’",
      textType));
  doc2.add(new SortedSetDocValuesField("subject", new BytesRef("Fantasy literature")));
  doc2.add(new Field("downloads", packInt(18712), downloadsType));
  writer.addDocument(doc2);

  Document doc3 = new Document();
  doc3.add(new Field("title", "Frankenstein; Or, The Modern Prometheus", titleType));
  doc3.add(new Field("author", "Mary Wollstonecraft Shelley", authorType));
  doc3.add(new Field("text", "You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings. I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence in the success of my undertaking.",
      textType));
  doc3.add(new SortedSetDocValuesField("subject", new BytesRef("Science fiction")));
  doc3.add(new SortedSetDocValuesField("subject", new BytesRef("Horror tales")));
  doc3.add(new SortedSetDocValuesField("subject", new BytesRef("Monsters")));
  doc3.add(new Field("downloads", packInt(14737), downloadsType));
  writer.addDocument(doc3);

  Document doc4 = new Document();
  doc4.add(new Field("title", "A Doll's House : a play", titleType));
  doc4.add(new Field("author", "Henrik Ibsen", authorType));
  doc4.add(new Field("text", "",
      textType));
  doc4.add(new SortedSetDocValuesField("subject", new BytesRef("Drama")));
  doc4.add(new Field("downloads", packInt(14629), downloadsType));
  writer.addDocument(doc4);

  Document doc5 = new Document();
  doc5.add(new Field("title", "The Adventures of Sherlock Holmes", titleType));
  doc5.add(new Field("author", "Arthur Conan Doyle", authorType));
  doc5.add(new Field("text", "To Sherlock Holmes she is always the woman. I have seldom heard him mention her under any other name. In his eyes she eclipses and predominates the whole of her sex.",
      textType));
  doc5.add(new SortedSetDocValuesField("subject", new BytesRef("Fiction")));
  doc5.add(new SortedSetDocValuesField("subject", new BytesRef("Detective and mystery stories")));
  doc5.add(new Field("downloads", packInt(12828), downloadsType));
  writer.addDocument(doc5);

  writer.commit();

  writer.close();
  dir.close();
}
 
Example 17
Source File: TestExceedMaxTermLength.java    From lucene-solr with Apache License 2.0
public void test() throws Exception {
  
  IndexWriter w = new IndexWriter
    (dir, newIndexWriterConfig(random(), new MockAnalyzer(random())));
  try {
    final FieldType ft = new FieldType();
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    ft.setStored(random().nextBoolean());
    ft.freeze();
    
    final Document doc = new Document();
    if (random().nextBoolean()) {
      // totally ok short field value
      doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10),
                        TestUtil.randomSimpleString(random(), 1, 10),
                        ft));
    }
    // problematic field
    final String name = TestUtil.randomSimpleString(random(), 1, 50);
    final String value = TestUtil.randomSimpleString(random(),
                                                     minTestTermLength,
                                                     maxTestTermLegnth);
    final Field f = new Field(name, value, ft);
    if (random().nextBoolean()) {
      // totally ok short field value
      doc.add(new Field(TestUtil.randomSimpleString(random(), 1, 10),
                        TestUtil.randomSimpleString(random(), 1, 10),
                        ft));
    }
    doc.add(f);
    
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
      w.addDocument(doc);
    });
    String maxLengthMsg = String.valueOf(IndexWriter.MAX_TERM_LENGTH);
    String msg = expected.getMessage();
    assertTrue("IllegalArgumentException didn't mention 'immense term': " + msg,
               msg.contains("immense term"));
    assertTrue("IllegalArgumentException didn't mention max length ("+maxLengthMsg+"): " + msg,
               msg.contains(maxLengthMsg));
    assertTrue("IllegalArgumentException didn't mention field name ("+name+"): " + msg,
               msg.contains(name));
    assertTrue("IllegalArgumentException didn't mention original message: " + msg,
               msg.contains("bytes can be at most") && msg.contains("in length; got"));
  } finally {
    w.close();
  }
}
 
Example 18
Source File: TestFieldInfos.java    From lucene-solr with Apache License 2.0
public void testFieldAttributes() throws Exception{
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergePolicy(NoMergePolicy.INSTANCE));

  FieldType type1 = new FieldType();
  type1.setStored(true);
  type1.putAttribute("testKey1", "testValue1");

  Document d1 = new Document();
  d1.add(new Field("f1", "v1", type1));
  FieldType type2 = new FieldType(type1);
  //changing the value after copying shouldn't impact the original type1
  type2.putAttribute("testKey1", "testValue2");
  writer.addDocument(d1);
  writer.commit();

  Document d2 = new Document();
  type1.putAttribute("testKey1", "testValueX");
  type1.putAttribute("testKey2", "testValue2");
  d2.add(new Field("f1", "v2", type1));
  d2.add(new Field("f2", "v2", type2));
  writer.addDocument(d2);
  writer.commit();
  writer.forceMerge(1);

  IndexReader reader = writer.getReader();
  FieldInfos fis = FieldInfos.getMergedFieldInfos(reader);
  assertEquals(fis.size(), 2);
  Iterator<FieldInfo>  it = fis.iterator();
  while(it.hasNext()) {
    FieldInfo fi = it.next();
    switch (fi.name) {
      case "f1":
        // testKey1 can point to either testValue1 or testValueX based on the order
        // of merging, but here testValueX wins because segment_2 is merged onto segment_1.
        assertEquals("testValueX", fi.getAttribute("testKey1"));
        assertEquals("testValue2", fi.getAttribute("testKey2"));
        break;
      case "f2":
        assertEquals("testValue2", fi.getAttribute("testKey1"));
        break;
      default:
        assertFalse("Unknown field", true);
    }
  }
  reader.close();
  writer.close();
  dir.close();
}
 
Example 19
Source File: LuceneIndexFromSemrepTriples.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License
/**
 * This method indexes the file passed as a parameter, writing to the index passed as a parameter.
 * Each predication is indexed as an individual document, with the fields "subject", "predicate", and "object".
 *
 * @throws IOException
 */
static void indexDoc(IndexWriter fsWriter, File triplesTextFile) throws IOException {
  BufferedReader theReader = new BufferedReader(new FileReader(triplesTextFile));
  int linecnt = 0;
  String lineIn;
  while ((lineIn = theReader.readLine()) != null) {
    java.util.StringTokenizer theTokenizer = new java.util.StringTokenizer(lineIn, "\t");
    // Output progress counter.
    if ((++linecnt % 10000 == 0) || (linecnt < 10000 && linecnt % 1000 == 0)) {
      VerbatimLogger.info((linecnt) + " ... ");
    }
    try {
      if (theTokenizer.countTokens() < 3) {
        VerbatimLogger.warning(
            "Line in predication file does not have three delimited fields: " + lineIn + "\n");
        lineIn = theReader.readLine();
        continue;
      }

      String subject = theTokenizer.nextToken().trim().toLowerCase().replaceAll(" ", "_").replaceAll("\\|\\|\\|.*", "");
      String subject_CUI = theTokenizer.nextToken().trim().toLowerCase().replaceAll(" ", "_");
      String subject_semtype = theTokenizer.nextToken().trim().toLowerCase().replaceAll(" ", "_");

      String predicate = theTokenizer.nextToken().trim().toUpperCase().replaceAll(" ", "_");
      String object = theTokenizer.nextToken().trim().toLowerCase().replaceAll(" ", "_").replaceAll("\\|\\|\\|.*", "");
      String object_CUI = theTokenizer.nextToken().trim().toLowerCase().replaceAll(" ", "_");
      String object_semtype = theTokenizer.nextToken().trim().toLowerCase().replaceAll(" ", "_");

      String PMID = theTokenizer.nextToken();
      String source = theTokenizer.nextToken();

      Document doc = new Document();
      doc.add(new TextField("subject", subject, Field.Store.YES));
      doc.add(new TextField("subject_CUI", subject_CUI, Field.Store.YES));
      doc.add(new TextField("subject_semtype", subject_semtype, Field.Store.YES));
      doc.add(new TextField("predicate", predicate, Field.Store.YES));
      doc.add(new TextField("object", object, Field.Store.YES));
      doc.add(new TextField("object_CUI", object_CUI, Field.Store.YES));
      doc.add(new TextField("object_semtype", object_semtype, Field.Store.YES));
      doc.add(new TextField("predication", subject + predicate + object, Field.Store.NO));
      doc.add(new TextField("PMID", PMID, Field.Store.YES));

      // Create a new FieldType to store term positions (TextField is not sufficiently configurable).
      FieldType ft = new FieldType();
      // The next line was commented out when the original index was built (v1.0).
      //ft.setIndexed(true);
      ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
      ft.setStored(true);
      ft.setTokenized(true);
      ft.setStoreTermVectors(true);
      ft.setStoreTermVectorPositions(true);
      Field contentsField = new Field("source", source, ft);
      doc.add(contentsField);

      fsWriter.addDocument(doc);
    } catch (Exception e) {
      System.out.println(lineIn);
      e.printStackTrace();
    }
  }
  VerbatimLogger.info("\n");  // Newline after line counter prints.
  theReader.close();
}
 
Example 20
Source File: TestAddIndexes.java    From lucene-solr with Apache License 2.0
public void testHangOnClose() throws IOException {

    Directory dir = newDirectory();
    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
    lmp.setNoCFSRatio(0.0);
    lmp.setMergeFactor(100);
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        new MockAnalyzer(random()))
        .setMaxBufferedDocs(5).setMergePolicy(lmp));

    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectorOffsets(true);
    doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType));
    for(int i=0;i<60;i++)
      writer.addDocument(doc);

    Document doc2 = new Document();
    FieldType customType2 = new FieldType();
    customType2.setStored(true);
    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
    for(int i=0;i<10;i++)
      writer.addDocument(doc2);
    writer.close();

    Directory dir2 = newDirectory();
    lmp = new LogByteSizeMergePolicy();
    lmp.setMinMergeMB(0.0001);
    lmp.setNoCFSRatio(0.0);
    lmp.setMergeFactor(4);
    writer = new IndexWriter(dir2, newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(lmp));
    writer.addIndexes(dir);
    writer.close();
    dir.close();
    dir2.close();
  }