Java Code Examples for org.apache.nifi.provenance.index.EventIndexWriter#index()

The following examples show how to use org.apache.nifi.provenance.index.EventIndexWriter#index().
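Before the individual examples, the sketch below shows the borrow/index/return pattern that all of them follow: borrow an EventIndexWriter for an index directory from an IndexManager, call index() with a Lucene Document and a commit threshold, commit if the writer indicates it, and return the writer. This is a minimal sketch rather than code from either project: the IndexManager import path, the field name, and the method name are placeholder assumptions.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.nifi.provenance.index.EventIndexWriter;
import org.apache.nifi.provenance.lucene.IndexManager; // assumed package; may differ across NiFi versions

public class EventIndexWriterSketch {

    /**
     * Indexes a single hypothetical document into the index located at indexDir.
     * The IndexManager is assumed to be created elsewhere (e.g. a SimpleIndexManager).
     */
    void indexOneDocument(final IndexManager indexManager, final File indexDir) throws IOException {
        final EventIndexWriter writer = indexManager.borrowIndexWriter(indexDir);
        try {
            final Document doc = new Document();
            doc.add(new StringField("eventType", "RECEIVE", Store.YES)); // hypothetical field and value

            // index() returns true once the writer has accumulated enough uncommitted
            // updates that a commit is advisable; the second argument is the commit threshold.
            final boolean commitRecommended = writer.index(doc, 1000);
            if (commitRecommended) {
                writer.commit();
            }
        } finally {
            // Always return the writer so the IndexManager can reuse or close it.
            indexManager.returnIndexWriter(writer);
        }
    }
}

Examples 1 through 3 and Example 5 use the single-Document overload shown here; Examples 4, 6, and 7 pass a List of Documents, and Examples 6 and 7 use the boolean return value to decide whether to commit.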
Example 1
Source File: MigrateDefunctIndex.java    From nifi with Apache License 2.0
private void rebuildIndex(final File tempIndexDir, final File migratedIndexDir) throws IOException {
    final EventIndexWriter writer = indexManager.borrowIndexWriter(tempIndexDir);

    try {
        final EventIterator eventIterator = eventStore.getEventsByTimestamp(minTimestamp, maxTimestamp);

        final StopWatch stopWatch = new StopWatch(true);

        Optional<ProvenanceEventRecord> optionalEvent;
        while ((optionalEvent = eventIterator.nextEvent()).isPresent()) {
            final ProvenanceEventRecord event = optionalEvent.get();

            final Document document = eventConverter.convert(event, event.getEventId());
            writer.index(document, Integer.MAX_VALUE);
            successCount++;
        }

        writer.commit();
        stopWatch.stop();
        logger.info("Successfully indexed {} events to {} in {}", successCount, tempIndexDir, stopWatch.getDuration());
    } finally {
        indexManager.returnIndexWriter(writer, true, true);
    }

    Files.move(tempIndexDir.toPath(), migratedIndexDir.toPath());
}
 
Example 2
Source File: TestCachingIndexManager.java    From localization_nifi with Apache License 2.0
@Test
public void test() throws IOException {
    // Create an IndexWriter and add a document to the index, then close the writer.
    // This gives us something that we can query.
    final EventIndexWriter writer = manager.borrowIndexWriter(indexDir);
    final Document doc = new Document();
    doc.add(new StringField("unit test", "true", Store.YES));
    writer.index(doc, 1000);
    manager.returnIndexWriter(writer);

    // Get an Index Searcher that we can use to query the index.
    final EventIndexSearcher cachedSearcher = manager.borrowIndexSearcher(indexDir);

    // Ensure that we get the expected results.
    assertCount(cachedSearcher, 1);

    // While we already have an Index Searcher, get a writer for the same index.
    // This will cause the Index Searcher to be marked as poisoned.
    final EventIndexWriter writer2 = manager.borrowIndexWriter(indexDir);

    // Obtain a new Index Searcher with the writer open. This Index Searcher should *NOT*
    // be the same as the previous searcher because the new one will be a Near-Real-Time Index Searcher
    // while the other is not.
    final EventIndexSearcher nrtSearcher = manager.borrowIndexSearcher(indexDir);
    assertNotSame(cachedSearcher, nrtSearcher);

    // Ensure that we get the expected query results.
    assertCount(nrtSearcher, 1);

    // Return the writer, so that there is no longer an active writer for the index.
    manager.returnIndexWriter(writer2);

    // Ensure that we still get the same result.
    assertCount(cachedSearcher, 1);
    manager.returnIndexSearcher(cachedSearcher);

    // Ensure that our near-real-time index searcher still gets the same result.
    assertCount(nrtSearcher, 1);
    manager.returnIndexSearcher(nrtSearcher);
}
 
Example 3
Source File: TestSimpleIndexManager.java    From localization_nifi with Apache License 2.0
@Test
public void testMultipleWritersSimultaneouslySameIndex() throws IOException {
    final SimpleIndexManager mgr = new SimpleIndexManager(new RepositoryConfiguration());
    final File dir = new File("target/" + UUID.randomUUID().toString());
    try {
        final EventIndexWriter writer1 = mgr.borrowIndexWriter(dir);
        final EventIndexWriter writer2 = mgr.borrowIndexWriter(dir);

        final Document doc1 = new Document();
        doc1.add(new StringField("id", "1", Store.YES));

        final Document doc2 = new Document();
        doc2.add(new StringField("id", "2", Store.YES));

        writer1.index(doc1, 1000);
        writer2.index(doc2, 1000);
        mgr.returnIndexWriter(writer2);
        mgr.returnIndexWriter(writer1);

        final EventIndexSearcher searcher = mgr.borrowIndexSearcher(dir);
        final TopDocs topDocs = searcher.getIndexSearcher().search(new MatchAllDocsQuery(), 2);
        assertEquals(2, topDocs.totalHits);
        mgr.returnIndexSearcher(searcher);
    } finally {
        FileUtils.deleteFile(dir, true);
    }
}
 
Example 4
Source File: EventIndexTask.java    From nifi with Apache License 2.0
/**
 * Re-indexes the given documents. Each IndexableDocument provided is required to have its IndexDirectory set.
 */
void reIndex(final List<IndexableDocument> toIndex, final CommitPreference commitPreference) throws IOException {
    if (toIndex.isEmpty()) {
        return;
    }

    final Map<File, List<IndexableDocument>> docsByIndexDir = toIndex.stream().collect(Collectors.groupingBy(IndexableDocument::getIndexDirectory));
    for (final Map.Entry<File, List<IndexableDocument>> entry : docsByIndexDir.entrySet()) {
        final File indexDirectory = entry.getKey();
        final List<IndexableDocument> documentsForIndex = entry.getValue();

        final EventIndexWriter indexWriter = indexManager.borrowIndexWriter(indexDirectory);
        try {
            // Remove any documents that already exist in this index and overlap the ID range of the documents being re-indexed.
            long minId = Long.MAX_VALUE;
            long maxId = Long.MIN_VALUE;

            for (final IndexableDocument doc : toIndex) {
                final long eventId = doc.getDocument().getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue();
                if (eventId < minId) {
                    minId = eventId;
                }
                if (eventId > maxId) {
                    maxId = eventId;
                }
            }

            final Query query = LongPoint.newRangeQuery(SearchableFields.Identifier.getSearchableFieldName(), minId, maxId);
            indexWriter.getIndexWriter().deleteDocuments(query);

            final List<Document> documents = documentsForIndex.stream()
                .map(IndexableDocument::getDocument)
                .collect(Collectors.toList());

            indexWriter.index(documents, commitThreshold);
        } finally {
            indexManager.returnIndexWriter(indexWriter, CommitPreference.FORCE_COMMIT.equals(commitPreference), false);
        }
    }
}
 
Example 5
Source File: TestSimpleIndexManager.java    From nifi with Apache License 2.0
@Test
public void testMultipleWritersSimultaneouslySameIndex() throws IOException {
    final StandardIndexManager mgr = new StandardIndexManager(new RepositoryConfiguration());
    final File dir = new File("target/" + UUID.randomUUID().toString());
    try {
        final EventIndexWriter writer1 = mgr.borrowIndexWriter(dir);
        final EventIndexWriter writer2 = mgr.borrowIndexWriter(dir);

        final Document doc1 = new Document();
        doc1.add(new StringField("id", "1", Store.YES));

        final Document doc2 = new Document();
        doc2.add(new StringField("id", "2", Store.YES));

        writer1.index(doc1, 1000);
        writer2.index(doc2, 1000);
        mgr.returnIndexWriter(writer2);
        mgr.returnIndexWriter(writer1);

        final EventIndexSearcher searcher = mgr.borrowIndexSearcher(dir);
        final TopDocs topDocs = searcher.getIndexSearcher().search(new MatchAllDocsQuery(), 2);
        assertEquals(2, topDocs.totalHits.value);
        mgr.returnIndexSearcher(searcher);
    } finally {
        FileUtils.deleteFile(dir, true);
    }
}
 
Example 6
Source File: EventIndexTask.java    From localization_nifi with Apache License 2.0
private void index(final List<StoredDocument> toIndex, final String partitionName) throws IOException {
    if (toIndex.isEmpty()) {
        return;
    }

    // Convert the StoredDocument list into a List of Documents so that we can pass them to the Index Writer.
    final List<Document> documents = toIndex.stream()
        .map(doc -> doc.getDocument())
        .collect(Collectors.toList());

    boolean requestClose = false;
    boolean requestCommit = false;

    final long minEventTime = toIndex.stream()
        .mapToLong(doc -> doc.getDocument().getField(SearchableFields.EventTime.getSearchableFieldName()).numericValue().longValue())
        .min()
        .getAsLong();

    // Synchronize on the directory manager because we don't want the active directory to change
    // while we are obtaining an index writer for it. I.e., determining the active directory
    // and obtaining an Index Writer for it need to be done atomically.
    final EventIndexWriter indexWriter;
    final File indexDirectory;
    synchronized (directoryManager) {
        indexDirectory = directoryManager.getWritableIndexingDirectory(minEventTime, partitionName);
        indexWriter = indexManager.borrowIndexWriter(indexDirectory);
    }

    try {
        // Perform the actual indexing.
        boolean writerIndicatesCommit = indexWriter.index(documents, commitThreshold);

        // If we don't need to commit the index based on what the index writer tells us, we will still want
        // to commit the index if it's assigned to a partition and this is no longer the active index
        // for that partition. This prevents the following case:
        //
        // Thread T1: pulls events from queue
        //            Maps events to Index Directory D1
        // Thread T2: pulls events from queue
        //            Maps events to Index Directory D1, the active index for Partition P1.
        //            Writes events to D1.
        //            Commits Index Writer for D1.
        //            Closes Index Writer for D1.
        // Thread T1: Writes events to D1.
        //            Determines that Index Writer for D1 does not need to be committed or closed.
        //
        // In the case outlined above, we would potentially lose those events from the index! To avoid this,
        // we simply decide to commit the index if this writer is no longer the active writer for the index.
        // However, if we have 10 threads, we don't want all 10 threads trying to commit the index after each
        // update. We want to commit when they've all finished. This is what the IndexManager will do if we request
        // that it commit the index. It will also close the index if requested, once all writers have finished.
        // So when this is the case, we will request that the Index Manager both commit and close the writer.

        final Optional<File> activeIndexDirOption = directoryManager.getActiveIndexDirectory(partitionName);
        if (!activeIndexDirOption.isPresent() || !activeIndexDirOption.get().equals(indexDirectory)) {
            requestCommit = true;
            requestClose = true;
        }

        if (writerIndicatesCommit) {
            commit(indexWriter);
            requestCommit = false; // we've already committed the index writer so no need to request that the index manager do so also.
            final boolean directoryManagerIndicatesClose = directoryManager.onIndexCommitted(indexDirectory);
            requestClose = requestClose || directoryManagerIndicatesClose;

            if (logger.isDebugEnabled()) {
                final long maxId = documents.stream()
                    .mapToLong(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue())
                    .max()
                    .orElse(-1L);
                logger.debug("Committed index {} after writing a max Event ID of {}", indexDirectory, maxId);
            }
        }
    } finally {
        indexManager.returnIndexWriter(indexWriter, requestCommit, requestClose);
    }
}
 
Example 7
Source File: EventIndexTask.java    From nifi with Apache License 2.0
private void index(final List<StoredDocument> toIndex, final String partitionName) throws IOException {
    if (toIndex.isEmpty()) {
        return;
    }

    // Convert the StoredDocument list into a List of Documents so that we can pass them to the Index Writer.
    final List<Document> documents = toIndex.stream()
        .map(StoredDocument::getDocument)
        .collect(Collectors.toList());

    boolean requestClose = false;
    boolean requestCommit = false;

    final long minEventTime = toIndex.stream()
        .mapToLong(doc -> doc.getDocument().getField(SearchableFields.EventTime.getSearchableFieldName()).numericValue().longValue())
        .min()
        .getAsLong();

    // Synchronize on the directory manager because we don't want the active directory to change
    // while we are obtaining an index writer for it. I.e., determining the active directory
    // and obtaining an Index Writer for it need to be done atomically.
    final EventIndexWriter indexWriter;
    final File indexDirectory;
    synchronized (directoryManager) {
        indexDirectory = directoryManager.getWritableIndexingDirectory(minEventTime, partitionName);
        indexWriter = indexManager.borrowIndexWriter(indexDirectory);
    }

    try {
        // Perform the actual indexing.
        boolean writerIndicatesCommit = indexWriter.index(documents, commitThreshold);

        // If we don't need to commit the index based on what the index writer tells us, we will still want
        // to commit the index if it's assigned to a partition and this is no longer the active index
        // for that partition. This prevents the following case:
        //
        // Thread T1: pulls events from queue
        //            Maps events to Index Directory D1
        // Thread T2: pulls events from queue
        //            Maps events to Index Directory D1, the active index for Partition P1.
        //            Writes events to D1.
        //            Commits Index Writer for D1.
        //            Closes Index Writer for D1.
        // Thread T1: Writes events to D1.
        //            Determines that Index Writer for D1 does not need to be committed or closed.
        //
        // In the case outlined above, we would potentially lose those events from the index! To avoid this,
        // we simply decide to commit the index if this writer is no longer the active writer for the index.
        // However, if we have 10 threads, we don't want all 10 threads trying to commit the index after each
        // update. We want to commit when they've all finished. This is what the IndexManager will do if we request
        // that it commit the index. It will also close the index if requested, once all writers have finished.
        // So when this is the case, we will request that the Index Manager both commit and close the writer.

        final Optional<File> activeIndexDirOption = directoryManager.getActiveIndexDirectory(partitionName);
        if (!activeIndexDirOption.isPresent() || !activeIndexDirOption.get().equals(indexDirectory)) {
            requestCommit = true;
            requestClose = true;
        }

        if (writerIndicatesCommit) {
            commit(indexWriter);
            requestCommit = false; // we've already committed the index writer so no need to request that the index manager do so also.
            final boolean directoryManagerIndicatesClose = directoryManager.onIndexCommitted(indexDirectory);
            requestClose = requestClose || directoryManagerIndicatesClose;

            if (logger.isDebugEnabled()) {
                final long maxId = documents.stream()
                    .mapToLong(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue())
                    .max()
                    .orElse(-1L);
                logger.debug("Committed index {} after writing a max Event ID of {}", indexDirectory, maxId);
            }
        }
    } finally {
        indexManager.returnIndexWriter(indexWriter, requestCommit, requestClose);
    }
}