org.apache.nifi.provenance.index.EventIndexWriter Java Examples

The following examples show how to use org.apache.nifi.provenance.index.EventIndexWriter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestSimpleIndexManager.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that when a single borrower returns its writer with both flags set to
 * {@code true}, the manager's {@code close} hook is invoked exactly once.
 */
@Test
public void testWriterCloseIfOnlyUser() throws IOException {
    final AtomicInteger closeInvocations = new AtomicInteger(0);

    // Override close() so that the test only counts close requests.
    final SimpleIndexManager indexManager = new SimpleIndexManager(new RepositoryConfiguration()) {
        @Override
        protected void close(IndexWriterCount count) throws IOException {
            closeInvocations.incrementAndGet();
        }
    };

    final File indexDir = new File("target/" + UUID.randomUUID());
    final EventIndexWriter borrowed = indexManager.borrowIndexWriter(indexDir);

    indexManager.returnIndexWriter(borrowed, true, true);

    assertEquals(1, closeInvocations.get());
}
 
Example #2
Source File: TestSimpleIndexManager.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that returning a writer with the closeable flag set to {@code false}
 * does not trigger the manager's {@code close} hook.
 */
@Test
public void testWriterLeftOpenIfNotCloseable() throws IOException {
    final AtomicInteger closeInvocations = new AtomicInteger(0);

    // Override close() so that the test only counts close requests.
    final StandardIndexManager indexManager = new StandardIndexManager(new RepositoryConfiguration()) {
        @Override
        protected void close(IndexWriterCount count) throws IOException {
            closeInvocations.incrementAndGet();
        }
    };

    final File indexDir = new File("target/" + UUID.randomUUID());
    final EventIndexWriter borrowed = indexManager.borrowIndexWriter(indexDir);

    // commit = true, isCloseable = false
    indexManager.returnIndexWriter(borrowed, true, false);

    assertEquals(0, closeInvocations.get());
}
 
Example #3
Source File: TestSimpleIndexManager.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that when a single borrower returns its writer as closeable,
 * the manager's {@code close} hook is invoked exactly once.
 */
@Test
public void testWriterCloseIfOnlyUser() throws IOException {
    final AtomicInteger closeInvocations = new AtomicInteger(0);

    // Override close() so that the test only counts close requests.
    final StandardIndexManager indexManager = new StandardIndexManager(new RepositoryConfiguration()) {
        @Override
        protected void close(IndexWriterCount count) throws IOException {
            closeInvocations.incrementAndGet();
        }
    };

    final File indexDir = new File("target/" + UUID.randomUUID());
    final EventIndexWriter borrowed = indexManager.borrowIndexWriter(indexDir);

    // commit = true, isCloseable = true
    indexManager.returnIndexWriter(borrowed, true, true);

    assertEquals(1, closeInvocations.get());
}
 
Example #4
Source File: StandardIndexManager.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Hands out the shared {@link EventIndexWriter} for the given index directory, creating it
 * if no writer is currently tracked for that directory, and records one additional borrower.
 * Writers are keyed by the absolute form of the directory.
 *
 * @param indexDirectory the directory of the index to write to
 * @return the writer for the directory; the same instance is handed to every concurrent borrower
 * @throws IOException if a new writer has to be created and creation fails
 */
@Override
public EventIndexWriter borrowIndexWriter(final File indexDirectory) throws IOException {
    final File absoluteFile = indexDirectory.getAbsoluteFile();
    logger.trace("Borrowing index writer for {}", indexDirectory);

    final IndexWriterCount writerCount;
    synchronized (writerCounts) {
        final IndexWriterCount existing = writerCounts.get(absoluteFile);

        if (existing == null) {
            writerCount = createWriter(indexDirectory);
        } else {
            logger.trace("Providing existing index writer for {} and incrementing count to {}", indexDirectory, existing.getCount() + 1);
            writerCount = new IndexWriterCount(existing.getWriter(), existing.getAnalyzer(), existing.getDirectory(),
                existing.getCount() + 1, existing.isCloseableWhenUnused());
        }

        // Keep a reference to the entry actually stored in the map so the diagnostic below
        // reports the up-to-date borrower count rather than the pre-increment one.
        writerCounts.put(absoluteFile, writerCount);

        if (writerCounts.size() > repoConfig.getStorageDirectories().size() * 2) {
            logger.debug("Index Writer borrowed; writer count map now has size {}; writerCount = {}; full writerCounts map = {}",
                writerCounts.size(), writerCount, writerCounts);
        }
    }

    return writerCount.getWriter();
}
 
Example #5
Source File: MigrateDefunctIndex.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes every event the event store returns for the range [minTimestamp, maxTimestamp]
 * into {@code tempIndexDir}, commits, and then moves the finished index to {@code migratedIndexDir}.
 *
 * @param tempIndexDir directory in which the index is built
 * @param migratedIndexDir final location the built index is moved to
 * @throws IOException if reading events, indexing, committing, or moving the directory fails
 */
private void rebuildIndex(final File tempIndexDir, final File migratedIndexDir) throws IOException {
    final EventIndexWriter indexWriter = indexManager.borrowIndexWriter(tempIndexDir);

    try {
        final EventIterator events = eventStore.getEventsByTimestamp(minTimestamp, maxTimestamp);
        final StopWatch timer = new StopWatch(true);

        for (Optional<ProvenanceEventRecord> next = events.nextEvent(); next.isPresent(); next = events.nextEvent()) {
            final ProvenanceEventRecord provenanceEvent = next.get();
            final Document indexable = eventConverter.convert(provenanceEvent, provenanceEvent.getEventId());

            // Integer.MAX_VALUE as the threshold: the single explicit commit happens below.
            indexWriter.index(indexable, Integer.MAX_VALUE);
            successCount++;
        }

        indexWriter.commit();
        timer.stop();
        logger.info("Successfully indexed {} events to {} in {}", successCount, tempIndexDir, timer.getDuration());
    } finally {
        // Always hand the writer back, requesting commit and close.
        indexManager.returnIndexWriter(indexWriter, true, true);
    }

    Files.move(tempIndexDir.toPath(), migratedIndexDir.toPath());
}
 
Example #6
Source File: TestSimpleIndexManager.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that returning a writer with the last flag set to {@code false}
 * does not trigger the manager's {@code close} hook.
 */
@Test
public void testWriterLeftOpenIfNotCloseable() throws IOException {
    final AtomicInteger closeInvocations = new AtomicInteger(0);

    // Override close() so that the test only counts close requests.
    final SimpleIndexManager indexManager = new SimpleIndexManager(new RepositoryConfiguration()) {
        @Override
        protected void close(IndexWriterCount count) throws IOException {
            closeInvocations.incrementAndGet();
        }
    };

    final File indexDir = new File("target/" + UUID.randomUUID());
    final EventIndexWriter borrowed = indexManager.borrowIndexWriter(indexDir);

    indexManager.returnIndexWriter(borrowed, true, false);

    assertEquals(0, closeInvocations.get());
}
 
Example #7
Source File: SimpleIndexManager.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Hands out the shared {@link EventIndexWriter} for the given index directory, creating it
 * if no writer is currently tracked for that directory, and records one additional borrower.
 * Writers are keyed by the absolute form of the directory.
 *
 * @param indexDirectory the directory of the index to write to
 * @return the writer for the directory; the same instance is handed to every concurrent borrower
 * @throws IOException if a new writer has to be created and creation fails
 */
@Override
public EventIndexWriter borrowIndexWriter(final File indexDirectory) throws IOException {
    final File absoluteFile = indexDirectory.getAbsoluteFile();
    logger.trace("Borrowing index writer for {}", indexDirectory);

    final IndexWriterCount writerCount;
    synchronized (writerCounts) {
        final IndexWriterCount existing = writerCounts.get(absoluteFile);

        if (existing == null) {
            writerCount = createWriter(indexDirectory);
        } else {
            logger.trace("Providing existing index writer for {} and incrementing count to {}", indexDirectory, existing.getCount() + 1);
            writerCount = new IndexWriterCount(existing.getWriter(), existing.getAnalyzer(), existing.getDirectory(),
                existing.getCount() + 1, existing.isCloseableWhenUnused());
        }

        // Keep a reference to the entry actually stored in the map so the diagnostic below
        // reports the up-to-date borrower count rather than the pre-increment one.
        writerCounts.put(absoluteFile, writerCount);

        if (writerCounts.size() > repoConfig.getStorageDirectories().size() * 2) {
            logger.debug("Index Writer borrowed; writer count map now has size {}; writerCount = {}; full writerCounts map = {}",
                writerCounts.size(), writerCount, writerCounts);
        }
    }

    return writerCount.getWriter();
}
 
Example #8
Source File: SimpleIndexManager.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Bundles an index writer with the resources it was created from and its usage bookkeeping.
 *
 * @param writer the event index writer being tracked
 * @param analyzer the analyzer associated with the writer
 * @param directory the Lucene directory associated with the writer
 * @param count the number of outstanding borrowers recorded for the writer
 * @param closeableWhenUnused whether the writer has been marked for closing once it is no longer in use
 */
public IndexWriterCount(final EventIndexWriter writer, final Analyzer analyzer, final Directory directory, final int count, final boolean closeableWhenUnused) {
    // Bookkeeping state
    this.count = count;
    this.closeableWhenUnused = closeableWhenUnused;

    // Tracked resources
    this.writer = writer;
    this.analyzer = analyzer;
    this.directory = directory;
}
 
Example #9
Source File: LuceneEventIndex.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Commits the index writer of the partition's active index directory, if the partition has one.
 *
 * @param partitionName name of the partition whose active index should be committed
 * @throws IOException if borrowing the writer or committing fails
 */
@Override
public void commitChanges(final String partitionName) throws IOException {
    final Optional<File> activeDir = directoryManager.getActiveIndexDirectory(partitionName);
    if (!activeDir.isPresent()) {
        // No active index for this partition: nothing to commit.
        return;
    }

    final EventIndexWriter writer = indexManager.borrowIndexWriter(activeDir.get());
    try {
        writer.commit();
    } finally {
        // Already committed above, so ask the manager for neither a commit nor a close.
        indexManager.returnIndexWriter(writer, false, false);
    }
}
 
Example #10
Source File: StandardIndexManager.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a searcher for the given index directory. If a writer is currently tracked for the
 * directory, its borrower count is incremented and the searcher is opened on top of that writer;
 * otherwise the searcher is opened directly against the directory on disk.
 *
 * @param indexDir the directory of the index to search
 * @return a new searcher for the index
 * @throws IOException if the directory or the reader cannot be opened
 */
@Override
public EventIndexSearcher borrowIndexSearcher(final File indexDir) throws IOException {
    final File absoluteFile = indexDir.getAbsoluteFile();

    final IndexWriterCount writerCount;
    synchronized (writerCounts) {
        writerCount = writerCounts.remove(absoluteFile);

        if (writerCount != null) {
            // Increment writer count and create an Index Searcher based on the writer
            writerCounts.put(absoluteFile, new IndexWriterCount(writerCount.getWriter(), writerCount.getAnalyzer(),
                writerCount.getDirectory(), writerCount.getCount() + 1, writerCount.isCloseableWhenUnused()));
        }
    }

    final DirectoryReader directoryReader;
    if (writerCount == null) {
        logger.trace("Creating index searcher for {}", indexDir);
        final Directory directory = FSDirectory.open(indexDir.toPath());
        try {
            directoryReader = DirectoryReader.open(directory);
        } catch (final IOException | RuntimeException e) {
            // Nothing else holds a reference to the directory yet, so release it here
            // rather than leaking it when the reader cannot be opened.
            try {
                directory.close();
            } catch (final IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    } else {
        final EventIndexWriter eventIndexWriter = writerCount.getWriter();
        directoryReader = DirectoryReader.open(eventIndexWriter.getIndexWriter(), false, false);
    }

    final IndexSearcher searcher = new IndexSearcher(directoryReader, this.searchExecutor);

    logger.trace("Created index searcher {} for {}", searcher, indexDir);
    return new LuceneEventIndexSearcher(searcher, indexDir, null, directoryReader);
}
 
Example #11
Source File: StandardIndexManager.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given index directory and builds a new writer for it, returned as an
 * {@link IndexWriterCount} with an initial count of 1 that is not marked closeable.
 * If an {@link IOException} occurs after the directory has been opened, the resources
 * opened so far are closed before the exception is rethrown.
 *
 * @param indexDirectory the directory of the index to open a writer for
 * @return the bookkeeping entry wrapping the new writer
 * @throws IOException if the directory or writer cannot be opened
 */
private IndexWriterCount createWriter(final File indexDirectory) throws IOException {
    final List<Closeable> openedResources = new ArrayList<>();

    final Directory luceneDirectory = FSDirectory.open(indexDirectory.toPath());
    openedResources.add(luceneDirectory);

    try {
        final Analyzer standardAnalyzer = new StandardAnalyzer();
        openedResources.add(standardAnalyzer);

        final IndexWriterConfig writerConfig = new IndexWriterConfig(standardAnalyzer);

        // Use the configured number of threads for both the merge and thread limits.
        final ConcurrentMergeScheduler scheduler = new ConcurrentMergeScheduler();
        final int threadCount = repoConfig.getConcurrentMergeThreads();
        scheduler.setMaxMergesAndThreads(threadCount, threadCount);
        writerConfig.setMergeScheduler(scheduler);

        final IndexWriter luceneWriter = new IndexWriter(luceneDirectory, writerConfig);
        final EventIndexWriter eventWriter = new LuceneEventIndexWriter(luceneWriter, indexDirectory);

        final IndexWriterCount created = new IndexWriterCount(eventWriter, standardAnalyzer, luceneDirectory, 1, false);
        logger.debug("Providing new index writer for {}", indexDirectory);
        return created;
    } catch (final IOException failure) {
        // Release whatever was opened; attach secondary failures to the primary one.
        for (final Closeable resource : openedResources) {
            try {
                resource.close();
            } catch (final IOException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
        }

        throw failure;
    }
}
 
Example #12
Source File: EventIndexTask.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Re-indexes the documents given. The IndexableDocument's provided are required to have the IndexDirectory provided.
 * Documents are grouped by their index directory. For each directory, any documents already in that index whose
 * Event ID lies within the [min, max] ID range of the documents being written to that directory are deleted
 * before the new documents are indexed.
 *
 * @param toIndex the documents to re-index; nothing is done if the list is empty
 * @param commitPreference if {@link CommitPreference#FORCE_COMMIT}, the index manager is asked to commit when
 *                         each writer is returned
 * @throws IOException if borrowing a writer, deleting, or indexing fails
 */
void reIndex(final List<IndexableDocument> toIndex, final CommitPreference commitPreference) throws IOException {
    if (toIndex.isEmpty()) {
        return;
    }

    final String idFieldName = SearchableFields.Identifier.getSearchableFieldName();

    final Map<File, List<IndexableDocument>> docsByIndexDir = toIndex.stream().collect(Collectors.groupingBy(IndexableDocument::getIndexDirectory));
    for (final Map.Entry<File, List<IndexableDocument>> entry : docsByIndexDir.entrySet()) {
        final File indexDirectory = entry.getKey();
        final List<IndexableDocument> documentsForIndex = entry.getValue();

        final EventIndexWriter indexWriter = indexManager.borrowIndexWriter(indexDirectory);
        try {
            // Remove any documents that already exist in this index that are overlapping.
            // Only the documents destined for THIS index directory define the range; scanning the
            // full input would widen the delete range with IDs that belong to other indexes.
            long minId = Long.MAX_VALUE;
            long maxId = Long.MIN_VALUE;

            for (final IndexableDocument doc : documentsForIndex) {
                final long eventId = doc.getDocument().getField(idFieldName).numericValue().longValue();
                if (eventId < minId) {
                    minId = eventId;
                }
                if (eventId > maxId) {
                    maxId = eventId;
                }
            }

            final Query query = LongPoint.newRangeQuery(idFieldName, minId, maxId);
            indexWriter.getIndexWriter().deleteDocuments(query);

            final List<Document> documents = documentsForIndex.stream()
                .map(IndexableDocument::getDocument)
                .collect(Collectors.toList());

            indexWriter.index(documents, commitThreshold);
        } finally {
            indexManager.returnIndexWriter(indexWriter, CommitPreference.FORCE_COMMIT.equals(commitPreference), false);
        }
    }
}
 
Example #13
Source File: TestSimpleIndexManager.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that once any borrower has returned the writer with the closeable flag set,
 * the writer is closed when the final borrower returns it, even if that final return
 * passes {@code false}.
 */
@Test
public void testWriterCloseIfPreviouslyMarkedCloseable() throws IOException {
    final AtomicInteger closeInvocations = new AtomicInteger(0);

    // Override close() so that the test only counts close requests.
    final SimpleIndexManager indexManager = new SimpleIndexManager(new RepositoryConfiguration()) {
        @Override
        protected void close(IndexWriterCount count) throws IOException {
            closeInvocations.incrementAndGet();
        }
    };

    final File indexDir = new File("target/" + UUID.randomUUID());

    // Two borrowers of the same directory share a single writer instance.
    final EventIndexWriter firstBorrow = indexManager.borrowIndexWriter(indexDir);
    final EventIndexWriter secondBorrow = indexManager.borrowIndexWriter(indexDir);
    assertTrue(firstBorrow == secondBorrow);

    // The first return marks the writer closeable, but another borrower is still outstanding.
    indexManager.returnIndexWriter(firstBorrow, true, true);
    assertEquals(0, closeInvocations.get());

    final EventIndexWriter[] extraBorrows = new EventIndexWriter[10];
    for (int idx = 0; idx < extraBorrows.length; idx++) {
        extraBorrows[idx] = indexManager.borrowIndexWriter(indexDir);
        assertTrue(extraBorrows[idx] == firstBorrow);
    }

    for (final EventIndexWriter extra : extraBorrows) {
        indexManager.returnIndexWriter(extra, true, false);
        assertEquals(0, closeInvocations.get());
        assertEquals(1, indexManager.getWriterCount());
    }

    // this should close the index writer even though 'false' is passed in
    // because the previous call marked the writer as closeable and this is
    // the last reference to the writer.
    indexManager.returnIndexWriter(secondBorrow, false, false);
    assertEquals(1, closeInvocations.get());
    assertEquals(0, indexManager.getWriterCount());
}
 
Example #14
Source File: TestSimpleIndexManager.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Borrows the writer for one directory twice, indexes one document through each handle,
 * and verifies that a searcher borrowed afterwards sees both documents.
 */
@Test
public void testMultipleWritersSimultaneouslySameIndex() throws IOException {
    final SimpleIndexManager indexManager = new SimpleIndexManager(new RepositoryConfiguration());
    final File indexDir = new File("target/" + UUID.randomUUID());
    try {
        final EventIndexWriter firstWriter = indexManager.borrowIndexWriter(indexDir);
        final EventIndexWriter secondWriter = indexManager.borrowIndexWriter(indexDir);

        final Document firstDoc = new Document();
        firstDoc.add(new StringField("id", "1", Store.YES));
        final Document secondDoc = new Document();
        secondDoc.add(new StringField("id", "2", Store.YES));

        firstWriter.index(firstDoc, 1000);
        secondWriter.index(secondDoc, 1000);

        // Hand the writers back in reverse order of borrowing.
        indexManager.returnIndexWriter(secondWriter);
        indexManager.returnIndexWriter(firstWriter);

        final EventIndexSearcher indexSearcher = indexManager.borrowIndexSearcher(indexDir);
        final TopDocs hits = indexSearcher.getIndexSearcher().search(new MatchAllDocsQuery(), 2);
        assertEquals(2, hits.totalHits);
        indexManager.returnIndexSearcher(indexSearcher);
    } finally {
        // Remove the temporary index regardless of the outcome.
        FileUtils.deleteFile(indexDir, true);
    }
}
 
Example #15
Source File: TestCachingIndexManager.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises the interplay of searchers and writers on one index: a searcher obtained while
 * no writer is open and a searcher obtained while a writer is open must be different
 * instances, and both must keep returning the same single document before and after the
 * writer is returned.
 */
@Test
public void test() throws IOException {
    // Seed the index with a single document so that there is something to query.
    final EventIndexWriter seedWriter = manager.borrowIndexWriter(indexDir);
    final Document seedDoc = new Document();
    seedDoc.add(new StringField("unit test", "true", Store.YES));
    seedWriter.index(seedDoc, 1000);
    manager.returnIndexWriter(seedWriter);

    // Searcher obtained while no writer is borrowed.
    final EventIndexSearcher cachedSearcher = manager.borrowIndexSearcher(indexDir);
    assertCount(cachedSearcher, 1);

    // Borrow a writer for the same index while the searcher above is still held.
    // This will cause the Index Searcher to be marked as poisoned.
    final EventIndexWriter activeWriter = manager.borrowIndexWriter(indexDir);

    // A searcher obtained while the writer is open should *NOT* be the same instance as the
    // earlier one, because the new one will be a Near-Real-Time Index Searcher while the other is not.
    final EventIndexSearcher nrtSearcher = manager.borrowIndexSearcher(indexDir);
    assertNotSame(cachedSearcher, nrtSearcher);
    assertCount(nrtSearcher, 1);

    // Give the writer back so that the index no longer has an active writer.
    manager.returnIndexWriter(activeWriter);

    // Both searchers must still report the same result.
    assertCount(cachedSearcher, 1);
    manager.returnIndexSearcher(cachedSearcher);

    assertCount(nrtSearcher, 1);
    manager.returnIndexSearcher(nrtSearcher);
}
 
Example #16
Source File: StandardIndexManager.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Bundles an index writer with the resources it was created from and its usage bookkeeping.
 *
 * @param writer the event index writer being tracked
 * @param analyzer the analyzer associated with the writer
 * @param directory the Lucene directory associated with the writer
 * @param count the number of outstanding borrowers recorded for the writer
 * @param closeableWhenUnused whether the writer has been marked for closing once it is no longer in use
 */
public IndexWriterCount(final EventIndexWriter writer, final Analyzer analyzer, final Directory directory, final int count, final boolean closeableWhenUnused) {
    // Bookkeeping state
    this.count = count;
    this.closeableWhenUnused = closeableWhenUnused;

    // Tracked resources
    this.writer = writer;
    this.analyzer = analyzer;
    this.directory = directory;
}
 
Example #17
Source File: SimpleIndexManager.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the given index directory and builds a new writer for it, returned as an
 * {@link IndexWriterCount} with an initial count of 1 that is not marked closeable.
 * If an {@link IOException} occurs after the directory has been opened, the resources
 * opened so far are closed before the exception is rethrown.
 *
 * @param indexDirectory the directory of the index to open a writer for
 * @return the bookkeeping entry wrapping the new writer
 * @throws IOException if the directory or writer cannot be opened
 */
private IndexWriterCount createWriter(final File indexDirectory) throws IOException {
    final List<Closeable> openedResources = new ArrayList<>();

    final Directory luceneDirectory = FSDirectory.open(indexDirectory);
    openedResources.add(luceneDirectory);

    try {
        final Analyzer standardAnalyzer = new StandardAnalyzer();
        openedResources.add(standardAnalyzer);

        final IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneUtil.LUCENE_VERSION, standardAnalyzer);

        // Use the configured number of threads for both the merge and thread limits.
        final ConcurrentMergeScheduler scheduler = new ConcurrentMergeScheduler();
        final int threadCount = repoConfig.getConcurrentMergeThreads();
        scheduler.setMaxMergesAndThreads(threadCount, threadCount);
        writerConfig.setMergeScheduler(scheduler);

        final IndexWriter luceneWriter = new IndexWriter(luceneDirectory, writerConfig);
        final EventIndexWriter eventWriter = new LuceneEventIndexWriter(luceneWriter, indexDirectory);

        final IndexWriterCount created = new IndexWriterCount(eventWriter, standardAnalyzer, luceneDirectory, 1, false);
        logger.debug("Providing new index writer for {}", indexDirectory);
        return created;
    } catch (final IOException failure) {
        // Release whatever was opened; attach secondary failures to the primary one.
        for (final Closeable resource : openedResources) {
            try {
                resource.close();
            } catch (final IOException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
        }

        throw failure;
    }
}
 
Example #18
Source File: SimpleIndexManager.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a searcher for the given index directory. If a writer is currently tracked for the
 * directory, its borrower count is incremented and the searcher is opened on top of that writer;
 * otherwise the searcher is opened directly against the directory on disk.
 *
 * @param indexDir the directory of the index to search
 * @return a new searcher for the index
 * @throws IOException if the directory or the reader cannot be opened
 */
@Override
public EventIndexSearcher borrowIndexSearcher(final File indexDir) throws IOException {
    final File absoluteFile = indexDir.getAbsoluteFile();

    final IndexWriterCount writerCount;
    synchronized (writerCounts) {
        writerCount = writerCounts.remove(absoluteFile);

        if (writerCount != null) {
            // Increment writer count and create an Index Searcher based on the writer
            writerCounts.put(absoluteFile, new IndexWriterCount(writerCount.getWriter(), writerCount.getAnalyzer(),
                writerCount.getDirectory(), writerCount.getCount() + 1, writerCount.isCloseableWhenUnused()));
        }
    }

    final DirectoryReader directoryReader;
    if (writerCount == null) {
        logger.trace("Creating index searcher for {}", indexDir);
        final Directory directory = FSDirectory.open(indexDir);
        try {
            directoryReader = DirectoryReader.open(directory);
        } catch (final IOException | RuntimeException e) {
            // Nothing else holds a reference to the directory yet, so release it here
            // rather than leaking it when the reader cannot be opened.
            try {
                directory.close();
            } catch (final IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    } else {
        final EventIndexWriter eventIndexWriter = writerCount.getWriter();
        directoryReader = DirectoryReader.open(eventIndexWriter.getIndexWriter(), false);
    }

    final IndexSearcher searcher = new IndexSearcher(directoryReader, this.searchExecutor);

    logger.trace("Created index searcher {} for {}", searcher, indexDir);
    return new LuceneEventIndexSearcher(searcher, indexDir, null, directoryReader);
}
 
Example #19
Source File: TestSimpleIndexManager.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Borrows the writer for one directory twice, indexes one document through each handle,
 * and verifies that a searcher borrowed afterwards sees both documents.
 */
@Test
public void testMultipleWritersSimultaneouslySameIndex() throws IOException {
    final StandardIndexManager indexManager = new StandardIndexManager(new RepositoryConfiguration());
    final File indexDir = new File("target/" + UUID.randomUUID());
    try {
        final EventIndexWriter firstWriter = indexManager.borrowIndexWriter(indexDir);
        final EventIndexWriter secondWriter = indexManager.borrowIndexWriter(indexDir);

        final Document firstDoc = new Document();
        firstDoc.add(new StringField("id", "1", Store.YES));
        final Document secondDoc = new Document();
        secondDoc.add(new StringField("id", "2", Store.YES));

        firstWriter.index(firstDoc, 1000);
        secondWriter.index(secondDoc, 1000);

        // Hand the writers back in reverse order of borrowing.
        indexManager.returnIndexWriter(secondWriter);
        indexManager.returnIndexWriter(firstWriter);

        final EventIndexSearcher indexSearcher = indexManager.borrowIndexSearcher(indexDir);
        final TopDocs hits = indexSearcher.getIndexSearcher().search(new MatchAllDocsQuery(), 2);
        assertEquals(2, hits.totalHits.value);
        indexManager.returnIndexSearcher(indexSearcher);
    } finally {
        // Remove the temporary index regardless of the outcome.
        FileUtils.deleteFile(indexDir, true);
    }
}
 
Example #20
Source File: TestSimpleIndexManager.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that once any borrower has returned the writer as closeable, the writer is
 * closed when the final borrower returns it, even if that final return passes
 * {@code false} for the closeable flag.
 */
@Test
public void testWriterCloseIfPreviouslyMarkedCloseable() throws IOException {
    final AtomicInteger closeInvocations = new AtomicInteger(0);

    // Override close() so that the test only counts close requests.
    final StandardIndexManager indexManager = new StandardIndexManager(new RepositoryConfiguration()) {
        @Override
        protected void close(IndexWriterCount count) throws IOException {
            closeInvocations.incrementAndGet();
        }
    };

    final File indexDir = new File("target/" + UUID.randomUUID());

    // Two borrowers of the same directory share a single writer instance.
    final EventIndexWriter firstBorrow = indexManager.borrowIndexWriter(indexDir);
    final EventIndexWriter secondBorrow = indexManager.borrowIndexWriter(indexDir);
    assertTrue(firstBorrow == secondBorrow);

    // The first return marks the writer closeable, but another borrower is still outstanding.
    indexManager.returnIndexWriter(firstBorrow, true, true);
    assertEquals(0, closeInvocations.get());

    final EventIndexWriter[] extraBorrows = new EventIndexWriter[10];
    for (int idx = 0; idx < extraBorrows.length; idx++) {
        extraBorrows[idx] = indexManager.borrowIndexWriter(indexDir);
        assertTrue(extraBorrows[idx] == firstBorrow);
    }

    for (final EventIndexWriter extra : extraBorrows) {
        indexManager.returnIndexWriter(extra, true, false);
        assertEquals(0, closeInvocations.get());
        assertEquals(1, indexManager.getWriterCount());
    }

    // this should close the index writer even though 'false' is passed in
    // because the previous call marked the writer as closeable and this is
    // the last reference to the writer.
    indexManager.returnIndexWriter(secondBorrow, false, false);
    assertEquals(1, closeInvocations.get());
    assertEquals(0, indexManager.getWriterCount());
}
 
Example #21
Source File: LuceneEventIndex.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Commits the index writer of the partition's active index directory, if the partition has one.
 *
 * @param partitionName name of the partition whose active index should be committed
 * @throws IOException if borrowing the writer or committing fails
 */
@Override
public void commitChanges(final String partitionName) throws IOException {
    final Optional<File> activeDir = directoryManager.getActiveIndexDirectory(partitionName);
    if (!activeDir.isPresent()) {
        // No active index for this partition: nothing to commit.
        return;
    }

    final EventIndexWriter writer = indexManager.borrowIndexWriter(activeDir.get());
    try {
        writer.commit();
    } finally {
        // Already committed above, so both flags passed on return are false.
        indexManager.returnIndexWriter(writer, false, false);
    }
}
 
Example #22
Source File: StandardIndexManager.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * @return the event index writer held by this instance
 */
public EventIndexWriter getWriter() {
    return this.writer;
}
 
Example #23
Source File: EventIndexTask.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the given documents to the writable index directory that the directory manager selects
 * for the partition, based on the smallest event time among the documents.
 * <p>
 * If the writer reports that a commit is due, the commit is performed here. Otherwise, if the
 * directory written to is not the partition's active index directory, the index manager is asked
 * to commit and close the writer when it is returned. The writer is always returned to the index
 * manager, even if indexing fails.
 *
 * @param toIndex the documents to index; nothing is done if the list is empty
 * @param partitionName the name of the partition the documents belong to
 * @throws IOException if borrowing the writer, indexing, or committing fails
 */
private void index(final List<StoredDocument> toIndex, final String partitionName) throws IOException {
    if (toIndex.isEmpty()) {
        return;
    }

    // Convert the IndexableDocument list into a List of Documents so that we can pass them to the Index Writer.
    final List<Document> documents = toIndex.stream()
        .map(StoredDocument::getDocument)
        .collect(Collectors.toList());

    boolean requestClose = false;
    boolean requestCommit = false;

    // getAsLong() cannot fail here: the empty-list case returned above.
    final long minEventTime = toIndex.stream()
        .mapToLong(doc -> doc.getDocument().getField(SearchableFields.EventTime.getSearchableFieldName()).numericValue().longValue())
        .min()
        .getAsLong();

    // Synchronize on the directory manager because we don't want the active directory to change
    // while we are obtaining an index writer for it. I.e., determining the active directory
    // and obtaining an Index Writer for it need to be done atomically.
    final EventIndexWriter indexWriter;
    final File indexDirectory;
    synchronized (directoryManager) {
        indexDirectory = directoryManager.getWritableIndexingDirectory(minEventTime, partitionName);
        indexWriter = indexManager.borrowIndexWriter(indexDirectory);
    }

    try {
        // Perform the actual indexing.
        boolean writerIndicatesCommit = indexWriter.index(documents, commitThreshold);

        // If we don't need to commit index based on what index writer tells us, we will still want
        // to commit the index if it's assigned to a partition and this is no longer the active index
        // for that partition. This prevents the following case:
        //
        // Thread T1: pulls events from queue
        //            Maps events to Index Directory D1
        // Thread T2: pulls events from queue
        //            Maps events to Index Directory D1, the active index for Partition P1.
        //            Writes events to D1.
        //            Commits Index Writer for D1.
        //            Closes Index Writer for D1.
        // Thread T1: Writes events to D1.
        //            Determines that Index Writer for D1 does not need to be committed or closed.
        //
        // In the case outlined above, we would potentially lose those events from the index! To avoid this,
        // we simply decide to commit the index if this writer is no longer the active writer for the index.
        // However, if we have 10 threads, we don't want all 10 threads trying to commit the index after each
        // update. We want to commit when they've all finished. This is what the IndexManager will do if we request
        // that it commit the index. It will also close the index if requested, once all writers have finished.
        // So when this is the case, we will request that the Index Manager both commit and close the writer.

        final Optional<File> activeIndexDirOption = directoryManager.getActiveIndexDirectory(partitionName);
        if (!activeIndexDirOption.isPresent() || !activeIndexDirOption.get().equals(indexDirectory)) {
            requestCommit = true;
            requestClose = true;
        }

        if (writerIndicatesCommit) {
            commit(indexWriter);
            requestCommit = false; // we've already committed the index writer so no need to request that the index manager do so also.
            final boolean directoryManagerIndicatesClose = directoryManager.onIndexCommitted(indexDirectory);
            requestClose = requestClose || directoryManagerIndicatesClose;

            // The max Event ID is computed only for the debug message, so skip the scan unless debug is enabled.
            if (logger.isDebugEnabled()) {
                final long maxId = documents.stream()
                    .mapToLong(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue())
                    .max()
                    .orElse(-1L);
                logger.debug("Committed index {} after writing a max Event ID of {}", indexDirectory, maxId);
            }
        }
    } finally {
        indexManager.returnIndexWriter(indexWriter, requestCommit, requestClose);
    }
}
 
Example #24
Source File: StandardIndexManager.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a previously borrowed writer to this manager and updates the borrower count tracked
 * for the writer's directory (keyed by the directory's absolute file).
 * <p>
 * The commit and close happen only when this call finds a recorded count of 1 or less, i.e. no
 * other borrower remains. The writer is closed at that point if either this call passed
 * {@code isCloseable = true} or the tracked entry was already marked closeable-when-unused.
 * A writer that is not tracked at all is logged as a warning and closed immediately.
 * <p>
 * Any exception raised while updating the count, committing, or closing is caught and logged
 * as a warning; nothing is propagated to the caller.
 *
 * @param writer the writer being returned
 * @param commit whether the writer should be committed once it is no longer in use
 * @param isCloseable whether the writer may be closed once it is no longer in use
 */
@Override
public void returnIndexWriter(final EventIndexWriter writer, final boolean commit, final boolean isCloseable) {
    final File indexDirectory = writer.getDirectory();
    final File absoluteFile = indexDirectory.getAbsoluteFile();
    logger.trace("Returning Index Writer for {} to IndexManager", indexDirectory);

    boolean unused = false;
    IndexWriterCount count;
    boolean close = isCloseable;
    try {
        synchronized (writerCounts) {
            count = writerCounts.get(absoluteFile);
            // An earlier return may have marked the entry closeable; that marking is honored here.
            if (count != null && count.isCloseableWhenUnused()) {
                close = true;
            }

            if (count == null) {
                logger.warn("Index Writer {} was returned to IndexManager for {}, but this writer is not known. "
                    + "This could potentially lead to a resource leak", writer, indexDirectory);
                writer.close();
            } else if (count.getCount() <= 1) {
                // we are finished with this writer.
                unused = true;
                if (close) {
                    logger.debug("Decrementing count for Index Writer for {} to {}; closing writer", indexDirectory, count.getCount() - 1);
                    writerCounts.remove(absoluteFile);
                } else {
                    logger.trace("Decrementing count for Index Writer for {} to {}", indexDirectory, count.getCount() - 1);

                    // If writer is not closeable, then we need to decrement its count.
                    writerCounts.put(absoluteFile, new IndexWriterCount(count.getWriter(), count.getAnalyzer(), count.getDirectory(),
                        count.getCount() - 1, close));
                }
            } else {
                // decrement the count.
                if (close) {
                    logger.debug("Decrementing count for Index Writer for {} to {} and marking as closeable when no longer in use", indexDirectory, count.getCount() - 1);
                } else {
                    logger.trace("Decrementing count for Index Writer for {} to {}", indexDirectory, count.getCount() - 1);
                }

                // Other borrowers remain: store the decremented count and carry the closeable marking forward.
                writerCounts.put(absoluteFile, new IndexWriterCount(count.getWriter(), count.getAnalyzer(),
                    count.getDirectory(), count.getCount() - 1, close));
            }

            if (writerCounts.size() > repoConfig.getStorageDirectories().size() * 2) {
                logger.debug("Index Writer returned; writer count map now has size {}; writer = {}, commit = {}, isCloseable = {}, writerCount = {}; full writerCounts Map = {}",
                    writerCounts.size(), writer, commit, isCloseable, count, writerCounts);
            }
        }

        // Committing and closing are very expensive, so we want to do those outside of the synchronized block.
        // So we use an 'unused' variable to tell us whether or not we should actually do so.
        if (unused) {
            try {
                if (commit) {
                    writer.commit();
                }
            } finally {
                // The close is attempted even if the commit above threw.
                if (close) {
                    logger.info("Index Writer for {} has been returned to Index Manager and is no longer in use. Closing Index Writer", indexDirectory);
                    close(count);
                }
            }
        }
    } catch (final Exception e) {
        // NOTE(review): this message says "close", but the handler also covers failures from the commit above.
        logger.warn("Failed to close Index Writer {} due to {}", writer, e.toString(), e);
    }
}
 
Example #25
Source File: DeleteIndexAction.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Removes from every applicable index the documents that were indexed from the given expired
 * provenance file, deletes any index directory that is left without documents, and advances the
 * minimum indexed event ID past the events contained in the expired file.
 *
 * @param expiredFile the expired provenance file whose events should be purged from the indices
 * @return the same {@code expiredFile} that was passed in
 * @throws IOException if an index writer cannot be borrowed or an index cannot be updated
 */
@Override
public File execute(final File expiredFile) throws IOException {
    // Determine the max event id that we are deleting. A failure here is not fatal: the documents are
    // still removed below, but the minimum indexed ID is left untouched.
    long maxEventId = -1L;
    try (final RecordReader reader = RecordReaders.newRecordReader(expiredFile, repository.getAllLogFiles(), Integer.MAX_VALUE)) {
        maxEventId = reader.getMaxEventId();
    } catch (final IOException ioe) {
        // Pass the exception as the trailing argument so that the cause is not lost.
        logger.warn("Failed to obtain max ID present in journal file {}", expiredFile.getAbsolutePath(), ioe);
    }

    // remove the records from the index
    final List<File> indexDirs = indexConfiguration.getIndexDirectories(expiredFile);

    // The term depends only on the expired file, so build it once rather than once per index directory.
    final Term term = new Term(FieldNames.STORAGE_FILENAME, LuceneUtil.substringBefore(expiredFile.getName(), "."));

    // Running total of removed documents, reported in the summary log line at the end.
    long numDeleted = 0L;
    for (final File indexingDirectory : indexDirs) {
        boolean deleteDir = false;
        final EventIndexWriter writer = indexManager.borrowIndexWriter(indexingDirectory);
        try {
            final IndexWriter indexWriter = writer.getIndexWriter();

            // Compare document counts before the delete and after the commit to learn how many documents
            // were removed. NOTE(review): this is a best-effort figure - if the borrowed writer is shared,
            // concurrent updates to the same index would skew it; clamp so it can never go negative.
            final int docsBefore = indexWriter.getDocStats().numDocs;
            indexWriter.deleteDocuments(term);
            indexWriter.commit();
            final int docsLeft = indexWriter.getDocStats().numDocs;
            numDeleted += Math.max(0, docsBefore - docsLeft);

            deleteDir = docsLeft <= 0;
            logger.debug("After expiring {}, there are {} docs left for index {}", expiredFile, docsLeft, indexingDirectory);
        } finally {
            // Always hand the writer back, even if the delete or commit failed.
            indexManager.returnIndexWriter(writer);
        }

        // we've confirmed that all documents have been removed. Delete the index directory.
        if (deleteDir) {
            indexManager.removeIndex(indexingDirectory);
            indexConfiguration.removeIndexDirectory(indexingDirectory);

            deleteDirectory(indexingDirectory);
            logger.info("Removed empty index directory {}", indexingDirectory);
        }
    }

    // Update the minimum index to 1 more than the max Event ID in this file.
    if (maxEventId > -1L) {
        indexConfiguration.setMinIdIndexed(maxEventId + 1L);
    }

    logger.info("Deleted Indices for Expired Provenance File {} from {} index files; {} documents removed", expiredFile, indexDirs.size(), numDeleted);
    return expiredFile;
}
 
Example #26
Source File: StandardIndexManager.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a previously borrowed writer by delegating to the three-argument overload with both
 * boolean flags enabled.
 *
 * @param writer the writer being handed back
 */
@Override
public void returnIndexWriter(final EventIndexWriter writer) {
    // NOTE(review): the flag names mirror how the three-argument overload appears to use them
    // (commit pending changes / allow the writer to be closed) - confirm against that overload.
    final boolean commit = true;
    final boolean isCloseable = true;
    returnIndexWriter(writer, commit, isCloseable);
}
 
Example #27
Source File: CachingIndexManager.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Hands out the index writer for the given index directory. If a writer is already registered for
 * the directory, its usage count is incremented and the same writer is returned; otherwise a new
 * writer is created, registered with a usage count of 1, and every active searcher registered for
 * the directory is poisoned.
 *
 * @param indexDirectory the directory holding the index to write to
 * @return the writer for the given directory
 * @throws IOException if the directory cannot be opened or the writer cannot be created
 */
@Override
public EventIndexWriter borrowIndexWriter(final File indexDirectory) throws IOException {
    final File indexKey = indexDirectory.getAbsoluteFile();
    logger.trace("Borrowing index writer for {}", indexDirectory);

    lock.lock();
    try {
        final IndexWriterCount existing = writerCounts.remove(indexKey);

        // A writer is already registered: re-register it with an incremented usage count and share it.
        if (existing != null) {
            logger.debug("Providing existing index writer for {} and incrementing count to {}", indexDirectory, existing.getCount() + 1);
            writerCounts.put(indexKey, new IndexWriterCount(existing.getWriter(),
                existing.getAnalyzer(), existing.getDirectory(), existing.getCount() + 1));
            return existing.getWriter();
        }

        // No writer yet: build one, remembering each resource opened so far so that it can be
        // released if a later construction step fails.
        final List<Closeable> opened = new ArrayList<>();
        final Directory luceneDirectory = FSDirectory.open(indexDirectory);
        opened.add(luceneDirectory);

        final IndexWriterCount created;
        try {
            final Analyzer analyzer = new StandardAnalyzer();
            opened.add(analyzer);

            final IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneUtil.LUCENE_VERSION, analyzer);
            writerConfig.setWriteLockTimeout(300000L);

            final IndexWriter luceneWriter = new IndexWriter(luceneDirectory, writerConfig);
            final EventIndexWriter eventWriter = new LuceneEventIndexWriter(luceneWriter, indexDirectory);
            created = new IndexWriterCount(eventWriter, analyzer, luceneDirectory, 1);
            logger.debug("Providing new index writer for {}", indexDirectory);
        } catch (final IOException failure) {
            // Release whatever was opened; secondary failures are attached to the original exception.
            for (final Closeable resource : opened) {
                try {
                    resource.close();
                } catch (final IOException closeFailure) {
                    failure.addSuppressed(closeFailure);
                }
            }

            throw failure;
        }

        writerCounts.put(indexKey, created);

        // Mark any active searchers as poisoned because we are updating the index
        final List<ActiveIndexSearcher> searchersForIndex = activeSearchers.get(indexKey);
        if (searchersForIndex != null) {
            for (final ActiveIndexSearcher searcher : searchersForIndex) {
                logger.debug("Poisoning {} because it is searching {}, which is getting updated", searcher, indexDirectory);
                searcher.poison();
            }
        }

        return created.getWriter();
    } finally {
        lock.unlock();
    }
}
 
Example #28
Source File: EventIndexTask.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Commits the given index writer and logs, at debug level, the approximate number of events
 * committed and how long the commit took.
 *
 * @param indexWriter the writer to commit
 * @throws IOException if the commit fails
 */
protected void commit(final EventIndexWriter indexWriter) throws IOException {
    final long startNanos = System.nanoTime();
    final long committedEvents = indexWriter.commit();
    final long elapsedNanos = System.nanoTime() - startNanos;

    final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
    logger.debug("Successfully committed approximately {} Events to {} in {} millis", committedEvents, indexWriter, elapsedMillis);
}
 
Example #29
Source File: CachingIndexManager.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Hands a borrowed writer back to this manager, delegating to the three-argument overload with
 * both boolean options switched on.
 *
 * @param writer the writer that is no longer needed by the caller
 */
@Override
public void returnIndexWriter(final EventIndexWriter writer) {
    // NOTE(review): presumably the two options are "commit" and "closeable", in that order -
    // verify against the three-argument overload.
    final boolean commitOnReturn = true;
    final boolean closeableOnReturn = true;
    returnIndexWriter(writer, commitOnReturn, closeableOnReturn);
}
 
Example #30
Source File: EventIndexTask.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Commits pending changes on the supplied index writer, timing the operation and reporting the
 * approximate event count and the duration in a debug log entry.
 *
 * @param indexWriter the index writer whose changes are to be committed
 * @throws IOException if committing the writer fails
 */
protected void commit(final EventIndexWriter indexWriter) throws IOException {
    final long begin = System.nanoTime();
    final long eventCount = indexWriter.commit();
    final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - begin, TimeUnit.NANOSECONDS);

    logger.debug("Successfully committed approximately {} Events to {} in {} millis", eventCount, indexWriter, durationMillis);
}