org.apache.nifi.provenance.serialization.StorageSummary Java Examples

The following examples show how to use org.apache.nifi.provenance.serialization.StorageSummary. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestPartitionedWriteAheadEventStore.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a single event to a newly initialized store and verifies that the event,
 * stamped with the id reported in its StorageSummary, can be read back by that id.
 */
@Test
public void testSingleWriteThenRead() throws IOException {
    final PartitionedWriteAheadEventStore eventStore =
        new PartitionedWriteAheadEventStore(createConfig(), writerFactory, readerFactory, EventReporter.NO_OP, new EventFileManager());
    eventStore.initialize();

    // Before anything is written the store is expected to report -1 as its max event id.
    assertEquals(-1, eventStore.getMaxEventId());

    final ProvenanceEventRecord original = createEvent();
    final StorageResult storageResult = eventStore.addEvents(Collections.singleton(original));

    // Exactly one event was added, so the first summary is the one describing it.
    final long assignedId = storageResult.getStorageLocations().values().iterator().next().getEventId();
    final ProvenanceEventRecord expected = addId(original, assignedId);

    assertEquals(0, eventStore.getMaxEventId());

    final ProvenanceEventRecord actual = eventStore.getEvent(assignedId).get();
    assertEquals(expected, actual);
}
 
Example #2
Source File: TestPartitionedWriteAheadEventStore.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Stores one event in a newly initialized store, then checks that looking the event up
 * by the id from its StorageSummary yields the same event with that id applied.
 */
@Test
public void testSingleWriteThenRead() throws IOException {
    final PartitionedWriteAheadEventStore eventStore =
        new PartitionedWriteAheadEventStore(createConfig(), writerFactory, readerFactory, EventReporter.NO_OP, new EventFileManager());
    eventStore.initialize();

    // No events yet: the max event id is expected to be -1.
    assertEquals(-1, eventStore.getMaxEventId());

    final ProvenanceEventRecord written = createEvent();
    final StorageResult addResult = eventStore.addEvents(Collections.singleton(written));

    // A single event was added, so take the first (only) summary.
    final long storedId = addResult.getStorageLocations().values().iterator().next().getEventId();
    final ProvenanceEventRecord expectedEvent = addId(written, storedId);

    assertEquals(0, eventStore.getMaxEventId());

    final ProvenanceEventRecord retrieved = eventStore.getEvent(storedId).get();
    assertEquals(expectedEvent, retrieved);
}
 
Example #3
Source File: WriteAheadProvenanceRepository.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Adds the given events to the event store and passes the resulting storage locations
 * to the event index. If the store throws an IOException, the failure is logged and
 * reported through the event reporter and nothing is indexed.
 *
 * @param events the provenance events to register
 */
@Override
public void registerEvents(final Iterable<ProvenanceEventRecord> events) {
    final StorageResult result;
    try {
        result = eventStore.addEvents(events);
    } catch (final IOException ioe) {
        logger.error("Failed to write events to the Event Store", ioe);
        eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to write Provenance Events to the repository. See logs for more details.");
        return;
    }

    final Map<ProvenanceEventRecord, StorageSummary> storedLocations = result.getStorageLocations();
    if (storedLocations.isEmpty()) {
        // Nothing was stored, so there is nothing to hand to the index.
        return;
    }

    eventIndex.addEvents(storedLocations);
}
 
Example #4
Source File: WriteAheadProvenanceRepository.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the given events to the event store, then forwards the map of storage
 * locations to the event index. An IOException from the store is logged, reported
 * via the event reporter, and ends the call without indexing anything.
 *
 * @param events the provenance events to register
 */
@Override
public void registerEvents(final Iterable<ProvenanceEventRecord> events) {
    final StorageResult addResult;
    try {
        addResult = eventStore.addEvents(events);
    } catch (final IOException failure) {
        logger.error("Failed to write events to the Event Store", failure);
        eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to write Provenance Events to the repository. See logs for more details.");
        return;
    }

    final Map<ProvenanceEventRecord, StorageSummary> locations = addResult.getStorageLocations();
    if (locations.isEmpty()) {
        // Skip the index entirely when no locations were produced.
        return;
    }

    eventIndex.addEvents(locations);
}
 
Example #5
Source File: LuceneEventIndex.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Records the number of submitted events in {@code eventsIndexed} and forwards each
 * event, together with its storage summary, to {@code addEvent}.
 *
 * @param events the events to add, keyed to their storage summaries
 */
@Override
public void addEvents(final Map<ProvenanceEventRecord, StorageSummary> events) {
    final long submittedCount = events.size();
    eventsIndexed.add(new TimestampedLong(submittedCount));

    events.forEach((event, summary) -> addEvent(event, summary));
}
 
Example #6
Source File: TestLuceneEventIndex.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds one event to the index and verifies that a query without any search terms
 * eventually returns exactly that event.
 */
@Test(timeout = 5000)
public void addThenQueryWithEmptyQuery() throws InterruptedException {
    final RepositoryConfiguration config = createConfig();
    final IndexManager manager = new SimpleIndexManager(config);
    final LuceneEventIndex index = new LuceneEventIndex(config, manager, 1, EventReporter.NO_OP);

    final ProvenanceEventRecord event = createEvent();
    final StorageSummary summary = new StorageSummary(event.getEventId(), "1.prov", "1", 1, 2L, 2L);
    index.addEvent(event, summary);

    final Query emptyQuery = new Query(UUID.randomUUID().toString());

    final ArrayListEventStore store = new ArrayListEventStore();
    store.addEvent(event);
    index.initialize(store);

    // It is unknown how long indexing takes, so poll until the query returns a match.
    // The @Test timeout (5 seconds) fails the test if that never happens.
    List<ProvenanceEventRecord> matches;
    do {
        final QuerySubmission submission = index.submitQuery(emptyQuery, EventAuthorizer.GRANT_ALL, "unit test user");
        assertNotNull(submission);

        final QueryResult queryResult = submission.getResult();
        assertNotNull(queryResult);
        queryResult.awaitCompletion(100, TimeUnit.MILLISECONDS);

        assertTrue(queryResult.isFinished());
        assertNull(queryResult.getError());

        matches = queryResult.getMatchingEvents();
        assertNotNull(matches);
        Thread.sleep(100L); // avoid crushing the CPU
    } while (matches.isEmpty());

    assertEquals(1, matches.size());
    assertEquals(event, matches.get(0));
}
 
Example #7
Source File: LuceneEventIndex.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a timestamped count of the submitted events to {@code eventsIndexed}, then
 * passes every event and its storage summary to {@code addEvent}.
 *
 * @param events the events to add, keyed to their storage summaries
 */
@Override
public void addEvents(final Map<ProvenanceEventRecord, StorageSummary> events) {
    final long batchSize = events.size();
    eventsIndexed.add(new TimestampedLong(batchSize));

    events.forEach((record, location) -> addEvent(record, location));
}
 
Example #8
Source File: ArrayListEventStore.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Appends every given event to this store's {@code events} collection and pairs it
 * with a StorageSummary whose id comes from {@code idGenerator}.
 *
 * @param events the events to add
 * @return a StorageResult exposing the event-to-summary map; it never reports a rollover
 *         and returns null for the number of events rolled over
 */
@Override
public synchronized StorageResult addEvents(Iterable<ProvenanceEventRecord> events) {
    final Map<ProvenanceEventRecord, StorageSummary> summaries = new HashMap<>();

    for (final ProvenanceEventRecord record : events) {
        this.events.add(record);
        summaries.put(record, new StorageSummary(idGenerator.getAndIncrement(), "location", "1", 1, 0L, 0L));
    }

    return new StorageResult() {
        @Override
        public Map<ProvenanceEventRecord, StorageSummary> getStorageLocations() {
            return summaries;
        }

        @Override
        public boolean triggeredRollover() {
            return false;
        }

        @Override
        public Integer getEventsRolledOver() {
            return null;
        }
    };
}
 
Example #9
Source File: WriteAheadStorePartition.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Writes each of the given events with the supplied writer and returns where each one was stored.
 * <p>
 * After at least one event has been written the writer is flushed, this partition's
 * {@code maxEventId} is raised to the largest id written (if greater than its current value),
 * and the writer is synced when the configuration requests it.
 *
 * @param events the events to write
 * @param writer the writer to write the events to
 * @return a map from each event to a StorageSummary that carries this partition's name;
 *         empty if {@code events} yielded nothing
 * @throws IOException if writing fails; the writer is marked dirty before the exception propagates
 */
private Map<ProvenanceEventRecord, StorageSummary> addEvents(final Iterable<ProvenanceEventRecord> events, final RecordWriter writer) throws IOException {
    final Map<ProvenanceEventRecord, StorageSummary> locationMap = new HashMap<>();

    try {
        long maxId = -1L;
        int numEvents = 0;
        for (final ProvenanceEventRecord nextEvent : events) {
            final StorageSummary writerSummary = writer.writeRecord(nextEvent);
            // Re-create the summary so that it also carries the name of this partition.
            final StorageSummary summaryWithIndex = new StorageSummary(writerSummary.getEventId(), writerSummary.getStorageLocation(), this.partitionName,
                writerSummary.getBlockIndex(), writerSummary.getSerializedLength(), writerSummary.getBytesWritten());
            locationMap.put(nextEvent, summaryWithIndex);
            // Keep the largest id seen, not merely the most recent one, so that the value
            // is still correct if ids are not written in ascending order.
            maxId = Math.max(maxId, summaryWithIndex.getEventId());
            numEvents++;
        }

        if (numEvents == 0) {
            return locationMap;
        }

        writer.flush();

        // Update max event id to be the greater of the current value and the
        // max value just written.
        final long maxIdWritten = maxId;
        this.maxEventId.getAndUpdate(cur -> Math.max(maxIdWritten, cur));

        if (config.isAlwaysSync()) {
            writer.sync();
        }
    } catch (final Exception e) {
        // We need to set the repoDirty flag before we release the lock for this journal.
        // Otherwise, another thread may write to this journal -- this is a problem because
        // the journal contains part of our record but not all of it. Writing to the end of this
        // journal will result in corruption!
        writer.markDirty();
        throw e;
    }

    return locationMap;
}
 
Example #10
Source File: LuceneEventIndex.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Updates every cached query with the event, converts the event to a Lucene document and
 * places that document on {@code documentQueue}.
 * <p>
 * If the converter returns null the event is skipped. Otherwise the method keeps trying to
 * enqueue the document until it succeeds or the index is closed; an interrupt while waiting
 * abandons the event.
 *
 * @param event the event to index
 * @param location the storage summary describing where the event was stored
 */
protected void addEvent(final ProvenanceEventRecord event, final StorageSummary location) {
    for (final CachedQuery query : cachedQueries) {
        query.update(event, location);
    }

    final Document luceneDoc = eventConverter.convert(event, location);
    if (luceneDoc == null) {
        logger.debug("Received Provenance Event {} to index but it contained no information that should be indexed, so skipping it", event.getEventId());
        return;
    }

    final StoredDocument storedDoc = new StoredDocument(luceneDoc, location);
    boolean enqueued = false;
    while (!(enqueued || closed)) {
        // Try without blocking first; only fall back to a timed wait when the queue is full.
        enqueued = documentQueue.offer(storedDoc);
        if (!enqueued) {
            final long waitStart = System.nanoTime();
            try {
                enqueued = documentQueue.offer(storedDoc, 1, TimeUnit.SECONDS);
            } catch (final InterruptedException ie) {
                Thread.currentThread().interrupt();
                logger.warn("Interrupted while attempting to enqueue Provenance Event for indexing; this event will not be indexed");
                return;
            }
            // Record how long this thread was held up by the timed offer.
            queuePauseNanos.add(new TimestampedLong(System.nanoTime() - waitStart));
        }

        if (enqueued) {
            final long total = eventCount.incrementAndGet();
            if (total % 1_000_000 == 0 && logger.isDebugEnabled()) {
                incrementAndReportStats();
            }
        }
    }
}
 
Example #11
Source File: WriteAheadStorePartition.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Writes each of the given events with the supplied writer and returns where each one was stored.
 * <p>
 * After at least one event has been written the writer is flushed, this partition's
 * {@code maxEventId} is raised to the largest id written (if greater than its current value),
 * and the writer is synced when the configuration requests it.
 *
 * @param events the events to write
 * @param writer the writer to write the events to
 * @return a map from each event to a StorageSummary that carries this partition's name;
 *         empty if {@code events} yielded nothing
 * @throws IOException if writing fails; the writer is marked dirty before the exception propagates
 */
private Map<ProvenanceEventRecord, StorageSummary> addEvents(final Iterable<ProvenanceEventRecord> events, final RecordWriter writer) throws IOException {
    final Map<ProvenanceEventRecord, StorageSummary> locationMap = new HashMap<>();

    try {
        long maxId = -1L;
        int numEvents = 0;
        for (final ProvenanceEventRecord nextEvent : events) {
            final StorageSummary writerSummary = writer.writeRecord(nextEvent);
            // Re-create the summary so that it also carries the name of this partition.
            final StorageSummary summaryWithIndex = new StorageSummary(writerSummary.getEventId(), writerSummary.getStorageLocation(), this.partitionName,
                writerSummary.getBlockIndex(), writerSummary.getSerializedLength(), writerSummary.getBytesWritten());
            locationMap.put(nextEvent, summaryWithIndex);
            // Keep the largest id seen, not merely the most recent one, so that the value
            // is still correct if ids are not written in ascending order.
            maxId = Math.max(maxId, summaryWithIndex.getEventId());
            numEvents++;
        }

        if (numEvents == 0) {
            return locationMap;
        }

        writer.flush();

        // Update max event id to be the greater of the current value and the
        // max value just written.
        final long maxIdWritten = maxId;
        this.maxEventId.getAndUpdate(cur -> Math.max(maxIdWritten, cur));

        if (config.isAlwaysSync()) {
            writer.sync();
        }
    } catch (final Exception e) {
        // We need to set the repoDirty flag before we release the lock for this journal.
        // Otherwise, another thread may write to this journal -- this is a problem because
        // the journal contains part of our record but not all of it. Writing to the end of this
        // journal will result in corruption!
        writer.markDirty();
        throw e;
    }

    return locationMap;
}
 
Example #12
Source File: LuceneEventIndex.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Updates every cached query with the event, converts the event to a Lucene document and
 * places that document on {@code documentQueue}.
 * <p>
 * If the converter returns null the event is skipped. Otherwise the method keeps trying to
 * enqueue the document until it succeeds or the index is closed; an interrupt while waiting
 * abandons the event.
 *
 * @param event the event to index
 * @param location the storage summary describing where the event was stored
 */
protected void addEvent(final ProvenanceEventRecord event, final StorageSummary location) {
    for (final CachedQuery query : cachedQueries) {
        query.update(event, location);
    }

    final Document luceneDoc = eventConverter.convert(event, location);
    if (luceneDoc == null) {
        logger.debug("Received Provenance Event {} to index but it contained no information that should be indexed, so skipping it", event);
        return;
    }

    final StoredDocument storedDoc = new StoredDocument(luceneDoc, location);
    boolean enqueued = false;
    while (!(enqueued || closed)) {
        // Try without blocking first; only fall back to a timed wait when the queue is full.
        enqueued = documentQueue.offer(storedDoc);
        if (!enqueued) {
            final long waitStart = System.nanoTime();
            try {
                enqueued = documentQueue.offer(storedDoc, 1, TimeUnit.SECONDS);
            } catch (final InterruptedException ie) {
                Thread.currentThread().interrupt();
                logger.warn("Interrupted while attempting to enqueue Provenance Event for indexing; this event will not be indexed");
                return;
            }
            // Record how long this thread was held up by the timed offer.
            queuePauseNanos.add(new TimestampedLong(System.nanoTime() - waitStart));
        }

        if (enqueued) {
            final long total = eventCount.incrementAndGet();
            if (total % 1_000_000 == 0 && logger.isDebugEnabled()) {
                incrementAndReportStats();
            }
        }
    }
}
 
Example #13
Source File: TestLuceneEventIndex.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds one event to the index and verifies that a query without any search terms
 * eventually returns exactly that event. Skipped when running on Windows.
 */
@Test(timeout = 60000)
public void addThenQueryWithEmptyQuery() throws InterruptedException {
    assumeFalse(isWindowsEnvironment());
    final RepositoryConfiguration config = createConfig();
    final IndexManager manager = new StandardIndexManager(config);
    final LuceneEventIndex index = new LuceneEventIndex(config, manager, 1, EventReporter.NO_OP);

    final ProvenanceEventRecord event = createEvent();
    final StorageSummary summary = new StorageSummary(event.getEventId(), "1.prov", "1", 1, 2L, 2L);
    index.addEvent(event, summary);

    final Query emptyQuery = new Query(UUID.randomUUID().toString());

    final ArrayListEventStore store = new ArrayListEventStore();
    store.addEvent(event);
    index.initialize(store);

    // It is unknown how long indexing takes, so poll until the query returns a match.
    // The @Test timeout fails the test if that never happens.
    List<ProvenanceEventRecord> matches;
    do {
        final QuerySubmission submission = index.submitQuery(emptyQuery, EventAuthorizer.GRANT_ALL, "unit test user");
        assertNotNull(submission);

        final QueryResult queryResult = submission.getResult();
        assertNotNull(queryResult);
        queryResult.awaitCompletion(4000, TimeUnit.MILLISECONDS);

        assertTrue(queryResult.isFinished());
        assertNull(queryResult.getError());

        matches = queryResult.getMatchingEvents();
        assertNotNull(matches);
        Thread.sleep(100L); // avoid crushing the CPU
    } while (matches.isEmpty());

    assertEquals(1, matches.size());
    assertEquals(event, matches.get(0));
}
 
Example #14
Source File: ArrayListEventStore.java    From nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds each given event to this store's {@code events} collection and maps it to a
 * StorageSummary whose id is taken from {@code idGenerator}.
 *
 * @param events the events to add
 * @return a StorageResult exposing the event-to-summary map; it never reports a rollover
 *         and returns null for the number of events rolled over
 */
@Override
public synchronized StorageResult addEvents(Iterable<ProvenanceEventRecord> events) {
    final Map<ProvenanceEventRecord, StorageSummary> summariesByEvent = new HashMap<>();

    for (final ProvenanceEventRecord added : events) {
        this.events.add(added);
        summariesByEvent.put(added, new StorageSummary(idGenerator.getAndIncrement(), "location", "1", 1, 0L, 0L));
    }

    return new StorageResult() {
        @Override
        public Map<ProvenanceEventRecord, StorageSummary> getStorageLocations() {
            return summariesByEvent;
        }

        @Override
        public boolean triggeredRollover() {
            return false;
        }

        @Override
        public Integer getEventsRolledOver() {
            return null;
        }
    };
}
 
Example #15
Source File: StoredDocument.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * @return the StorageSummary held by this object
 */
public StorageSummary getStorageSummary() {
    return this.storageSummary;
}
 
Example #16
Source File: LuceneEventIndex.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Re-indexes the given events: updates the cached queries, converts each event to a Lucene
 * document, resolves the index directory for the event's time, and submits all resulting
 * documents to an {@link EventIndexTask} in a single {@code reIndex} call.
 * <p>
 * Nothing is done when the newest index is flagged as defunct. An IOException from the
 * re-index is logged and reported through the event reporter rather than thrown.
 *
 * @param events the events to re-index, keyed to their storage summaries
 */
@Override
public void reindexEvents(final Map<ProvenanceEventRecord, StorageSummary> events) {
    if (newestIndexDefunct) {
        logger.info("Will avoid re-indexing {} events because the newest index is defunct, so it will be re-indexed in the background", events.size());
        return;
    }

    final EventIndexTask indexTask = new EventIndexTask(documentQueue, indexManager, directoryManager, EventIndexTask.DEFAULT_MAX_EVENTS_PER_COMMIT, eventReporter);

    // Remember the directory resolved for the previous event so that consecutive events
    // with the same event time do not repeat the directory lookup.
    File lastIndexDir = null;
    long lastEventTime = -2L;

    final List<IndexableDocument> indexableDocs = new ArrayList<>(events.size());
    for (final Map.Entry<ProvenanceEventRecord, StorageSummary> entry : events.entrySet()) {
        final ProvenanceEventRecord event = entry.getKey();
        final StorageSummary summary = entry.getValue();

        for (final CachedQuery cachedQuery : cachedQueries) {
            cachedQuery.update(event, summary);
        }

        final Document document = eventConverter.convert(event, summary);
        if (document == null) {
            logger.debug("Received Provenance Event {} to index but it contained no information that should be indexed, so skipping it", event.getEventId());
        } else {
            final File indexDir;
            // Require a previously resolved directory so that the initial sentinel value of
            // lastEventTime can never produce a null index directory.
            if (lastIndexDir != null && event.getEventTime() == lastEventTime) {
                indexDir = lastIndexDir;
            } else {
                final List<File> files = getDirectoryManager().getDirectories(event.getEventTime(), null, false);
                if (files.isEmpty()) {
                    // No existing directory covers this event time, so obtain a writable one for the event's partition.
                    final String partitionName = summary.getPartitionName().get();
                    indexDir = getDirectoryManager().getWritableIndexingDirectory(event.getEventTime(), partitionName);
                } else {
                    indexDir = files.get(0);
                }

                lastIndexDir = indexDir;
                // Without this assignment the shortcut above can never be taken.
                lastEventTime = event.getEventTime();
            }

            final IndexableDocument doc = new IndexableDocument(document, summary, indexDir);
            indexableDocs.add(doc);
        }
    }

    try {
        indexTask.reIndex(indexableDocs, CommitPreference.PREVENT_COMMIT);
    } catch (final IOException ioe) {
        logger.error("Failed to reindex some Provenance Events", ioe);
        eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to re-index some Provenance Events. "
            + "Some Provenance Events may not be available for querying. See logs for more information.");
    }
}
 
Example #17
Source File: IndexableDocument.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * @return the StorageSummary stored as this document's persistence location
 */
public StorageSummary getPersistenceLocation() {
    return this.persistenceLocation;
}
 
Example #18
Source File: IndexableDocument.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an IndexableDocument that holds the given document, its storage location and
 * the index directory associated with it.
 *
 * @param document the Lucene document
 * @param location the storage summary kept as the persistence location
 * @param indexDirectory the index directory associated with the document
 */
public IndexableDocument(final Document document, final StorageSummary location, final File indexDirectory) {
    this.indexDirectory = indexDirectory;
    this.persistenceLocation = location;
    this.document = document;
}
 
Example #19
Source File: LatestEventsPerProcessorQuery.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Adds the stored event's id to the ring buffer kept for the event's component,
 * creating a buffer of capacity 1000 for that component if none exists yet.
 *
 * @param event the event whose component id selects the ring buffer
 * @param storageSummary the summary providing the event id to record
 */
@Override
public void update(final ProvenanceEventRecord event, final StorageSummary storageSummary) {
    latestRecords
        .computeIfAbsent(event.getComponentId(), id -> new RingBuffer<>(1000))
        .add(storageSummary.getEventId());
}
 
Example #20
Source File: LatestEventsQuery.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Adds the id of the stored event to {@code latestRecords}.
 *
 * @param event the event that was stored (not consulted here)
 * @param storageSummary the summary providing the event id to record
 */
@Override
public void update(final ProvenanceEventRecord event, final StorageSummary storageSummary) {
    final long eventId = storageSummary.getEventId();
    latestRecords.add(eventId);
}
 
Example #21
Source File: ConvertEventToLuceneDocument.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the record to a Lucene document, using the event id from the given storage
 * summary; delegates to the {@code convert} overload that takes the id directly.
 *
 * @param record the event to convert
 * @param persistedEvent the summary of the persisted event, providing the event id
 * @return whatever the delegate overload returns for this record and id
 */
public Document convert(final ProvenanceEventRecord record, final StorageSummary persistedEvent) {
    final long eventId = persistedEvent.getEventId();
    return convert(record, eventId);
}
 
Example #22
Source File: StoredDocument.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a StoredDocument pairing a Lucene document with its storage summary.
 *
 * @param document the Lucene document
 * @param summary the storage summary for the event the document was built from
 */
public StoredDocument(final Document document, final StorageSummary summary) {
    this.storageSummary = summary;
    this.document = document;
}
 
Example #23
Source File: EventIdFirstSchemaRecordWriter.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes the given event and appends it to this writer's buffered output stream.
 * <p>
 * The event is first serialized into a buffer checked out from {@code streamCache}, outside
 * of the lock. Only the id assignment and the copy of the serialized bytes into the output
 * stream happen while synchronized on this writer.
 *
 * @param record the event to write; if its event id is -1, an id is taken from the id generator
 * @return a StorageSummary built from the record id, a "parentDirectory/fileName" storage location,
 *         the TOC writer's current block index (null when there is no TOC writer), the number of
 *         bytes this record added, and the value of {@code getBytesWritten()} after the write
 * @throws IOException if this writer is already dirty, or if serializing or writing fails; a failure
 *         inside the synchronized write marks this writer dirty before being rethrown
 */
@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
    if (isDirty()) {
        throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository");
    }

    // Declared up front because they are assigned inside the try/synchronized blocks
    // but read afterwards for statistics and for the returned summary.
    final long lockStart;
    final long writeStart;
    final long startBytes;
    final long endBytes;
    final long recordIdentifier;

    final long serializeStart = System.nanoTime();
    final ByteArrayDataOutputStream bados = streamCache.checkOut();
    try {
        // Serialize into the checked-out buffer before taking the lock.
        writeRecord(record, 0L, bados.getDataOutputStream());

        lockStart = System.nanoTime();
        synchronized (this) {
            writeStart = System.nanoTime();
            try {
                // An event id of -1 means no id has been assigned yet, so generate one.
                recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
                startBytes = getBytesWritten();

                ensureStreamState(recordIdentifier, startBytes);

                final DataOutputStream out = getBufferedOutputStream();
                // The id is written as an offset from firstEventId.
                // NOTE(review): narrowing cast assumes the offset fits in an int -- confirm.
                final int recordIdOffset = (int) (recordIdentifier - firstEventId);
                out.writeInt(recordIdOffset);

                // Write the serialized length followed by the serialized bytes.
                final ByteArrayOutputStream baos = bados.getByteArrayOutputStream();
                out.writeInt(baos.size());
                baos.writeTo(out);

                recordCount.incrementAndGet();
                endBytes = getBytesWritten();
            } catch (final IOException ioe) {
                markDirty();
                throw ioe;
            }
        }
    } finally {
        // Always return the buffer to the cache, whether or not the write succeeded.
        streamCache.checkIn(bados);
    }

    if (logger.isDebugEnabled()) {
        // Collect stats and periodically dump them; this only happens when debug logging is enabled.
        final long writeNanos = System.nanoTime() - writeStart;
        writeTimes.add(new TimestampedLong(writeNanos));

        final long serializeNanos = lockStart - serializeStart;
        serializeTimes.add(new TimestampedLong(serializeNanos));

        final long lockNanos = writeStart - lockStart;
        lockTimes.add(new TimestampedLong(lockNanos));
        bytesWritten.add(new TimestampedLong(endBytes - startBytes));

        // Log the aggregated statistics once every one million records.
        final long recordCount = totalRecordCount.incrementAndGet();
        if (recordCount % 1_000_000 == 0) {
            final long sixtySecondsAgo = System.currentTimeMillis() - 60000L;
            final Long writeNanosLast60 = writeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long lockNanosLast60 = lockTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long serializeNanosLast60 = serializeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long bytesWrittenLast60 = bytesWritten.getAggregateValue(sixtySecondsAgo).getValue();
            logger.debug("In the last 60 seconds, have spent {} millis writing to file ({} MB), {} millis waiting on synchronize block, {} millis serializing events",
                TimeUnit.NANOSECONDS.toMillis(writeNanosLast60),
                bytesWrittenLast60 / 1024 / 1024,
                TimeUnit.NANOSECONDS.toMillis(lockNanosLast60),
                TimeUnit.NANOSECONDS.toMillis(serializeNanosLast60));
        }
    }

    final long serializedLength = endBytes - startBytes;
    final TocWriter tocWriter = getTocWriter();
    final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
    final File file = getFile();
    // The storage location is the file's parent directory name plus the file name.
    final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
    return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
 
Example #24
Source File: StorageSummaryEvent.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a StorageSummaryEvent that pairs an event with its storage summary.
 *
 * @param event the provenance event
 * @param storageSummary the storage summary associated with the event
 */
public StorageSummaryEvent(final ProvenanceEventRecord event, final StorageSummary storageSummary) {
    this.storageSummary = storageSummary;
    this.event = event;
}
 
Example #25
Source File: StorageResult.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * @return always an empty map, as produced by {@link Collections#emptyMap()}
 */
@Override
public Map<ProvenanceEventRecord, StorageSummary> getStorageLocations() {
    return Collections.<ProvenanceEventRecord, StorageSummary>emptyMap();
}
 
Example #26
Source File: TestLuceneEventIndex.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a StorageSummary for the given event id using the fixed values
 * "1.prov", "1", 1, 2L and 2L for the remaining constructor arguments.
 *
 * @param eventId the event id to place in the summary
 * @return the new StorageSummary
 */
private StorageSummary createStorageSummary(final long eventId) {
    final StorageSummary summary = new StorageSummary(eventId, "1.prov", "1", 1, 2L, 2L);
    return summary;
}
 
Example #27
Source File: TestLuceneEventIndex.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Adds two events to the index and verifies that a query on the first event's FlowFile UUID
 * eventually returns exactly that event.
 */
@Test(timeout = 50000)
public void testQuerySpecificField() throws InterruptedException {
    final RepositoryConfiguration config = createConfig();
    final IndexManager manager = new StandardIndexManager(config);
    final LuceneEventIndex index = new LuceneEventIndex(config, manager, 2, EventReporter.NO_OP);

    // Add two events; only the first one is the target of the query.
    final ProvenanceEventRecord event = createEvent();
    final StorageSummary targetSummary = new StorageSummary(event.getEventId(), "1.prov", "1", 1, 2L, 2L);
    index.addEvent(event, targetSummary);
    index.addEvent(createEvent(), new StorageSummary(2L, "1.prov", "1", 1, 2L, 2L));

    // Search for the event whose FlowFile UUID equals that of the first event.
    final Query uuidQuery = new Query(UUID.randomUUID().toString());
    uuidQuery.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.FlowFileUUID, event.getFlowFileUuid()));

    final ArrayListEventStore store = new ArrayListEventStore();
    store.addEvent(event);
    index.initialize(store);

    // It is unknown how long indexing takes, so poll until the query returns a match.
    // The @Test timeout fails the test if that never happens.
    List<ProvenanceEventRecord> matches;
    do {
        final QuerySubmission submission = index.submitQuery(uuidQuery, EventAuthorizer.GRANT_ALL, "unit test user");
        assertNotNull(submission);

        final QueryResult queryResult = submission.getResult();
        assertNotNull(queryResult);
        queryResult.awaitCompletion(4000, TimeUnit.MILLISECONDS);

        assertTrue(queryResult.isFinished());
        assertNull(queryResult.getError());

        matches = queryResult.getMatchingEvents();
        assertNotNull(matches);
        Thread.sleep(100L); // avoid crushing the CPU
    } while (matches.isEmpty());

    assertEquals(1, matches.size());
    assertEquals(event, matches.get(0));
}
 
Example #28
Source File: TestWriteAheadStorePartition.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Writes 100 events to a partition, then asks the partition to re-index its latest events
 * against a mocked index that reports 18 as the minimum event id to re-index. Verifies that
 * exactly the 82 events with ids 18 through 99 are passed to the index.
 */
@Test
@SuppressWarnings("unchecked")
public void testReindex() throws IOException {
    final RepositoryConfiguration config = createConfig(1, "testReindex");
    config.setMaxEventFileCount(5);

    final String partitionName = config.getStorageDirectories().keySet().iterator().next();
    final File storageDir = config.getStorageDirectories().values().iterator().next();

    final RecordWriterFactory journalWriterFactory = (file, idGenerator, compressed, createToc) -> {
        final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
        return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 32 * 1024, IdentifierLookup.EMPTY);
    };

    final RecordReaderFactory journalReaderFactory = RecordReaders::newRecordReader;

    final WriteAheadStorePartition partition = new WriteAheadStorePartition(storageDir, partitionName, config, journalWriterFactory,
        journalReaderFactory, new LinkedBlockingQueue<>(), new AtomicLong(0L), EventReporter.NO_OP, Mockito.mock(EventFileManager.class));

    for (int written = 0; written < 100; written++) {
        partition.addEvents(Collections.singleton(TestUtil.createEvent()));
    }

    // Capture every event the partition hands to the mocked index for re-indexing.
    final Map<ProvenanceEventRecord, StorageSummary> captured = new ConcurrentHashMap<>();
    final EventIndex mockIndex = Mockito.mock(EventIndex.class);
    Mockito.doAnswer(invocation -> {
        final Map<ProvenanceEventRecord, StorageSummary> events = invocation.getArgument(0);
        captured.putAll(events);
        return null;
    }).when(mockIndex).reindexEvents(Mockito.anyMap());

    Mockito.doReturn(18L).when(mockIndex).getMinimumEventIdToReindex("1");
    partition.reindexLatestEvents(mockIndex);

    final List<Long> sortedIds = captured.values().stream()
        .map(StorageSummary::getEventId)
        .sorted()
        .collect(Collectors.toList());

    assertEquals(82, sortedIds.size());
    for (int offset = 0; offset < sortedIds.size(); offset++) {
        assertEquals(18 + offset, sortedIds.get(offset).intValue());
    }
}
 
Example #29
Source File: LatestEventsPerProcessorQuery.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Records the stored event's id in the ring buffer belonging to the event's component;
 * a buffer of capacity 1000 is created for the component on first use.
 *
 * @param event the event whose component id selects the ring buffer
 * @param storageSummary the summary providing the event id to record
 */
@Override
public void update(final ProvenanceEventRecord event, final StorageSummary storageSummary) {
    latestRecords
        .computeIfAbsent(event.getComponentId(), id -> new RingBuffer<>(1000))
        .add(storageSummary.getEventId());
}
 
Example #30
Source File: EventIdFirstSchemaRecordWriter.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes the given event and appends it to this writer's buffered output stream.
 * <p>
 * The event is first serialized into a byte array outside of the lock. Only the id
 * assignment and the copy of the serialized bytes into the output stream happen while
 * synchronized on this writer.
 *
 * @param record the event to write; if its event id is -1, an id is taken from the id generator
 * @return a StorageSummary built from the record id, a "parentDirectory/fileName" storage location,
 *         the TOC writer's current block index (null when there is no TOC writer), the number of
 *         bytes this record added, and the value of {@code getBytesWritten()} after the write
 * @throws IOException if this writer is already dirty, or if serializing or writing fails; a failure
 *         inside the synchronized write marks this writer dirty before being rethrown
 */
@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
    if (isDirty()) {
        throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository");
    }

    // Serialize into an in-memory byte array before taking the lock.
    final long serializeStart = System.nanoTime();
    final byte[] serialized;
    try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(256);
        final DataOutputStream dos = new DataOutputStream(baos)) {
        writeRecord(record, 0L, dos);
        serialized = baos.toByteArray();
    }

    // Declared before the synchronized block because they are assigned inside it
    // but read afterwards for statistics and for the returned summary.
    final long lockStart = System.nanoTime();
    final long writeStart;
    final long startBytes;
    final long endBytes;
    final long recordIdentifier;
    synchronized (this) {
        writeStart = System.nanoTime();
        try {
            // An event id of -1 means no id has been assigned yet, so generate one.
            recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
            startBytes = getBytesWritten();

            ensureStreamState(recordIdentifier, startBytes);

            final DataOutputStream out = getBufferedOutputStream();
            // The id is written as an offset from firstEventId.
            // NOTE(review): narrowing cast assumes the offset fits in an int -- confirm.
            final int recordIdOffset = (int) (recordIdentifier - firstEventId);
            out.writeInt(recordIdOffset);
            // Write the serialized length followed by the serialized bytes.
            out.writeInt(serialized.length);
            out.write(serialized);

            recordCount.incrementAndGet();
            endBytes = getBytesWritten();
        } catch (final IOException ioe) {
            markDirty();
            throw ioe;
        }
    }

    if (logger.isDebugEnabled()) {
        // Collect stats and periodically dump them; this only happens when debug logging is enabled.
        final long writeNanos = System.nanoTime() - writeStart;
        writeTimes.add(new TimestampedLong(writeNanos));

        final long serializeNanos = lockStart - serializeStart;
        serializeTimes.add(new TimestampedLong(serializeNanos));

        final long lockNanos = writeStart - lockStart;
        lockTimes.add(new TimestampedLong(lockNanos));
        bytesWritten.add(new TimestampedLong(endBytes - startBytes));

        // Log the aggregated statistics once every one million records.
        final long recordCount = totalRecordCount.incrementAndGet();
        if (recordCount % 1_000_000 == 0) {
            final long sixtySecondsAgo = System.currentTimeMillis() - 60000L;
            final Long writeNanosLast60 = writeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long lockNanosLast60 = lockTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long serializeNanosLast60 = serializeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long bytesWrittenLast60 = bytesWritten.getAggregateValue(sixtySecondsAgo).getValue();
            logger.debug("In the last 60 seconds, have spent {} millis writing to file ({} MB), {} millis waiting on synchronize block, {} millis serializing events",
                TimeUnit.NANOSECONDS.toMillis(writeNanosLast60),
                bytesWrittenLast60 / 1024 / 1024,
                TimeUnit.NANOSECONDS.toMillis(lockNanosLast60),
                TimeUnit.NANOSECONDS.toMillis(serializeNanosLast60));
        }
    }

    final long serializedLength = endBytes - startBytes;
    final TocWriter tocWriter = getTocWriter();
    final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
    final File file = getFile();
    // The storage location is the file's parent directory name plus the file name.
    final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
    return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}