Java Code Examples for org.elasticsearch.common.lucene.Lucene#isCorruptionException()

The following examples show how to use org.elasticsearch.common.lucene.Lucene#isCorruptionException(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: test.java    From vscode-extension with MIT License 6 votes vote down vote up
/**
 * Re-reads the last committed segment infos from the store into
 * {@code lastCommittedSegmentInfos}.
 *
 * <p>If the read fails while the engine is closed, the failure is ignored. If it fails while the
 * engine is still open, the failure is logged and, when it is a corruption exception, rethrown
 * wrapped in a {@link FlushFailedEngineException}.
 */
private void refreshLastCommittedSegmentInfos() {
    // Take our own reference on the store for the duration of the read: when the engine is closed
    // by a tragic event we do not hold the write lock, so a concurrent close may dec-ref the store
    // (and thereby close it) underneath us unless we hold a reference ourselves.
    store.incRef();
    try {
        lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo();
    } catch (Exception readFailure) {
        final boolean engineStillOpen = !isClosed.get();
        if (engineStillOpen) {
            try {
                logger.warn("failed to read latest segment infos on flush", readFailure);
            } catch (Exception loggingFailure) {
                // never let a logging problem hide the original read failure
                readFailure.addSuppressed(loggingFailure);
            }
            if (Lucene.isCorruptionException(readFailure)) {
                throw new FlushFailedEngineException(shardId, readFailure);
            }
        }
    } finally {
        store.decRef();
    }
}
 
Example 2
Source File: InternalEngine.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Reloads {@code lastCommittedSegmentInfos} from the store's last commit.
 *
 * <p>A failed read is swallowed when the engine is already closed. Otherwise it is logged, and a
 * corruption failure is escalated as a {@link FlushFailedEngineException}; any other failure is
 * only logged.
 */
private void refreshLastCommittedSegmentInfos() {
    // The store must be inc-ref'ed here: if a tragic event closes the engine we neither acquire
    // the write lock nor wait for exclusive access, so the store reference may be dec'ed (closing
    // the store) while we read. Holding our own reference keeps the store usable.
    store.incRef();
    try {
        lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo();
    } catch (Exception failure) {
        if (isClosed.get()) {
            // engine already closed: nothing to report (the finally block still releases the store)
            return;
        }
        try {
            logger.warn("failed to read latest segment infos on flush", failure);
        } catch (Exception whileLogging) {
            failure.addSuppressed(whileLogging);
        }
        if (Lucene.isCorruptionException(failure)) {
            throw new FlushFailedEngineException(shardId, failure);
        }
    } finally {
        store.decRef();
    }
}
 
Example 3
Source File: RestoreService.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Marks the restore entry of a failed shard as {@code FAILURE} when the shard is an initializing
 * primary that was recovering from a snapshot and the failure was caused by file corruption.
 *
 * @param failedShard    routing entry of the shard that failed
 * @param unassignedInfo details about the failure; its failure (if any) is inspected for corruption
 */
@Override
public void shardFailed(ShardRouting failedShard, UnassignedInfo unassignedInfo) {
    if (!failedShard.primary() || !failedShard.initializing()) {
        return;
    }
    final RecoverySource source = failedShard.recoverySource();
    if (source.getType() != RecoverySource.Type.SNAPSHOT) {
        return;
    }
    final Snapshot snapshot = ((SnapshotRecoverySource) source).snapshot();
    // Only a corruption failure is recorded as a failed restore right away: retrying on another
    // node cannot help when the snapshot files themselves are corrupt. If a node merely left or
    // crashed, the restore is acknowledged only once the shard has been restored elsewhere.
    if (unassignedInfo.getFailure() == null) {
        return;
    }
    if (!Lucene.isCorruptionException(unassignedInfo.getFailure().getCause())) {
        return;
    }
    changes(snapshot).shards.put(
        failedShard.shardId(),
        new ShardRestoreStatus(
            failedShard.currentNodeId(),
            RestoreInProgress.State.FAILURE,
            unassignedInfo.getFailure().getCause().getMessage()));
}
 
Example 4
Source File: SearchService.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Cleans up after a failed search: frees the search context and, if the failure is a corruption
 * exception, fails the shard the context belongs to.
 *
 * <p>Anything thrown while checking for corruption or failing the shard is logged and not
 * propagated.
 *
 * @param context the search context whose execution failed
 * @param t       the failure that occurred
 */
private void processFailure(SearchContext context, Throwable t) {
    freeContext(context.id());
    try {
        if (!Lucene.isCorruptionException(t)) {
            return;
        }
        context.indexShard().failShard("search execution corruption failure", t);
    } catch (Throwable failShardProblem) {
        logger.warn("failed to process shard failure to (potentially) send back shard failure on corruption", failShardProblem);
    }
}
 
Example 5
Source File: Engine.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Fails the engine due to some error; the engine is also closed.
 * The underlying store is marked corrupted iff the failure is caused by index corruption.
 *
 * <p>Only a caller that obtains {@code failEngineLock} via {@code tryLock()} does the work; any
 * other caller just logs at debug level and returns. The lock is not released in this method.
 * Exceptions raised while failing the engine are logged and never propagated to the caller.
 *
 * @param reason  human-readable reason, used in log messages, in the close reason and in the
 *                generated exceptions
 * @param failure the cause of the failure, may be {@code null}; when {@code null} an
 *                {@link IllegalStateException} built from {@code reason} is recorded as the
 *                failed-engine exception instead
 */
public void failEngine(String reason, @Nullable Throwable failure) {
    if (failEngineLock.tryLock()) {
        // hold a store reference so the store stays usable until we are done (released in the outer finally)
        store.incRef();
        try {
            try {
                // we just go and close this engine - no way to recover
                closeNoLock("engine failed on: [" + reason + "]");
            } finally {
                // everything below runs whether or not closeNoLock threw
                if (failedEngine != null) {
                    logger.debug("tried to fail engine but engine is already failed. ignoring. [{}]", failure, reason);
                    // NOTE: returning from inside a finally block discards any exception thrown by closeNoLock above
                    return;
                }
                logger.warn("failed engine [{}]", failure, reason);
                // we must set a failure exception, generate one if not supplied
                failedEngine = (failure != null) ? failure : new IllegalStateException(reason);
                // we first mark the store as corrupted before we notify any listeners
                // this must happen first otherwise we might try to reallocate so quickly
                // on the same node that we don't see the corrupted marker file when
                // the shard is initializing
                if (Lucene.isCorruptionException(failure)) {
                    try {
                        store.markStoreCorrupted(new IOException("failed engine (reason: [" + reason + "])", ExceptionsHelper.unwrapCorruption(failure)));
                    } catch (IOException e) {
                        // best effort: failing to write the marker must not stop listener notification
                        logger.warn("Couldn't mark store corrupted", e);
                    }
                }
                // the listener receives the original (possibly null) failure, not the generated exception
                failedEngineListener.onFailedEngine(shardId, reason, failure);
            }
        } catch (Throwable t) {
            // don't bubble up these exceptions up
            logger.warn("failEngine threw exception", t);
        } finally {
            store.decRef();
        }
    } else {
        // lock not acquired: nothing is done here beyond logging
        logger.debug("tried to fail engine but could not acquire lock - engine should be failed by now [{}]", failure, reason);
    }
}
 
Example 6
Source File: Engine.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the engine should be failed because of the given throwable and, if so, fails it.
 *
 * <p>The engine is failed for a corruption exception, an out-of-memory condition, or a
 * {@link RecoveryFromDistributedLogFailedException}, checked in that order.
 *
 * @param source description of where the throwable originated; included in the failure reason
 * @param t      the throwable to inspect
 * @return {@code true} if the engine was failed, {@code false} otherwise
 */
protected boolean maybeFailEngine(String source, Throwable t) {
    final String failureReason;
    if (Lucene.isCorruptionException(t)) {
        failureReason = "corrupt file (source: [" + source + "])";
    } else if (ExceptionsHelper.isOOM(t)) {
        failureReason = "out of memory (source: [" + source + "])";
    } else if (t instanceof RecoveryFromDistributedLogFailedException) {
        failureReason = "recovery from distributed log service failed";
    } else {
        // none of the fatal categories matched: leave the engine running
        return false;
    }
    failEngine(failureReason, t);
    return true;
}
 
Example 7
Source File: ShadowEngine.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Flush for the shadow engine: no actual flush is performed. The engine is refreshed and the last
 * committed segment infos are re-read under the read lock.
 *
 * <p>A failure while re-reading is ignored when the engine is closed. Otherwise it is logged, and
 * a corruption failure is rethrown as a {@link FlushFailedEngineException}.
 *
 * @param force         not used by this implementation
 * @param waitIfOngoing not used by this implementation
 * @return a {@link CommitId} built from the id of the current {@code lastCommittedSegmentInfos}
 */
@Override
public CommitId flush(boolean force, boolean waitIfOngoing) throws EngineException {
    logger.trace("skipping FLUSH on shadow engine");
    refresh("flush");
    // Keep our own reference on the store while reading: if the engine is closed by a tragic
    // event we hold no write lock and do not wait for exclusive access, so the store could be
    // dec-ref'ed (and closed) concurrently unless we have inc-ref'ed it ourselves.
    store.incRef();
    try (ReleasableLock ignored = readLock.acquire()) {
        lastCommittedSegmentInfos = readLastCommittedSegmentInfos(searcherManager, store);
    } catch (Throwable readFailure) {
        final boolean engineStillOpen = !isClosed.get();
        if (engineStillOpen) {
            logger.warn("failed to read latest segment infos on flush", readFailure);
            if (Lucene.isCorruptionException(readFailure)) {
                throw new FlushFailedEngineException(shardId, readFailure);
            }
        }
    } finally {
        store.decRef();
    }
    return new CommitId(lastCommittedSegmentInfos.getId());
}
 
Example 8
Source File: StoreRecoveryService.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Restores the shard from the {@link RestoreSource} associated with it in the routing table.
 *
 * <p>Any failure is rethrown as an {@link IndexShardRestoreFailedException}; when the failure is
 * a corruption exception the restore is first reported as failed to the restore service.
 *
 * @param indexShard    the shard to restore into
 * @param recoveryState recovery state that is updated during the restore
 * @throws IndexShardRestoreFailedException if the shard has no restore source or the restore fails
 */
private void restore(final IndexShard indexShard, final RecoveryState recoveryState) {
    final RestoreSource source = indexShard.routingEntry().restoreSource();
    if (source == null) {
        throw new IndexShardRestoreFailedException(shardId, "empty restore source");
    }
    if (logger.isTraceEnabled()) {
        logger.trace("[{}] restoring shard  [{}]", source.snapshotId(), shardId);
    }
    try {
        recoveryState.getTranslog().totalOperations(0);
        recoveryState.getTranslog().totalOperationsOnStart(0);
        indexShard.prepareForIndexRecovery();
        final IndexShardRepository repository =
            repositoriesService.indexShardRepository(source.snapshotId().getRepository());
        // when the restore source names a different index, address the snapshot by that index
        // (same shard number); otherwise the snapshot shard is this shard
        final ShardId snapshotShardId = shardId.getIndex().equals(source.index())
            ? shardId
            : new ShardId(source.index(), shardId.id());
        repository.restore(source.snapshotId(), source.version(), shardId, snapshotShardId, recoveryState);
        indexShard.skipTranslogRecovery();
        indexShard.finalizeRecovery();
        indexShard.postRecovery("restore done");
        restoreService.indexShardRestoreCompleted(source.snapshotId(), shardId);
    } catch (Throwable failure) {
        if (Lucene.isCorruptionException(failure)) {
            restoreService.failRestore(source.snapshotId(), shardId());
        }
        throw new IndexShardRestoreFailedException(shardId, "restore failed", failure);
    }
}
 
Example 9
Source File: BlobStoreRepository.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Marks the given store as corrupted when the exception indicates index corruption; does nothing
 * for any other exception.
 *
 * <p>If writing the corruption marker itself fails, that failure is logged (with the original
 * exception attached as suppressed) and not propagated.
 *
 * @param store the store to mark as corrupted
 * @param e     the exception to inspect
 */
private static void failStoreIfCorrupted(Store store, Exception e) {
    if (Lucene.isCorruptionException(e) == false) {
        return;
    }
    // The corruption check alone does not guarantee that e is itself an IOException (presumably it
    // can also match an exception that merely wraps a corruption cause - verify against
    // Lucene.isCorruptionException). Cast only when safe and wrap otherwise, so a non-IOException
    // can never surface here as a ClassCastException.
    final IOException corruption = (e instanceof IOException) ? (IOException) e : new IOException(e);
    try {
        store.markStoreCorrupted(corruption);
    } catch (IOException inner) {
        inner.addSuppressed(e);
        LOGGER.warn("store cannot be marked as corrupted", inner);
    }
}
 
Example 10
Source File: Engine.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the engine should be failed because of the given exception and, if so, fails it.
 * Only corruption exceptions cause the engine to be failed.
 *
 * @param source description of where the exception originated; included in the failure reason
 * @param e      the exception to inspect
 * @return {@code true} if the engine was failed, {@code false} otherwise
 */
protected boolean maybeFailEngine(String source, Exception e) {
    if (!Lucene.isCorruptionException(e)) {
        return false;
    }
    failEngine("corrupt file (source: [" + source + "])", e);
    return true;
}
 
Example 11
Source File: Engine.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Fails the engine due to some error; the engine is also closed.
 * The underlying store is marked corrupted iff the failure is caused by index corruption.
 *
 * <p>Only a caller that obtains {@code failEngineLock} via {@code tryLock()} does the work; any
 * other caller just logs at debug level and returns. The lock is not released in this method.
 * Exceptions raised while failing the engine are logged and never propagated to the caller.
 *
 * @param reason  human-readable reason, used in log messages, in the close reason and in the
 *                generated exceptions
 * @param failure the cause of the failure, may be {@code null}; when {@code null} an
 *                {@link IllegalStateException} built from {@code reason} is recorded as the
 *                failed-engine exception instead
 */
public void failEngine(String reason, @Nullable Exception failure) {
    if (failure != null) {
        // runs before (and regardless of) the lock attempt below
        maybeDie(reason, failure);
    }
    if (failEngineLock.tryLock()) {
        try {
            if (failedEngine.get() != null) {
                logger.warn(() -> new ParameterizedMessage("tried to fail engine but engine is already failed. ignoring. [{}]", reason), failure);
                return;
            }
            // we must set a failure exception, generate one if not supplied
            // this must happen before we close IW or Translog such that we can check this state to opt out of failing the engine
            // again on any caught AlreadyClosedException
            failedEngine.set((failure != null) ? failure : new IllegalStateException(reason));
            try {
                // we just go and close this engine - no way to recover
                closeNoLock("engine failed on: [" + reason + "]", closedLatch);
            } finally {
                // everything below runs whether or not closeNoLock threw
                logger.warn(() -> new ParameterizedMessage("failed engine [{}]", reason), failure);
                // we first mark the store as corrupted before we notify any listeners
                // this must happen first otherwise we might try to reallocate so quickly
                // on the same node that we don't see the corrupted marker file when
                // the shard is initializing
                if (Lucene.isCorruptionException(failure)) {
                    // the marker can only be written while we hold a store reference; tryIncRef fails once the store is closed
                    if (store.tryIncRef()) {
                        try {
                            store.markStoreCorrupted(new IOException("failed engine (reason: [" + reason + "])", ExceptionsHelper.unwrapCorruption(failure)));
                        } catch (IOException e) {
                            // best effort: failing to write the marker must not stop listener notification
                            logger.warn("Couldn't mark store corrupted", e);
                        } finally {
                            store.decRef();
                        }
                    } else {
                        logger.warn(() ->
                            new ParameterizedMessage("tried to mark store as corrupted but store is already closed. [{}]", reason),
                            failure
                        );
                    }
                }
                // the listener receives the original (possibly null) failure, not the generated exception
                eventListener.onFailedEngine(reason, failure);
            }
        } catch (Exception inner) {
            // keep the original failure visible alongside whatever went wrong while failing the engine
            if (failure != null) inner.addSuppressed(failure);
            // don't bubble up these exceptions up
            logger.warn("failEngine threw exception", inner);
        }
    } else {
        // lock not acquired: nothing is done here beyond logging
        logger.debug(() -> new ParameterizedMessage("tried to fail engine but could not acquire lock - engine should be failed by now [{}]", reason), failure);
    }
}