Java Code Examples for org.nd4j.linalg.ops.transforms.Transforms#sqrt()

The following examples show how to use org.nd4j.linalg.ops.transforms.Transforms#sqrt(). Each example is taken from an open-source project; the source file and license are noted above the code.
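
Transforms.sqrt() has two overloads: Transforms.sqrt(INDArray) returns the result in a fresh array, while Transforms.sqrt(INDArray, boolean dup) uses the dup flag to decide whether to copy the input first (true) or overwrite it in place (false). A minimal sketch of the difference (array values are illustrative):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;

INDArray a = Nd4j.create(new double[] {4.0, 9.0});
INDArray b = Transforms.sqrt(a, true);  // dup = true: b holds {2, 3}, a is untouched
Transforms.sqrt(a, false);              // dup = false: a itself now holds {2, 3}

Most of the examples below pick the flag based on whether the input is reusable state (copy it) or an already-temporary array (mutate it).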
Example 1
Source File: AdaDeltaUpdater.java    From nd4j with Apache License 2.0
/**
 * Apply the AdaDelta update to the given gradient (modified in place)
 * and update the internal state of the updater.
 *
 * @param gradient  the gradient to compute the update for
 * @param iteration the current iteration
 * @param epoch     the current epoch
 */
@Override
public void applyUpdater(INDArray gradient, int iteration, int epoch) {
    if (msg == null || msdx == null)
        throw new IllegalStateException("Updater has not been initialized with view state");

    double rho = config.getRho();
    double epsilon = config.getEpsilon();

    //Line 4 of Algorithm 1: https://arxiv.org/pdf/1212.5701v1.pdf
    //E[g^2]_t = rho * E[g^2]_{t−1} + (1-rho)*g^2_t
    msg.muli(rho).addi(gradient.mul(gradient).muli(1 - rho));

    //Calculate update:
    //dX = - g * RMS[delta x]_{t-1} / RMS[g]_t
    //Note: negative is applied in the DL4J step function: params -= update rather than params += update
    INDArray rmsdx_t1 = Transforms.sqrt(msdx.add(epsilon), false);
    INDArray rmsg_t = Transforms.sqrt(msg.add(epsilon), false);
    INDArray update = gradient.muli(rmsdx_t1.divi(rmsg_t));

    //Accumulate gradients: E[delta x^2]_t = rho * E[delta x^2]_{t-1} + (1-rho)* (delta x_t)^2
    msdx.muli(rho).addi(update.mul(update).muli(1 - rho));
}
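
A note on the allocation pattern above: msdx.add(epsilon) and msg.add(epsilon) each return a fresh temporary, so passing dup = false lets sqrt overwrite those temporaries instead of copying them again, and gradient.muli(...) deliberately turns the gradient array itself into the update. A sketch of the equivalent but more allocation-heavy out-of-place form, using the same msg, msdx and gradient as above:

INDArray rmsdx_t1 = Transforms.sqrt(msdx.add(epsilon), true); // dup = true copies the already-fresh temporary
INDArray rmsg_t = Transforms.sqrt(msg.add(epsilon), true);
INDArray update = gradient.mul(rmsdx_t1.div(rmsg_t));         // out-of-place; gradient is left unmodified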
 
Example 2
Source File: UpdaterJavaCode.java    From deeplearning4j with Apache License 2.0
public static void applyAdaDeltaUpdater(INDArray gradient, INDArray msg, INDArray msdx, double rho, double epsilon){

    //Line 4 of Algorithm 1: https://arxiv.org/pdf/1212.5701v1.pdf
    //E[g^2]_t = rho * E[g^2]_{t-1} + (1-rho)*g^2_t
    msg.muli(rho).addi(gradient.mul(gradient).muli(1 - rho));

    //Calculate update:
    //dX = - g * RMS[delta x]_{t-1} / RMS[g]_t
    //Note: negative is applied in the DL4J step function: params -= update rather than params += update
    INDArray rmsdx_t1 = Transforms.sqrt(msdx.add(epsilon), false);
    INDArray rmsg_t = Transforms.sqrt(msg.add(epsilon), false);
    INDArray update = gradient.muli(rmsdx_t1.divi(rmsg_t));

    //Accumulate gradients: E[delta x^2]_t = rho * E[delta x^2]_{t-1} + (1-rho)* (delta x_t)^2
    msdx.muli(rho).addi(update.mul(update).muli(1 - rho));
}
 
Example 3
Source File: DistributionStats.java    From nd4j with Apache License 2.0
/**
 * Create a DistributionStats object from the data ingested so far. Can be used multiple times when updating
 * online.
 */
public DistributionStats build() {
    if (runningMean == null) {
        throw new RuntimeException("No data was added, statistics cannot be determined");
    }
    return new DistributionStats(runningMean.dup(), Transforms.sqrt(runningVariance, true));
}
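
Here dup must be true: runningVariance is accumulator state that later updates still need, so the standard deviation has to land in a new array. A typical use of the resulting mean/std pair is column-wise standardization; a minimal sketch with hypothetical data:

INDArray features = Nd4j.rand(10, 3);                   // hypothetical 10 x 3 feature matrix
INDArray mean = features.mean(0);
INDArray std = Transforms.sqrt(features.var(0), false); // var(0) is already a fresh array
features.subiRowVector(mean).diviRowVector(std);        // standardize each column in place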
 
Example 4
Source File: DistributionStats.java    From deeplearning4j with Apache License 2.0
/**
 * Create a DistributionStats object from the data ingested so far. Can be used multiple times when updating
 * online.
 */
public DistributionStats build() {
    if (runningMean == null) {
        throw new RuntimeException("No data was added, statistics cannot be determined");
    }
    return new DistributionStats(runningMean.dup(), Transforms.sqrt(runningVariance, true));
}
 
Example 5
Source File: GaussianReconstructionDistribution.java    From deeplearning4j with Apache License 2.0
@Override
public INDArray gradient(INDArray x, INDArray preOutDistributionParams) {
    INDArray output = preOutDistributionParams.dup();
    activationFn.getActivation(output, true);

    val size = output.size(1) / 2;
    INDArray mean = output.get(NDArrayIndex.all(), NDArrayIndex.interval(0, size));
    INDArray logStdevSquared = output.get(NDArrayIndex.all(), NDArrayIndex.interval(size, 2 * size));

    INDArray sigmaSquared = Transforms.exp(logStdevSquared, true).castTo(x.dataType());

    INDArray xSubMean = x.sub(mean.castTo(x.dataType()));
    INDArray xSubMeanSq = xSubMean.mul(xSubMean);

    INDArray dLdmu = xSubMean.divi(sigmaSquared);

    INDArray sigma = Transforms.sqrt(sigmaSquared, true);
    INDArray sigma3 = Transforms.pow(sigmaSquared, 3.0 / 2);

    INDArray dLdsigma = sigma.rdiv(-1).addi(xSubMeanSq.divi(sigma3));
    INDArray dLdlogSigma2 = sigma.divi(2).muli(dLdsigma);

    INDArray dLdx = Nd4j.createUninitialized(preOutDistributionParams.dataType(), output.shape());
    dLdx.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.interval(0, size)}, dLdmu);
    dLdx.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.interval(size, 2 * size)}, dLdlogSigma2);
    dLdx.negi();

    //dL/dz
    return activationFn.backprop(preOutDistributionParams.dup(), dLdx).getFirst();
}
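
The step from dLdsigma to dLdlogSigma2 is the chain rule: since sigma = exp(logStdevSquared / 2), d(sigma)/d(log sigma^2) = sigma / 2. A scalar sketch of the same computation, with hypothetical values standing in for the INDArrays:

double logSigma2 = 0.5;                        // hypothetical log(sigma^2)
double xSubMeanSq = 0.25;                      // hypothetical (x - mu)^2
double sigma2 = Math.exp(logSigma2);
double sigma = Math.sqrt(sigma2);
double dLdSigma = -1.0 / sigma + xSubMeanSq / Math.pow(sigma2, 1.5); // matches sigma.rdiv(-1).addi(xSubMeanSq.divi(sigma3))
double dLdLogSigma2 = (sigma / 2.0) * dLdSigma;                      // chain-rule factor sigma/2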
 
Example 6
Source File: GaussianReconstructionDistribution.java    From deeplearning4j with Apache License 2.0
@Override
public INDArray generateRandom(INDArray preOutDistributionParams) {
    INDArray output = preOutDistributionParams.dup();
    activationFn.getActivation(output, true);

    val size = output.size(1) / 2;
    INDArray mean = output.get(NDArrayIndex.all(), NDArrayIndex.interval(0, size));
    INDArray logStdevSquared = output.get(NDArrayIndex.all(), NDArrayIndex.interval(size, 2 * size));

    INDArray sigma = Transforms.exp(logStdevSquared, true);
    Transforms.sqrt(sigma, false);

    INDArray e = Nd4j.randn(sigma.shape());
    return e.muli(sigma).addi(mean); //mu + sigma * N(0,1) ~ N(mu,sigma^2)
}
 
Example 7
Source File: CoverageModelEMWorkspace.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Initialize model parameters by performing PCA.
 */
@EvaluatesRDD @UpdatesRDD @CachesRDD
private void initializeWorkersWithPCA() {
    logger.info("Initializing model parameters using PCA...");
    /* initially, set m_t, Psi_t and W_tl to zero to get an estimate of the read depth */
    final int numLatents = config.getNumLatents();
    mapWorkers(cb -> cb
            .cloneWithUpdatedPrimitive(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.m_t,
                    Nd4j.zeros(new int[] {1, cb.getTargetSpaceBlock().getNumElements()}))
            .cloneWithUpdatedPrimitive(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.Psi_t,
                    Nd4j.zeros(new int[] {1, cb.getTargetSpaceBlock().getNumElements()})));
    if (biasCovariatesEnabled) {
        mapWorkers(cb -> cb
                .cloneWithUpdatedPrimitive(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.W_tl,
                        Nd4j.zeros(new int[] {cb.getTargetSpaceBlock().getNumElements(), numLatents})));
    }

    /* update read depth without taking into account correction from bias covariates */
    updateReadDepthPosteriorExpectations(1.0, true);

    /* fetch sample covariance matrix */
    final int minPCAInitializationReadCount = config.getMinPCAInitializationReadCount();
    mapWorkers(cb -> cb.cloneWithPCAInitializationData(minPCAInitializationReadCount, Integer.MAX_VALUE));
    cacheWorkers("PCA initialization");
    final INDArray targetCovarianceMatrix = mapWorkersAndReduce(
            CoverageModelEMComputeBlock::calculateTargetCovarianceMatrixForPCAInitialization,
            INDArray::add);

    /* perform eigen-decomposition on the target covariance matrix */
    final ImmutablePair<INDArray, INDArray> targetCovarianceEigensystem = CoverageModelEMWorkspaceMathUtils.eig(
            targetCovarianceMatrix, false, logger);

    /* the eigenvalues of sample covariance matrix can be immediately inferred by scaling */
    final INDArray sampleCovarianceEigenvalues = targetCovarianceEigensystem.getLeft().div(numSamples);

    /* estimate the isotropic unexplained variance -- see Bishop 12.46 */
    final int residualDim = numTargets - numLatents;
    final double isotropicVariance = sampleCovarianceEigenvalues.get(NDArrayIndex.interval(numLatents, numSamples))
            .sumNumber().doubleValue() / residualDim;
logger.info(String.format("PCA estimate of isotropic unexplained variance: %f", isotropicVariance));

    /* estimate bias factors -- see Bishop 12.45 */
    final INDArray scaleFactors = Transforms.sqrt(sampleCovarianceEigenvalues
            .get(NDArrayIndex.interval(0, numLatents)).sub(isotropicVariance), false);
    final INDArray biasCovariatesPCA = Nd4j.create(new int[] {numTargets, numLatents});
    for (int li = 0; li < numLatents; li++) {
        final INDArray v = targetCovarianceEigensystem.getRight().getColumn(li);
        /* calculate [Delta_PCA_st]^T v */
        /* note: we do not need to broadcast vec since it is small and lambda capture is just fine */
        final INDArray unnormedBiasCovariate = CoverageModelSparkUtils.assembleINDArrayBlocksFromCollection(
                mapWorkersAndCollect(cb -> ImmutablePair.of(cb.getTargetSpaceBlock(),
                        cb.getINDArrayFromCache(CoverageModelEMComputeBlock.CoverageModelICGCacheNode.Delta_PCA_st)
                                .transpose().mmul(v))), 0);
        final double norm = unnormedBiasCovariate.norm1Number().doubleValue();
        final INDArray normedBiasCovariate = unnormedBiasCovariate
                .divi(norm)
                .muli(scaleFactors.getDouble(li));
        biasCovariatesPCA.getColumn(li).assign(normedBiasCovariate);
    }
    if (ardEnabled) { /* a better estimate of ARD coefficients */
        biasCovariatesARDCoefficients.assign(Nd4j.zeros(new int[]{1, numLatents})
                .addi(config.getInitialARDPrecisionRelativeToNoise() / isotropicVariance));
    }

    final CoverageModelParameters modelParamsFromPCA = new CoverageModelParameters(
            processedTargetList,
            Nd4j.zeros(new int[] {1, numTargets}),
            Nd4j.zeros(new int[] {1, numTargets}).addi(isotropicVariance),
            biasCovariatesPCA,
            biasCovariatesARDCoefficients);

    /* clear PCA initialization data from workers */
    mapWorkers(CoverageModelEMComputeBlock::cloneWithRemovedPCAInitializationData);

    /* push model parameters to workers */
    initializeWorkersWithGivenModel(modelParamsFromPCA);

    /* update bias latent posterior expectations without admixing */
    updateBiasLatentPosteriorExpectations(1.0);
}
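
The scaleFactors line is the probabilistic PCA weight estimate from Bishop 12.45: each retained eigenvector v_i is scaled by sqrt(lambda_i - sigma^2), with sigma^2 the isotropic variance from 12.46. A minimal sketch of that step in isolation, assuming hypothetical eigenvalues (sorted descending) and eigenvectors (as columns) from an eigendecomposition; the names below are illustrative, not from the source:

// W = V_k * sqrt(Lambda_k - sigma^2 I)
INDArray topEigenvalues = eigenvalues.get(NDArrayIndex.interval(0, numLatents));
INDArray scale = Transforms.sqrt(topEigenvalues.sub(isotropicVariance), false);
INDArray w = eigenvectors.get(NDArrayIndex.all(), NDArrayIndex.interval(0, numLatents))
        .mulRowVector(scale);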
 
Example 8
Source File: StandardScaler.java    From nd4j with Apache License 2.0
/**
 * Fit the given model
 * @param iterator the data to iterate over
 */
public void fit(DataSetIterator iterator) {
    while (iterator.hasNext()) {
        DataSet next = iterator.next();
        runningTotal += next.numExamples();
        batchCount = next.getFeatures().size(0);
        if (mean == null) {
            //start with the mean and std of zero
            //column wise
            mean = next.getFeatureMatrix().mean(0);
            std = (batchCount == 1) ? Nd4j.zeros(mean.shape()) : Transforms.pow(next.getFeatureMatrix().std(0), 2);
            std.muli(batchCount);
        } else {
            // m_newM = m_oldM + (x - m_oldM)/m_n;
            // This only works if batch size is 1, m_newS = m_oldS + (x - m_oldM)*(x - m_newM);
            INDArray xMinusMean = next.getFeatureMatrix().subRowVector(mean);
            INDArray newMean = mean.add(xMinusMean.sum(0).divi(runningTotal));
            // Using http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf
            // for a version of calc variance when dataset is partitioned into two sample sets
            // Also described in https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
            // delta = mean_B - mean_A; A is data seen so far, B is the current batch
            // M2 is the var*n
            // M2 = M2_A + M2_B + delta^2 * nA * nB/(nA+nB)
            INDArray meanB = next.getFeatureMatrix().mean(0);
            INDArray deltaSq = Transforms.pow(meanB.subRowVector(mean), 2);
            INDArray deltaSqScaled =
                            deltaSq.mul(((float) runningTotal - batchCount) * batchCount / (float) runningTotal);
            INDArray mtwoB = Transforms.pow(next.getFeatureMatrix().std(0), 2);
            mtwoB.muli(batchCount);
            std = std.add(mtwoB);
            std = std.add(deltaSqScaled);
            mean = newMean;
        }

    }
    std.divi(runningTotal);
    std = Transforms.sqrt(std);
    std.addi(Nd4j.scalar(Nd4j.EPS_THRESHOLD));
    if (std.minNumber().doubleValue() == Nd4j.EPS_THRESHOLD)
        logger.info("API_INFO: Std deviation found to be zero. Transform will round up to epsilon to avoid NaNs.");
    iterator.reset();
}
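
The else branch implements Chan et al.'s pairwise variance merge (the Stanford report and Wikipedia article cited in the comments): with A the data seen so far and B the new batch, delta = mean_B - mean_A and M2 = M2_A + M2_B + delta^2 * nA * nB / (nA + nB), where M2 is n times the variance. A scalar sketch of one merge step with hypothetical numbers:

double nA = 100, meanA = 1.0, m2A = 50.0;  // hypothetical running statistics
double nB = 20, meanB = 1.5, m2B = 8.0;    // hypothetical batch statistics
double delta = meanB - meanA;
double n = nA + nB;
double mergedMean = meanA + delta * nB / n;
double mergedM2 = m2A + m2B + delta * delta * nA * nB / n;
double std = Math.sqrt(mergedM2 / n);      // population standard deviation of the merged data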
 
Example 9
Source File: StandardScaler.java    From deeplearning4j with Apache License 2.0
/**
 * Fit the given model
 * @param iterator the data to iterate over
 */
public void fit(DataSetIterator iterator) {
    while (iterator.hasNext()) {
        DataSet next = iterator.next();
        runningTotal += next.numExamples();
        batchCount = next.getFeatures().size(0);
        if (mean == null) {
            //start with the mean and std of zero
            //column wise
            mean = next.getFeatures().mean(0);
            std = (batchCount == 1) ? Nd4j.zeros(mean.shape()) : Transforms.pow(next.getFeatures().std(0), 2);
            std.muli(batchCount);
        } else {
            // m_newM = m_oldM + (x - m_oldM)/m_n;
            // This only works if batch size is 1, m_newS = m_oldS + (x - m_oldM)*(x - m_newM);
            INDArray xMinusMean = next.getFeatures().subRowVector(mean);
            INDArray newMean = mean.add(xMinusMean.sum(0).divi(runningTotal));
            // Using http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf
            // for a version of calc variance when dataset is partitioned into two sample sets
            // Also described in https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
            // delta = mean_B - mean_A; A is data seen so far, B is the current batch
            // M2 is the var*n
            // M2 = M2_A + M2_B + delta^2 * nA * nB/(nA+nB)
            INDArray meanB = next.getFeatures().mean(0);
            INDArray deltaSq = Transforms.pow(meanB.subRowVector(mean), 2);
            INDArray deltaSqScaled =
                            deltaSq.mul(((float) runningTotal - batchCount) * batchCount / (float) runningTotal);
            INDArray mtwoB = Transforms.pow(next.getFeatures().std(0), 2);
            mtwoB.muli(batchCount);
            std = std.add(mtwoB);
            std = std.add(deltaSqScaled);
            mean = newMean;
        }

    }
    std.divi(runningTotal);
    std = Transforms.sqrt(std);
    std.addi(Nd4j.scalar(Nd4j.EPS_THRESHOLD));
    if (std.minNumber().doubleValue() == Nd4j.EPS_THRESHOLD)
        logger.info("API_INFO: Std deviation found to be zero. Transform will round up to epsilon to avoid NaNs.");
    iterator.reset();
}