Java Code Examples for org.deeplearning4j.nn.conf.inputs.InputType#arrayElementsPerExample()

The following examples show how to use org.deeplearning4j.nn.conf.inputs.InputType#arrayElementsPerExample(). All examples are drawn from the deeplearning4j project; the source file named above each example gives the full context.
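
As a quick orientation, arrayElementsPerExample() returns the number of array elements a single example occupies, excluding the minibatch dimension. The minimal sketch below is my own illustration, not taken from the project sources; the expected values assume the usual [minibatch, ...] shape conventions of each InputType factory method.

import org.deeplearning4j.nn.conf.inputs.InputType;

public class ArrayElementsPerExampleDemo {
    public static void main(String[] args) {
        //Per-example element counts exclude the minibatch dimension
        InputType ff = InputType.feedForward(128);             //shape [minibatch, 128]
        InputType rnn = InputType.recurrent(64, 100);          //shape [minibatch, 64, 100]
        InputType cnn = InputType.convolutional(28, 28, 3);    //height 28, width 28, 3 channels

        System.out.println(ff.arrayElementsPerExample());      //128
        System.out.println(rnn.arrayElementsPerExample());     //64 * 100 = 6400
        System.out.println(cnn.arrayElementsPerExample());     //3 * 28 * 28 = 2352
    }
}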
Example 1
Source File: Subsampling3DLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType;
    InputType.InputTypeConvolutional3D outputType =
                    (InputType.InputTypeConvolutional3D) getOutputType(-1, inputType);
    val actElementsPerEx = outputType.arrayElementsPerExample();

    //During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem
    val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * outputType.getDepth()
                    * kernelSize[0] * kernelSize[1];

    //Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        //Dup on the input before dropout, but only for training
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, Subsampling3DLayer.class, inputType, outputType)
                    .standardMemory(0, 0) //No params
                    .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 2
Source File: SubsamplingLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
    InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType);
    val actElementsPerEx = outputType.arrayElementsPerExample();

    //TODO Subsampling helper memory use... (CuDNN etc)

    //During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem
    val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * kernelSize[0]
                    * kernelSize[1];

    //Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        //Dup on the input before dropout, but only for training
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, SubsamplingLayer.class, inputType, outputType)
                    .standardMemory(0, 0) //No params
                    .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
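
For context, a memory report like the one above can be requested directly from a layer configuration. The following is a minimal usage sketch under assumed settings (2x2 max pooling, 3-channel 32x32 input); it is not part of the example source, and the printed report is whatever LayerMemoryReport's string form provides.

import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.SubsamplingLayer;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;

public class SubsamplingMemoryReportDemo {
    public static void main(String[] args) {
        SubsamplingLayer pool = new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                        .kernelSize(2, 2)
                        .stride(2, 2)
                        .build();

        InputType input = InputType.convolutional(32, 32, 3);    //height, width, channels
        LayerMemoryReport report = pool.getMemoryReport(input);  //same method shown above

        System.out.println(report);   //parameter, working and cache memory estimates
    }
}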
 
Example 3
Source File: Upsampling1D.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType;
    InputType.InputTypeRecurrent outputType = (InputType.InputTypeRecurrent) getOutputType(-1, inputType);

    long im2colSizePerEx = recurrent.getSize() * outputType.getTimeSeriesLength() * size[0];
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, Upsampling1D.class, inputType, outputType).standardMemory(0, 0) //No params
                    .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 4
Source File: EmbeddingLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Basically a dense layer, but no dropout is possible here, and no epsilons
    InputType outputType = getOutputType(-1, inputType);

    val actElementsPerEx = outputType.arrayElementsPerExample();
    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    //Embedding layer does not use caching.
    //Inference: no working memory - just activations (pullRows)
    //Training: preout op, the only in-place ops on epsilon (from layer above) + assign ops

    return new LayerMemoryReport.Builder(layerName, EmbeddingLayer.class, inputType, outputType)
                    .standardMemory(numParams, updaterStateSize).workingMemory(0, 0, 0, actElementsPerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 5
Source File: Upsampling3D.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType;
    InputType.InputTypeConvolutional3D outputType =
                    (InputType.InputTypeConvolutional3D) getOutputType(-1, inputType);

    // During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem
    val im2colSizePerEx = c.getChannels() * outputType.getDepth() * outputType.getHeight() * outputType.getWidth()
                    * size[0] * size[1] * size[2];

    // Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        //Dup on the input before dropout, but only for training
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, Upsampling3D.class, inputType, outputType).standardMemory(0, 0) //No params
                    .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 6
Source File: Upsampling2D.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
    InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType);

    // During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem
    val im2colSizePerEx =
                    c.getChannels() * outputType.getHeight() * outputType.getWidth() * size[0] * size[1];

    // Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        //Dup on the input before dropout, but only for training
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, Upsampling2D.class, inputType, outputType).standardMemory(0, 0) //No params
                    .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 7
Source File: DropoutLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    val actElementsPerEx = inputType.arrayElementsPerExample();
    //During inference: not applied. During  backprop: dup the input, in case it's used elsewhere
    //But: this will be counted in the activations
    //(technically inference memory is over-estimated as a result)

    return new LayerMemoryReport.Builder(layerName, DropoutLayer.class, inputType, inputType).standardMemory(0, 0) //No params
                    .workingMemory(0, 0, 0, 0) //No working mem, other than activations etc
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 8
Source File: DenseLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType outputType = getOutputType(-1, inputType);

    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
            trainSizeVariable += 0; //TODO
        } else {
            //Assume we dup the input
            trainSizeVariable += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, DenseLayer.class, inputType, outputType)
                    .standardMemory(numParams, updaterStateSize)
                    .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer
                    .build();
}
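
To make the standardMemory(numParams, updaterStateSize) and trainSizeVariable figures concrete, here is a back-of-the-envelope sketch with assumed sizes (nIn = 256, nOut = 128, an Adam-style updater with two state values per parameter); the numbers are illustrative only.

public class DenseLayerMemoryMath {
    public static void main(String[] args) {
        long nIn = 256, nOut = 128;                 //assumed layer sizes
        long numParams = nIn * nOut + nOut;         //weights + biases = 32896
        long adamStateSize = 2 * numParams;         //m and v per parameter = 65792
        long trainSizeVariable = nIn + nOut;        //dup'd input (dropout) + preOut activations, per example
        System.out.println(numParams + " params, " + adamStateSize
                        + " updater state values, " + trainSizeVariable + " working elements per example");
    }
}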
 
Example 9
Source File: CustomLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Memory report is used to estimate how much memory is required for the layer, for different configurations
    //If you don't need this functionality for your custom layer, you can return a LayerMemoryReport
    // with all 0s, or

    //This implementation: based on DenseLayer implementation
    InputType outputType = getOutputType(-1, inputType);

    val numParams = initializer().numParams(this);
    int updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        //Assume we dup the input for dropout
        trainSizeVariable += inputType.arrayElementsPerExample();
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, CustomLayer.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, trainSizeFixed,
                    trainSizeVariable)     //No additional memory (beyond activations) for inference
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS,
                    MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer
            .build();
}
 
Example 10
Source File: BaseOutputLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Basically a dense layer...
    InputType outputType = getOutputType(-1, inputType);

    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
            trainSizeVariable += 0; //TODO
        } else {
            //Assume we dup the input
            trainSizeVariable += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, OutputLayer.class, inputType, outputType)
                    .standardMemory(numParams, updaterStateSize)
                    .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 11
Source File: EmbeddingSequenceLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType outputType = getOutputType(-1, inputType);

    val actElementsPerEx = outputType.arrayElementsPerExample();
    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    return new LayerMemoryReport.Builder(layerName, EmbeddingSequenceLayer.class, inputType, outputType)
                    .standardMemory(numParams, updaterStateSize).workingMemory(0, 0, 0, actElementsPerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 12
Source File: CenterLossOutputLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Basically a dense layer, with some extra params...
    InputType outputType = getOutputType(-1, inputType);

    val nParamsW = nIn * nOut;
    val nParamsB = nOut;
    val nParamsCenter = nIn * nOut;
    val numParams = nParamsW + nParamsB + nParamsCenter;

    int updaterStateSize = (int) (getUpdaterByParam(CenterLossParamInitializer.WEIGHT_KEY).stateSize(nParamsW)
                    + getUpdaterByParam(CenterLossParamInitializer.BIAS_KEY).stateSize(nParamsB)
                    + getUpdaterByParam(CenterLossParamInitializer.CENTER_KEY).stateSize(nParamsCenter));

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
            trainSizeVariable += 0; //TODO
        } else {
            //Assume we dup the input
            trainSizeVariable += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, CenterLossOutputLayer.class, inputType, outputType)
                    .standardMemory(numParams, updaterStateSize)
                    .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 13
Source File: Yolo2OutputLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    long numValues = inputType.arrayElementsPerExample();

    //This is a VERY rough estimate...
    return new LayerMemoryReport.Builder(layerName, Yolo2OutputLayer.class, inputType, inputType)
                    .standardMemory(0, 0) //No params
                    .workingMemory(0, numValues, 0, 6 * numValues).cacheMemory(0, 0) //No cache
                    .build();
}
 
Example 14
Source File: LocalResponseNormalization.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    val actElementsPerEx = inputType.arrayElementsPerExample();

    //Forward pass: 3x input size as working memory, in addition to output activations
    //Backward pass: 2x input size as working memory, in addition to epsilons

    return new LayerMemoryReport.Builder(layerName, DenseLayer.class, inputType, inputType).standardMemory(0, 0)
                    .workingMemory(0, 2 * actElementsPerEx, 0, 3 * actElementsPerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer
                    .build();
}
 
Example 15
Source File: ActivationLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    val actElementsPerEx = inputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, ActivationLayer.class, inputType, inputType)
                    .standardMemory(0, 0) //No params
                    //During inference: modify input activation in-place
                    //During  backprop: dup the input for later re-use
                    .workingMemory(0, 0, 0, actElementsPerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 16
Source File: ElementWiseMultiplicationLayer.java    From deeplearning4j with Apache License 2.0
/**
 * This is a report of the estimated memory consumption for the given layer
 *
 * @param inputType Input type to the layer. Memory consumption is often a function of the input type
 * @return Memory report for the layer
 */
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType outputType = getOutputType(-1, inputType);

    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
            trainSizeVariable += 0; //TODO
        } else {
            //Assume we dup the input
            trainSizeVariable += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, ElementWiseMultiplicationLayer.class, inputType, outputType)
                    .standardMemory(numParams, updaterStateSize)
                    .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer
                    .build();
}
 
Example 17
Source File: BatchNormalization.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType outputType = getOutputType(-1, inputType);

    //TODO CuDNN helper etc

    val numParams = initializer().numParams(this);
    int updaterStateSize = 0;

    for (String s : BatchNormalizationParamInitializer.getInstance().paramKeys(this)) {
        updaterStateSize += getUpdaterByParam(s).stateSize(nOut);
    }

    //During forward pass: working memory size approx. equal to 2x input size (copy ops, etc)
    val inferenceWorkingSize = 2 * inputType.arrayElementsPerExample();

    //During training: we calculate mean and variance... result is equal to nOut, and INDEPENDENT of minibatch size
    val trainWorkFixed = 2 * nOut;
    //During backprop: multiple working arrays... output size, 2 * output size (indep. of example size),
    val trainWorkingSizePerExample = inferenceWorkingSize //Inference during backprop
                    + (outputType.arrayElementsPerExample() + 2 * nOut); //Backprop gradient calculation

    return new LayerMemoryReport.Builder(layerName, BatchNormalization.class, inputType, outputType)
                    .standardMemory(numParams, updaterStateSize)
                    .workingMemory(0, 0, trainWorkFixed, trainWorkingSizePerExample) //No additional memory (beyond activations) for inference
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
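
As a rough illustration of the terms above (assumed shapes, not from the source): with nOut = 64 channels and 64x8x8 input feature maps, the estimates work out as follows.

public class BatchNormMemoryMath {
    public static void main(String[] args) {
        long nOut = 64;                                        //assumed channel count
        long inputElementsPerEx = 64 * 8 * 8;                  //inputType.arrayElementsPerExample() = 4096
        long inferenceWorkingSize = 2 * inputElementsPerEx;    //~2x input for copy ops = 8192
        long trainWorkFixed = 2 * nOut;                        //mean + variance, independent of minibatch = 128
        long trainWorkingSizePerExample = inferenceWorkingSize
                        + inputElementsPerEx + 2 * nOut;       //backprop buffers = 12416
        System.out.println(trainWorkFixed + " fixed, " + trainWorkingSizePerExample + " per example");
    }
}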
 
Example 18
Source File: AutoEncoder.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Because of supervised + unsupervised modes: we'll assume unsupervised, which has the larger memory requirements
    InputType outputType = getOutputType(-1, inputType);

    val actElementsPerEx = outputType.arrayElementsPerExample() + inputType.arrayElementsPerExample();
    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizePerEx = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
        } else {
            //Assume we dup the input
            trainSizePerEx += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by loss function
    trainSizePerEx += actElementsPerEx;

    return new LayerMemoryReport.Builder(layerName, AutoEncoder.class, inputType, outputType)
                    .standardMemory(numParams, updaterStateSize).workingMemory(0, 0, 0, trainSizePerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
 
Example 19
Source File: VariationalAutoencoder.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //For training: we'll assume unsupervised pretraining, as this has higher memory requirements

    InputType outputType = getOutputType(-1, inputType);

    val actElementsPerEx = outputType.arrayElementsPerExample();
    val numParams = initializer().numParams(this);
    int updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int inferenceWorkingMemSizePerEx = 0;
    //Forward pass size through the encoder:
    for (int i = 1; i < encoderLayerSizes.length; i++) {
        inferenceWorkingMemSizePerEx += encoderLayerSizes[i];
    }

    //Forward pass size through the decoder, during training
    //p(Z|X) mean and stdev; pzxSigmaSquared, pzxSigma -> all size equal to nOut
    long decoderFwdSizeWorking = 4 * nOut;
    //plus, nSamples * decoder size
    //For each decoding: random sample (nOut), z (nOut), activations for each decoder layer
    decoderFwdSizeWorking += numSamples * (2 * nOut + ArrayUtil.sum(getDecoderLayerSizes()));
    //Plus, component of score
    decoderFwdSizeWorking += nOut;

    //Backprop size through the decoder and encoder: approx. 2x forward pass size
    long trainWorkingMemSize = 2 * (inferenceWorkingMemSizePerEx + decoderFwdSizeWorking);

    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
        } else {
            //Assume we dup the input
            trainWorkingMemSize += inputType.arrayElementsPerExample();
        }
    }

    return new LayerMemoryReport.Builder(layerName, VariationalAutoencoder.class, inputType, outputType)
                    .standardMemory(numParams, updaterStateSize)
                    .workingMemory(0, inferenceWorkingMemSizePerEx, 0, trainWorkingMemSize)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
                    .build();
}
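
The decoder working-memory arithmetic above is easier to follow with assumed numbers (latent size nOut = 32, one reconstruction sample, decoder layer sizes 256 and 256); this sketch only restates the formula and is illustrative.

public class VaeDecoderMemoryMath {
    public static void main(String[] args) {
        long nOut = 32, numSamples = 1;                               //assumed latent size and sample count
        long decoderFwdSizeWorking = 4 * nOut;                        //p(z|x) mean/stdev buffers = 128
        decoderFwdSizeWorking += numSamples * (2 * nOut + 256 + 256); //sample + z + decoder activations = 576
        decoderFwdSizeWorking += nOut;                                //score component = 32
        System.out.println(decoderFwdSizeWorking);                    //736 elements per example, before the 2x backprop factor
    }
}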
 
Example 20
Source File: ConvolutionLayer.java    From deeplearning4j with Apache License 2.0
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    val paramSize = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(paramSize);

    InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
    InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType);

    //TODO convolution helper memory use... (CuDNN etc)

    //During forward pass: im2col array, mmul (result activations), in-place broadcast add
    val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * kernelSize[0]
                    * kernelSize[1];

    //During training: have im2col array, in-place gradient calculation, then epsilons...
    //But: im2col array may be cached...
    Map<CacheMode, Long> trainWorkingMemoryPerEx = new HashMap<>();
    Map<CacheMode, Long> cachedPerEx = new HashMap<>();

    //During backprop: im2col array for forward pass (possibly cached) + the epsilon6d array required to calculate
    // the 4d epsilons (equal size to input)
    //Note that the eps6d array is same size as im2col
    for (CacheMode cm : CacheMode.values()) {
        long trainWorkingSizePerEx;
        long cacheMemSizePerEx = 0;
        if (cm == CacheMode.NONE) {
            trainWorkingSizePerEx = 2 * im2colSizePerEx;
        } else {
            //im2col is cached, but epsNext2d/eps6d is not
            cacheMemSizePerEx = im2colSizePerEx;
            trainWorkingSizePerEx = im2colSizePerEx;
        }

        if (getIDropout() != null) {
            //Dup on the input before dropout, but only for training
            trainWorkingSizePerEx += inputType.arrayElementsPerExample();
        }

        trainWorkingMemoryPerEx.put(cm, trainWorkingSizePerEx);
        cachedPerEx.put(cm, cacheMemSizePerEx);
    }

    return new LayerMemoryReport.Builder(layerName, ConvolutionLayer.class, inputType, outputType)
                    .standardMemory(paramSize, updaterStateSize)
                    //im2col caching -> only variable size caching
                    .workingMemory(0, im2colSizePerEx, MemoryReport.CACHE_MODE_ALL_ZEROS, trainWorkingMemoryPerEx)
                    .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, cachedPerEx).build();

}
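
Finally, a rough sketch of the im2col bookkeeping in the last example, with assumed dimensions (3 input channels, 32x32 output feature maps, 3x3 kernel); it only restates the arithmetic above, not the layer implementation.

public class ConvIm2ColMemoryMath {
    public static void main(String[] args) {
        long channels = 3, outH = 32, outW = 32, kH = 3, kW = 3;   //assumed shapes
        long im2colSizePerEx = channels * outH * outW * kH * kW;   //27648 elements per example
        long workingNoCache = 2 * im2colSizePerEx;                 //CacheMode.NONE: im2col + eps6d in working memory
        long workingWithCache = im2colSizePerEx;                   //with caching, one copy stays in working memory...
        long cachedWithCache = im2colSizePerEx;                    //...and the im2col copy is counted as cache instead
        System.out.println(workingNoCache + " working vs " + workingWithCache + " working + " + cachedWithCache + " cached");
    }
}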