org.nd4j.linalg.schedule.ScheduleType Java Examples

The following examples show how to use org.nd4j.linalg.schedule.ScheduleType. They are drawn from open source projects; the source file, originating project, and license are noted above each example.
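
ScheduleType is a two-value enum, ITERATION and EPOCH, that tells a schedule whether to advance per minibatch iteration or per epoch. As a quick orientation before the examples (a minimal sketch, not taken from any of the projects below), any ISchedule built with a ScheduleType can be queried directly via valueAt(iteration, epoch):

import org.nd4j.linalg.schedule.ISchedule;
import org.nd4j.linalg.schedule.ScheduleType;
import org.nd4j.linalg.schedule.StepSchedule;

// Decays by a factor of 0.1 every 1000 iterations; the epoch argument is
// irrelevant here because the schedule uses ScheduleType.ITERATION.
ISchedule schedule = new StepSchedule(ScheduleType.ITERATION, 1e-2, 0.1, 1000);
double atStart = schedule.valueAt(0, 0);     // 1e-2
double later   = schedule.valueAt(2500, 0);  // 1e-2 * 0.1^2 = 1e-4
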
Example #1
Source File: SigmoidScheduleSpace.java    From deeplearning4j with Apache License 2.0
public SigmoidScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                            @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                            @NonNull @JsonProperty("gamma") ParameterSpace<Double> gamma,
                            @NonNull @JsonProperty("stepSize") ParameterSpace<Integer> stepSize){
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.gamma = gamma;
    this.stepSize = stepSize;
}
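
For context, the ScheduleSpace classes in these examples come from Arbiter, deeplearning4j's hyperparameter optimization module: they describe a search space of schedules rather than a single schedule. A hypothetical construction (the ContinuousParameterSpace range here is illustrative) might fix the schedule type and shape while searching over the initial value:

ParameterSpace<Double> initialValue = new ContinuousParameterSpace(1e-3, 1e-1);
SigmoidScheduleSpace space = new SigmoidScheduleSpace(
        ScheduleType.ITERATION,   // advance the schedule every minibatch iteration
        initialValue,             // searched over [1e-3, 1e-1]
        new FixedValue<>(0.5),    // gamma, held fixed during the search
        new FixedValue<>(100));   // stepSize, held fixed during the search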
 
Example #2
Source File: StepScheduleSpace.java    From deeplearning4j with Apache License 2.0
public StepScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                         @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                         @NonNull @JsonProperty("decayRate") ParameterSpace<Double> decayRate,
                         @NonNull @JsonProperty("step") ParameterSpace<Double> step){
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.decayRate = decayRate;
    this.step = step;
}
 
Example #3
Source File: InverseScheduleSpace.java    From deeplearning4j with Apache License 2.0
public InverseScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                            @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                            @NonNull @JsonProperty("gamma") ParameterSpace<Double> gamma,
                            @NonNull @JsonProperty("power") ParameterSpace<Double> power){
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.gamma = gamma;
    this.power = power;
}
 
Example #4
Source File: LayerConfigValidationTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testNesterovsNotSetGlobal() {
    // Expect warnings to be logged only; the configuration should not throw
    Map<Integer, Double> testMomentumAfter = new HashMap<>();
    testMomentumAfter.put(0, 0.1);

    MultiLayerConfiguration conf =
            new NeuralNetConfiguration.Builder().updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter))).list()
                    .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
                    .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build()).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
}
 
Example #5
Source File: ExponentialScheduleSpace.java    From deeplearning4j with Apache License 2.0
public ExponentialScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                                @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                                @NonNull @JsonProperty("gamma") ParameterSpace<Double> gamma){
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.gamma = gamma;
}
 
Example #6
Source File: PolyScheduleSpace.java    From deeplearning4j with Apache License 2.0
public PolyScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                         @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                         @NonNull @JsonProperty("power") ParameterSpace<Double> power,
                         @NonNull @JsonProperty("maxIter") ParameterSpace<Integer> maxIter){
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.power = power;
    this.maxIter = maxIter;
}
 
Example #7
Source File: PolyScheduleSpace.java    From deeplearning4j with Apache License 2.0
public PolyScheduleSpace(@NonNull ScheduleType scheduleType, @NonNull ParameterSpace<Double> initialValue,
                         double power, int maxIter){
    this(scheduleType, initialValue, new FixedValue<>(power), new FixedValue<>(maxIter));
}
 
Example #8
Source File: SigmoidScheduleSpace.java    From deeplearning4j with Apache License 2.0
public SigmoidScheduleSpace(@NonNull ScheduleType scheduleType, @NonNull ParameterSpace<Double> initialValue,
                            double gamma, int stepSize){
    this(scheduleType, initialValue, new FixedValue<>(gamma), new FixedValue<>(stepSize));
}
 
Example #9
Source File: StepScheduleSpace.java    From deeplearning4j with Apache License 2.0
public StepScheduleSpace(@NonNull ScheduleType scheduleType, @NonNull ParameterSpace<Double> initialValue,
                         double decayRate, double step){
    this(scheduleType, initialValue, new FixedValue<>(decayRate), new FixedValue<>(step));
}
 
Example #10
Source File: InverseScheduleSpace.java    From deeplearning4j with Apache License 2.0
public InverseScheduleSpace(@NonNull ScheduleType scheduleType, @NonNull ParameterSpace<Double> initialValue,
                            double gamma, double power){
    this(scheduleType, initialValue, new FixedValue<>(gamma), new FixedValue<>(power));
}
 
Example #11
Source File: ExponentialScheduleSpace.java    From deeplearning4j with Apache License 2.0
public ExponentialScheduleSpace(@NonNull ScheduleType scheduleType,
                                @NonNull ParameterSpace<Double> initialValue, double gamma){
    this(scheduleType, initialValue, new FixedValue<>(gamma));
}
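
Examples #7 through #11 are convenience overloads of the full constructors shown earlier: each scalar argument is wrapped in a FixedValue, so only the remaining ParameterSpace arguments take part in the search. For instance (an illustrative call; the range is hypothetical):

// Equivalent to passing new FixedValue<>(0.9) as the gamma parameter space
ExponentialScheduleSpace space = new ExponentialScheduleSpace(
        ScheduleType.EPOCH, new ContinuousParameterSpace(1e-3, 1e-1), 0.9);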
 
Example #12
Source File: DL4JSequenceRecommender.java    From inception with Apache License 2.0
private MultiLayerNetwork createConfiguredNetwork(DL4JSequenceRecommenderTraits aTraits,
        int aEmbeddingsDim)
{
    long start = System.currentTimeMillis();
    
    // Set up network configuration
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(aTraits.getOptimizationAlgorithm())
            .updater(new Nesterovs(
                    new StepSchedule(ScheduleType.ITERATION, 1e-2, 0.1, 100000), 0.9))
            .biasUpdater(new Nesterovs(
                    new StepSchedule(ScheduleType.ITERATION, 2e-2, 0.1, 100000), 0.9))
            .l2(aTraits.getL2())
            .weightInit(aTraits.getWeightInit())
            .gradientNormalization(aTraits.getGradientNormalization())
            .gradientNormalizationThreshold(aTraits.getGradientNormalizationThreshold())
            .list()
            .layer(0, new Bidirectional(Bidirectional.Mode.ADD, new LSTM.Builder()
                    .nIn(aEmbeddingsDim)
                    .nOut(200)
                    .activation(aTraits.getActivationL0())
                    .build()))
            .layer(1, new RnnOutputLayer.Builder()
                    .nIn(200)
                    .nOut(aTraits.getMaxTagsetSize())
                    .activation(aTraits.getActivationL1())
                    .lossFunction(aTraits.getLossFunction())
                    .build())
            .build();
    
    // log.info("Network configuration: {}", conf.toYaml());

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    // net.setListeners(new ScoreIterationListener(1));
    
    log.trace("Setting up the model took {}ms", System.currentTimeMillis() - start);
    
    return net;
}
 
Example #13
Source File: TestKryo.java    From deeplearning4j with Apache License 2.0
@Test
public void testSerializationConfigurations() {

    SerializerInstance si = sc.env().serializer().newInstance();

    //Check network configurations:
    Map<Integer, Double> m = new HashMap<>();
    m.put(0, 0.5);
    m.put(10, 0.1);
    MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder()
                    .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION, m))).list()
                    .layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build())
                    .build();

    testSerialization(mlc, si);


    ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder()
                    .dist(new UniformDistribution(-1, 1))
                    .updater(new Adam(new MapSchedule(ScheduleType.ITERATION, m)))
                    .graphBuilder()
                    .addInputs("in").addLayer("out", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in")
                    .setOutputs("out").build();

    testSerialization(cgc, si);


    //Check main layers:
    Layer[] layers = new Layer[] {new OutputLayer.Builder().nIn(10).nOut(10).build(),
                    new RnnOutputLayer.Builder().nIn(10).nOut(10).build(), new LossLayer.Builder().build(),
                    new CenterLossOutputLayer.Builder().nIn(10).nOut(10).build(),
                    new DenseLayer.Builder().nIn(10).nOut(10).build(),
                    new ConvolutionLayer.Builder().nIn(10).nOut(10).build(), new SubsamplingLayer.Builder().build(),
                    new Convolution1DLayer.Builder(2, 2).nIn(10).nOut(10).build(),
                    new ActivationLayer.Builder().activation(Activation.TANH).build(),
                    new GlobalPoolingLayer.Builder().build(), new GravesLSTM.Builder().nIn(10).nOut(10).build(),
                    new LSTM.Builder().nIn(10).nOut(10).build(), new DropoutLayer.Builder(0.5).build(),
                    new BatchNormalization.Builder().build(), new LocalResponseNormalization.Builder().build()};

    for (Layer l : layers) {
        testSerialization(l, si);
    }

    //Check graph vertices
    GraphVertex[] vertices = new GraphVertex[] {new ElementWiseVertex(ElementWiseVertex.Op.Add),
                    new L2NormalizeVertex(), new LayerVertex(null, null), new MergeVertex(), new PoolHelperVertex(),
                    new PreprocessorVertex(new CnnToFeedForwardPreProcessor(28, 28, 1)),
                    new ReshapeVertex(new int[] {1, 1}), new ScaleVertex(1.0), new ShiftVertex(1.0),
                    new SubsetVertex(1, 1), new UnstackVertex(0, 2), new DuplicateToTimeSeriesVertex("in1"),
                    new LastTimeStepVertex("in1")};

    for (GraphVertex gv : vertices) {
        testSerialization(gv, si);
    }
}
 
Example #14
Source File: ValidateCuDNN.java    From deeplearning4j with Apache License 2.0
@Test @Ignore //AB 2019/05/20 - https://github.com/deeplearning4j/deeplearning4j/issues/5088 - ignored to get to "all passing" state for CI, and revisit later
public void validateConvLayersLRN() {
    //Test ONLY LRN - no other CuDNN functionality (i.e., DL4J impls for everything else)
    Nd4j.getRandom().setSeed(12345);

    int minibatch = 8;
    int numClasses = 10;
    //imageHeight,imageWidth,channels
    int imageHeight = 48;
    int imageWidth = 48;
    int channels = 3;
    IActivation activation = new ActivationIdentity();
    MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder()
            .dataType(DataType.DOUBLE)
            .weightInit(WeightInit.XAVIER).seed(42)
            .activation(new ActivationELU())
            .updater(Nesterovs.builder()
                    .momentum(0.9)
                    .learningRateSchedule(new StepSchedule(
                            ScheduleType.EPOCH,
                            1e-2,
                            0.1,
                            20)).build()).list(
                    new Convolution2D.Builder().nOut(96)
                            .kernelSize(11, 11).biasInit(0.0)
                            .stride(4, 4).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new LocalResponseNormalization.Builder()
                            .alpha(1e-3).beta(0.75).k(2)
                            .n(5).build(),
                    new Pooling2D.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2)
                            .build(),
                    new Convolution2D.Builder().nOut(256)
                            .kernelSize(5, 5).padding(2, 2)
                            .biasInit(0.0)
                            .stride(1, 1).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new OutputLayer.Builder().activation(new ActivationSoftmax())
                            .lossFunction(new LossNegativeLogLikelihood())
                            .nOut(numClasses)
                            .biasInit(0.0)
                            .build())
            .setInputType(InputType.convolutionalFlat(imageHeight, imageWidth, channels))
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration);
    net.init();

    int[] fShape = new int[]{minibatch, channels, imageHeight, imageWidth};
    int[] lShape = new int[]{minibatch, numClasses};

    List<Class<?>> classesToTest = new ArrayList<>();
    classesToTest.add(org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization.class);

    validateLayers(net, classesToTest, false, fShape, lShape, 1e-2, 1e-2);
}
 
Example #15
Source File: ValidateCuDNN.java    From deeplearning4j with Apache License 2.0
@Test
public void validateConvLayersSimpleBN() {
    //Test ONLY BN - no other CuDNN functionality (i.e., DL4J impls for everything else)
    Nd4j.getRandom().setSeed(12345);

    int minibatch = 8;
    int numClasses = 10;
    //imageHeight,imageWidth,channels
    int imageHeight = 48;
    int imageWidth = 48;
    int channels = 3;
    IActivation activation = new ActivationIdentity();
    MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder()
            .dataType(DataType.DOUBLE)
            .weightInit(WeightInit.XAVIER).seed(42)
            .activation(new ActivationELU())
            .updater(Nesterovs.builder()
                    .momentum(0.9)
                    .learningRateSchedule(new StepSchedule(
                            ScheduleType.EPOCH,
                            1e-2,
                            0.1,
                            20)).build()).list(
                    new Convolution2D.Builder().nOut(96)
                            .kernelSize(11, 11).biasInit(0.0)
                            .stride(4, 4).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new BatchNormalization.Builder().build(),
                    new Pooling2D.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2)
                            .build(),
                    new DenseLayer.Builder()
                            .nOut(128)
                            .biasInit(0.0)
                            .build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new OutputLayer.Builder().activation(new ActivationSoftmax())
                            .lossFunction(new LossNegativeLogLikelihood())
                            .nOut(numClasses)
                            .biasInit(0.0)
                            .build())
            .setInputType(InputType.convolutionalFlat(imageHeight, imageWidth, channels))
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration);
    net.init();

    int[] fShape = new int[]{minibatch, channels, imageHeight, imageWidth};
    int[] lShape = new int[]{minibatch, numClasses};

    List<Class<?>> classesToTest = new ArrayList<>();
    classesToTest.add(org.deeplearning4j.nn.layers.normalization.BatchNormalization.class);

    validateLayers(net, classesToTest, false, fShape, lShape, CuDNNValidationUtil.MAX_REL_ERROR, CuDNNValidationUtil.MIN_ABS_ERROR);
}
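
Both ValidateCuDNN tests build their StepSchedule with ScheduleType.EPOCH, so the learning rate decays based on the epoch counter rather than the iteration counter. A minimal sketch of the resulting values (same StepSchedule API as above):

ISchedule lr = new StepSchedule(ScheduleType.EPOCH, 1e-2, 0.1, 20);
// For an EPOCH schedule, the iteration argument is ignored
double atEpoch0  = lr.valueAt(0, 0);   // 1e-2
double atEpoch25 = lr.valueAt(0, 25);  // 1e-2 * 0.1 = 1e-3 (one decay step after epoch 20)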
 
Example #16
Source File: TestWeightNoise.java    From deeplearning4j with Apache License 2.0
@Test
public void testWeightNoiseConfigJson() {
    IWeightNoise[] weightNoises = new IWeightNoise[]{
            new DropConnect(0.5),
            new DropConnect(new SigmoidSchedule(ScheduleType.ITERATION, 0.5, 0.5, 100)),
            new WeightNoise(new NormalDistribution(0, 0.1))
    };

    for (IWeightNoise wn : weightNoises) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .weightNoise(wn)
                .list()
                .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
                .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build())
                .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        assertEquals(wn, ((BaseLayer) net.getLayer(0).conf().getLayer()).getWeightNoise());
        assertEquals(new DropConnect(0.25), ((BaseLayer) net.getLayer(1).conf().getLayer()).getWeightNoise());
        assertEquals(wn, ((BaseLayer) net.getLayer(2).conf().getLayer()).getWeightNoise());

        TestUtils.testModelSerialization(net);


        ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
                .weightNoise(wn)
                .graphBuilder()
                .addInputs("in")
                .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
                .layer("1", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build(), "0")
                .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1")
                .setOutputs("2")
                .build();

        ComputationGraph graph = new ComputationGraph(conf2);
        graph.init();

        assertEquals(wn, ((BaseLayer) graph.getLayer(0).conf().getLayer()).getWeightNoise());
        assertEquals(new DropConnect(0.25), ((BaseLayer) graph.getLayer(1).conf().getLayer()).getWeightNoise());
        assertEquals(wn, ((BaseLayer) graph.getLayer(2).conf().getLayer()).getWeightNoise());

        TestUtils.testModelSerialization(graph);

        graph.fit(new DataSet(Nd4j.create(1,10), Nd4j.create(1,10)));
    }
}
 
Example #17
Source File: TestDropout.java    From deeplearning4j with Apache License 2.0
@Test
public void testDropoutValues(){
    Nd4j.getRandom().setSeed(12345);

    Dropout d = new Dropout(0.5);

    INDArray in = Nd4j.ones(10, 10);
    INDArray out = d.applyDropout(in, Nd4j.create(10,10), 0, 0, LayerWorkspaceMgr.noWorkspacesImmutable());

    assertEquals(in, Nd4j.ones(10, 10));

    int countZeros = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(0))).getInt(0);
    int countTwos = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(2))).getInt(0);

    assertEquals(100, countZeros + countTwos);  //Should only be 0 or 2
    //Stochastic, but this should hold for most cases
    assertTrue(countZeros >= 25 && countZeros <= 75);
    assertTrue(countTwos >= 25 && countTwos <= 75);

    //Test schedule:
    d = new Dropout(new MapSchedule.Builder(ScheduleType.ITERATION).add(0, 0.5).add(5, 0.1).build());
    for( int i=0; i<10; i++ ) {
        out = d.applyDropout(in, Nd4j.create(in.shape()), i, 0, LayerWorkspaceMgr.noWorkspacesImmutable());
        assertEquals(in, Nd4j.ones(10, 10));
        countZeros = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(0))).getInt(0);

        if(i < 5){
            countTwos = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(2))).getInt(0);
            assertEquals(String.valueOf(i), 100, countZeros + countTwos);  //Should only be 0 or 2
            //Stochastic, but this should hold for most cases
            assertTrue(countZeros >= 25 && countZeros <= 75);
            assertTrue(countTwos >= 25 && countTwos <= 75);
        } else {
            int countInverse = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(1.0/0.1))).getInt(0);
            assertEquals(100, countZeros + countInverse);  //Should only be 0 or 10
            //Stochastic, but this should hold for most cases
            assertTrue(countZeros >= 80);
            assertTrue(countInverse <= 20);
        }
    }
}
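
The assertions above depend on DL4J's inverted dropout scaling: with retain probability p, surviving activations are scaled by 1/p, hence values of 2 while p = 0.5 and 10 once the schedule drops p to 0.1. A minimal sketch (using only the MapSchedule API already shown) of the scheduled retain probabilities the test expects:

ISchedule p = new MapSchedule.Builder(ScheduleType.ITERATION)
        .add(0, 0.5)
        .add(5, 0.1)
        .build();
for (int i = 0; i < 10; i++) {
    System.out.println(i + " -> " + p.valueAt(i, 0));  // 0.5 for i < 5, then 0.1
}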
 
Example #18
Source File: TestLrChanges.java    From deeplearning4j with Apache License 2.0
@Test
public void testChangeLrCompGraphSchedule(){
    //First: train with a fixed LR, then set an LR schedule and compare vs. an equivalent net config built with that schedule
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .seed(12345)
            .updater(new Adam(0.1))
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
            .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0")
            .addLayer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1")
            .setOutputs("2")
            .build();

    ComputationGraph net = new ComputationGraph(conf);
    net.init();

    for( int i=0; i<10; i++ ){
        net.fit(new DataSet(Nd4j.rand(10,10), Nd4j.rand(10,10)));
    }


    ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .seed(12345)
            .updater(new Adam(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8)))
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
            .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0")
            .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1")
            .setOutputs("2")
            .build();
    ComputationGraph net2 = new ComputationGraph(conf2);
    net2.init();
    net2.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray());
    conf2.setIterationCount(conf.getIterationCount());
    net2.setParams(net.params().dup());

    net.setLearningRate(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8));  //Replace the fixed LR with the schedule, for the entire network

    assertEquals(conf, conf2);
    assertEquals(conf.toJson(), conf2.toJson());

    assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray());

    //Perform some parameter updates - check things are actually in sync...
    for( int i=0; i<3; i++ ){
        INDArray in = Nd4j.rand(10, 10);
        INDArray l = Nd4j.rand(10, 10);

        net.fit(new DataSet(in, l));
        net2.fit(new DataSet(in, l));
    }

    assertEquals(net.params(), net2.params());
    assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray());
}
 
Example #19
Source File: TestLrChanges.java    From deeplearning4j with Apache License 2.0
@Test
public void testChangeLrMLNSchedule(){
    //First: train with a fixed LR, then set an LR schedule and compare vs. an equivalent net config built with that schedule
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .seed(12345)
            .updater(new Adam(0.1))
            .list()
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    for( int i=0; i<10; i++ ){
        net.fit(Nd4j.rand(10,10), Nd4j.rand(10,10));
    }


    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .seed(12345)
            .updater(new Adam(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8)))
            .list()
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build())
            .build();
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net2.init();
    net2.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray());
    conf2.setIterationCount(conf.getIterationCount());
    net2.setParams(net.params().dup());

    net.setLearningRate(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8));  //Replace the fixed LR with the schedule, for the entire network

    assertEquals(conf, conf2);
    assertEquals(conf.toJson(), conf2.toJson());

    assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray());

    //Perform some parameter updates - check things are actually in sync...
    for( int i=0; i<3; i++ ){
        INDArray in = Nd4j.rand(10, 10);
        INDArray l = Nd4j.rand(10, 10);

        net.fit(in, l);
        net2.fit(in, l);
    }

    assertEquals(net.params(), net2.params());
    assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray());
}
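
In both TestLrChanges tests, the schedule handed to setLearningRate is an ExponentialSchedule, whose value at iteration i is initialValue * gamma^i (0.5 * 0.8^i here). A quick sketch of the learning rates the updater will see:

ISchedule lr = new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8);
for (int i = 0; i <= 3; i++) {
    System.out.println("iteration " + i + ": lr = " + lr.valueAt(i, 0));
    // 0.5, 0.4, 0.32, 0.256
}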
 
Example #20
Source File: AlexNetTrain.java    From dl4j-tutorials with MIT License
public static MultiLayerNetwork alexnetModel() {
    /*
     * AlexNet model interpretation, based on the original paper "ImageNet
     * Classification with Deep Convolutional Neural Networks" and the
     * referenced imagenetExample code:
     * http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
     */

    double nonZeroBias = 1;
    double dropOut = 0.8;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(seed)
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new NormalDistribution(0.0, 0.01))
            .activation(Activation.RELU)
            .updater(new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 0.1, 0.1, 100000), 0.9))
            .biasUpdater(new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 0.2, 0.1, 100000), 0.9))
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) // normalize to prevent vanishing or exploding gradients
            //.l2(5 * 1e-4)
            .list()
            .layer(0, convInit("cnn1", channels, 96, new int[]{11, 11}, new int[]{4, 4}, new int[]{3, 3}, 0))
            .layer(1, new LocalResponseNormalization.Builder().name("lrn1").build())
            .layer(2, maxPool("maxpool1", new int[]{3,3}))
            .layer(3, conv5x5("cnn2", 256, new int[] {1,1}, new int[] {2,2}, nonZeroBias))
            .layer(4, new LocalResponseNormalization.Builder().name("lrn2").build())
            .layer(5, maxPool("maxpool2", new int[]{3,3}))
            .layer(6, conv3x3("cnn3", 384, 0))
            .layer(7, conv3x3("cnn4", 384, nonZeroBias))
            .layer(8, conv3x3("cnn5", 256, nonZeroBias))
            .layer(9, maxPool("maxpool3", new int[]{3,3}))
            .layer(10, fullyConnected("ffn1", 4096, nonZeroBias, dropOut, new GaussianDistribution(0, 0.005)))
            .layer(11, fullyConnected("ffn2", 4096, nonZeroBias, dropOut, new GaussianDistribution(0, 0.005)))
            .layer(12, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .name("output")
                    .nOut(numLabels)
                    .activation(Activation.SOFTMAX)
                    .build())
            .backprop(true)
            .pretrain(false)
            .setInputType(InputType.convolutional(height, width, channels))
            .build();

    return new MultiLayerNetwork(conf);

}