org.nd4j.parameterserver.distributed.conf.VoidConfiguration Java Examples

The following examples show how to use org.nd4j.parameterserver.distributed.conf.VoidConfiguration. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AeronUdpTransportTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Starts one transport as master and a second transport that points at the same root
 * address, then checks that the master sees two nodes and that both transports report
 * equal meshes.
 *
 * @throws Exception if a transport fails to start or close, or a sleep is interrupted
 */
@Test
//@Ignore
public void testBasic_Connection_1() throws Exception {
    // try-with-resources closes both transports after the test, to avoid issues with shmem
    try (val transportA = new AeronUdpTransport(IP, ROOT_PORT, IP, ROOT_PORT, VoidConfiguration.builder().build());
         val transportB = new AeronUdpTransport(IP, 40782, IP, ROOT_PORT, VoidConfiguration.builder().build())) {

        // bring up the master first and give it a moment
        transportA.launchAsMaster();
        Thread.sleep(50);

        // then start the second node and wait again before inspecting the mesh
        transportB.launch();
        Thread.sleep(50);

        assertEquals(2, transportA.getMesh().totalNodes());
        assertEquals(transportA.getMesh(), transportB.getMesh());
    }
}
 
Example #2
Source File: InterleavedRouterTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixture: a routed transport with a fixed address, the originator id
 * hashed from that transport's {@code ip:port} string, and a four-shard configuration.
 */
@Before
public void setUp() {
    transport = new RoutedTransport();
    transport.setIpAndPort("8.9.10.11", 87312);

    final String endpoint = transport.getIp() + ":" + transport.getPort();
    originator = HashUtil.getLongHash(endpoint);

    configuration = VoidConfiguration.builder()
                    .shardAddresses(Arrays.asList("1.2.3.4", "2.3.4.5", "3.4.5.6", "4.5.6.7"))
                    // the shard count is set manually here
                    .numberOfShards(4)
                    .build();
}
 
Example #3
Source File: VoidParameterServerTest.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a node initialized with {@code badIPs} as both shard and backup addresses
 * reports the {@link NodeRole#CLIENT} role.
 *
 * @throws Exception if initialization or shutdown of the parameter server fails
 */
@Test
public void testNodeRole3() throws Exception {
    final VoidConfiguration conf = VoidConfiguration.builder().unicastPort(34567).multicastPort(45678)
                    .numberOfShards(10).shardAddresses(badIPs).backupAddresses(badIPs).multicastNetwork("224.0.1.1")
                    .ttl(4).build();

    VoidParameterServer node = new VoidParameterServer();
    node.init(conf, transport, new SkipGramTrainer());

    try {
        assertEquals(NodeRole.CLIENT, node.getNodeRole());
    } finally {
        // shut the node down even when the assertion fails, so it is not left running
        node.shutdown();
    }
}
 
Example #4
Source File: VoidParameterServerStressTest.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * This test checks for single Shard scenario, when Shard is also a Client.
 * It sends 200 frames of 128 messages each through {@code execDistributed}, timing every
 * call, and logs the median call time in microseconds.
 *
 * @throws Exception if the parameter server fails during setup, execution or shutdown
 */
@Test
public void testPerformanceUnicast3() throws Exception {
    VoidConfiguration voidConfiguration = VoidConfiguration.builder().unicastPort(49823).numberOfShards(1)
                    .shardAddresses(Arrays.asList("127.0.0.1:49823")).build();

    Transport transport = new RoutedTransport();
    transport.setIpAndPort("127.0.0.1", 49823);

    VoidParameterServer parameterServer = new VoidParameterServer(NodeRole.SHARD);
    parameterServer.setShardIndex((short) 0);
    parameterServer.init(voidConfiguration, transport, new CbowTrainer());

    try {
        parameterServer.initializeSeqVec(100, NUM_WORDS, 123L, 100, true, false);

        final List<Long> times = new ArrayList<>();

        log.info("Starting loop...");
        for (int i = 0; i < 200; i++) {
            Frame<CbowRequestMessage> frame = new Frame<>(BasicSequenceProvider.getInstance().getNextValue());
            for (int f = 0; f < 128; f++) {
                frame.stackMessage(getCRM());
            }

            // only the distributed execution itself is timed, not the frame construction
            long time1 = System.nanoTime();
            parameterServer.execDistributed(frame);
            long time2 = System.nanoTime();

            times.add(time2 - time1);

            if (i % 50 == 0) {
                log.info("{} frames passed...", i);
            }
        }

        Collections.sort(times);

        // middle element of the sorted samples, converted from nanoseconds to microseconds
        log.info("p50: {} us", times.get(times.size() / 2) / 1000);
    } finally {
        // release the server even if the loop above throws
        parameterServer.shutdown();
    }
}
 
Example #5
Source File: VoidParameterServerTest.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a node initialized with {@code localIPs} as shard addresses reports the
 * {@link NodeRole#SHARD} role.
 *
 * @throws Exception if initialization or shutdown of the parameter server fails
 */
@Test
public void testNodeRole1() throws Exception {
    final VoidConfiguration conf = VoidConfiguration.builder().unicastPort(34567).multicastPort(45678)
                    .numberOfShards(10).multicastNetwork("224.0.1.1").shardAddresses(localIPs).ttl(4).build();

    VoidParameterServer node = new VoidParameterServer();
    node.init(conf, transport, new SkipGramTrainer());

    try {
        assertEquals(NodeRole.SHARD, node.getNodeRole());
    } finally {
        // shut the node down even when the assertion fails, so it is not left running
        node.shutdown();
    }
}
 
Example #6
Source File: VoidParameterServerTest.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a node initialized with {@code badIPs} as shard addresses and
 * {@code localIPs} as backup addresses reports the {@link NodeRole#BACKUP} role.
 *
 * @throws Exception if initialization or shutdown of the parameter server fails
 */
@Test
public void testNodeRole2() throws Exception {
    final VoidConfiguration conf = VoidConfiguration.builder().unicastPort(34567).multicastPort(45678)
                    .numberOfShards(10).shardAddresses(badIPs).backupAddresses(localIPs)
                    .multicastNetwork("224.0.1.1").ttl(4).build();

    VoidParameterServer node = new VoidParameterServer();
    node.init(conf, transport, new SkipGramTrainer());

    try {
        assertEquals(NodeRole.BACKUP, node.getNodeRole());
    } finally {
        // shut the node down even when the assertion fails, so it is not left running
        node.shutdown();
    }
}
 
Example #7
Source File: SharedTrainingMaster.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a builder around an explicit parameter server configuration.
 * The supplied configuration object is switched to {@link ExecutionMode#MANAGED}.
 *
 * @param voidConfiguration     configuration to use; must not be null
 * @param numWorkers            stored as the builder's worker count
 * @param thresholdAlgorithm    stored as the builder's threshold algorithm
 * @param rddDataSetNumExamples stored as the builder's RDD dataset example count
 */
public Builder(@NonNull VoidConfiguration voidConfiguration, Integer numWorkers, ThresholdAlgorithm thresholdAlgorithm, int rddDataSetNumExamples) {
    // managed mode is enforced in all cases; this mutates the instance passed in by the caller
    voidConfiguration.setExecutionMode(ExecutionMode.MANAGED);

    this.voidConfiguration = voidConfiguration;
    this.numWorkers = numWorkers;
    this.thresholdAlgorithm = thresholdAlgorithm;
    this.rddDataSetNumExamples = rddDataSetNumExamples;
}
 
Example #8
Source File: InterleavedRouter.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the router and, when no target shard index has been chosen yet
 * (the index is negative), picks one at random.
 *
 * @param voidConfiguration configuration supplying the number of shards; must not be null
 * @param transport         transport passed on to the parent initializer; must not be null
 */
@Override
public void init(@NonNull VoidConfiguration voidConfiguration, @NonNull Transport transport) {
    super.init(voidConfiguration, transport);

    // a non-negative target index is left untouched
    if (targetIndex >= 0) {
        return;
    }

    // by default messages are being routed to any random shard
    final int shardCount = voidConfiguration.getNumberOfShards();
    targetIndex = (short) RandomUtils.nextInt(0, shardCount);
}
 
Example #9
Source File: TrainerProvider.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the shared context objects and passes the same four objects to the
 * {@code init} method of every registered trainer.
 *
 * @param voidConfiguration parameter server configuration; must not be null
 * @param transport         transport handed to each trainer; must not be null
 * @param storage           storage handed to each trainer; must not be null
 * @param clipboard         clipboard handed to each trainer; must not be null
 */
public void init(@NonNull VoidConfiguration voidConfiguration, @NonNull Transport transport,
                @NonNull Storage storage, @NonNull Clipboard clipboard) {
    this.storage = storage;
    this.clipboard = clipboard;
    this.transport = transport;
    this.voidConfiguration = voidConfiguration;

    // propagate the context to every trainer held in the map
    for (TrainingDriver<?> driver : trainers.values()) {
        driver.init(voidConfiguration, transport, storage, clipboard);
    }
}
 
Example #10
Source File: BaseTrainer.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the context objects this trainer works with.
 *
 * @param voidConfiguration parameter server configuration; must not be null
 * @param transport         transport to keep; must not be null
 * @param storage           storage to keep; must not be null
 * @param clipboard         clipboard to keep; must not be null
 */
@Override
public void init(@NonNull VoidConfiguration voidConfiguration, @NonNull Transport transport,
                @NonNull Storage storage, @NonNull Clipboard clipboard) {
    this.voidConfiguration = voidConfiguration;
    this.transport = transport;
    this.storage = storage;
    this.clipboard = clipboard;
}
 
Example #11
Source File: BaseVoidMessage.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches the runtime context to this message by storing every argument in the
 * corresponding field.
 *
 * @param voidConfiguration parameter server configuration; must not be null
 * @param trainer           training driver; must not be null
 * @param clipboard         clipboard; must not be null
 * @param transport         transport; must not be null
 * @param storage           storage; must not be null
 * @param role              node role; must not be null
 * @param shardIndex        shard index value to store
 */
@Override
public void attachContext(@NonNull VoidConfiguration voidConfiguration,
                @NonNull TrainingDriver<? extends TrainingMessage> trainer, @NonNull Clipboard clipboard,
                @NonNull Transport transport, @NonNull Storage storage, @NonNull NodeRole role, short shardIndex) {
    // assignments follow the parameter order
    this.voidConfiguration = voidConfiguration;
    this.trainer = trainer;
    this.clipboard = clipboard;
    this.transport = transport;
    this.storage = storage;
    this.role = role;
    this.shardIndex = shardIndex;
}
 
Example #12
Source File: InterleavedRouterTest.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixture: a routed transport with a fixed address, the originator id
 * hashed from that transport's {@code ip:port} string, and a four-shard configuration.
 */
@Before
public void setUp() {
    transport = new RoutedTransport();
    transport.setIpAndPort("8.9.10.11", 87312);

    final String endpoint = transport.getIp() + ":" + transport.getPort();
    originator = HashUtil.getLongHash(endpoint);

    configuration = VoidConfiguration.builder()
                    .shardAddresses(Arrays.asList("1.2.3.4", "2.3.4.5", "3.4.5.6", "4.5.6.7"))
                    // the shard count is set manually here
                    .numberOfShards(4)
                    .build();
}
 
Example #13
Source File: SharedTrainingMaster.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a builder around an explicit parameter server configuration.
 * The supplied configuration object is switched to {@link ExecutionMode#MANAGED}.
 *
 * @param voidConfiguration     Configuration bean for the SharedTrainingMaster parameter server; must not be null
 * @param thresholdAlgorithm    Update sharing threshold algorithm
 * @param rddDataSetNumExamples stored as the builder's RDD dataset example count
 */
public Builder(@NonNull VoidConfiguration voidConfiguration, ThresholdAlgorithm thresholdAlgorithm, int rddDataSetNumExamples) {
    // managed mode is enforced in all cases; this mutates the instance passed in by the caller
    voidConfiguration.setExecutionMode(ExecutionMode.MANAGED);

    this.voidConfiguration = voidConfiguration;
    this.thresholdAlgorithm = thresholdAlgorithm;
    this.rddDataSetNumExamples = rddDataSetNumExamples;
}
 
Example #14
Source File: SharedTrainingMaster.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Create a SharedTrainingMaster builder with a default configuration: managed execution
 * mode, a forced {@link NodeRole#SHARD} role, and the controller address read from the
 * {@code SPARK_PUBLIC_DNS} environment variable.
 *
 * @param thresholdAlgorithm    Threshold algorithm for the sparse update encoding
 * @param rddDataSetNumExamples When fitting from an {@code RDD<DataSet>} how many examples are in each dataset?
 */
public Builder(ThresholdAlgorithm thresholdAlgorithm, int rddDataSetNumExamples) {
    this(
                    VoidConfiguration.builder()
                                    .executionMode(ExecutionMode.MANAGED)
                                    .forcedRole(NodeRole.SHARD)
                                    // we're setting controller to Spark Master, if it's null - that's ok for now.
                                    .controllerAddress(System.getenv("SPARK_PUBLIC_DNS"))
                                    .build(),
                    thresholdAlgorithm,
                    rddDataSetNumExamples);
}
 
Example #15
Source File: SharedTrainingMaster.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Full constructor. Every argument is stored in the field of the same name.
 * It also resets the first-run flag to {@code false}, creates a stats helper when
 * {@code collectTrainingStats} is set, derives the training master UID from the current
 * time and at most the first eight characters of the JVM UID, and takes the next
 * instance id from the shared counter.
 *
 * @param voidConfiguration parameter server configuration; must not be null
 */
public SharedTrainingMaster(@NonNull VoidConfiguration voidConfiguration, Integer numWorkers,
                RDDTrainingApproach rddTrainingApproach, StorageLevel storageLevel, boolean collectTrainingStats,
                RepartitionStrategy repartitionStrategy, Repartition repartition,
                ThresholdAlgorithm thresholdAlgorithm, ResidualPostProcessor residualPostProcessor,
                int rddDataSetNumExamples,
                int batchSizePerWorker, long debugLongerIterations, int numWorkersPerNode, int workerPrefetchBatches,
                Repartitioner repartitioner, Boolean workerTogglePeriodicGC, Integer workerPeriodicGCFrequency,
                boolean encodingDebugMode) {
    // plain field assignments, in parameter order
    this.voidConfiguration = voidConfiguration;
    this.numWorkers = numWorkers;
    this.rddTrainingApproach = rddTrainingApproach;
    this.storageLevel = storageLevel;
    this.collectTrainingStats = collectTrainingStats;
    this.repartitionStrategy = repartitionStrategy;
    this.repartition = repartition;
    this.thresholdAlgorithm = thresholdAlgorithm;
    this.residualPostProcessor = residualPostProcessor;
    this.rddDataSetNumExamples = rddDataSetNumExamples;
    this.batchSizePerWorker = batchSizePerWorker;
    this.debugLongerIterations = debugLongerIterations;
    this.numWorkersPerNode = numWorkersPerNode;
    this.workerPrefetchBatches = workerPrefetchBatches;
    this.repartitioner = repartitioner;
    this.workerTogglePeriodicGC = workerTogglePeriodicGC;
    this.workerPeriodicGCFrequency = workerPeriodicGCFrequency;
    this.encodingDebugMode = encodingDebugMode;

    this.isFirstRun = new AtomicBoolean(false);

    if (collectTrainingStats) {
        stats = new ParameterAveragingTrainingMasterStats.ParameterAveragingTrainingMasterStatsHelper();
    }

    // UID format: <current millis>_<JVM UID truncated to 8 characters>
    final String jvmuid = UIDProvider.getJVMUID();
    final String shortUid;
    if (jvmuid.length() > 8) {
        shortUid = jvmuid.substring(0, 8);
    } else {
        shortUid = jvmuid;
    }
    this.trainingMasterUID = System.currentTimeMillis() + "_" + shortUid;

    instanceId = INSTANCE_COUNTER.getAndIncrement();
}
 
Example #16
Source File: SilentUpdatesMessage.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches the runtime context to this message. Only the configuration, trainer and
 * transport are stored; the clipboard, storage, role and shard index arguments are
 * not used by this implementation.
 */
@Override
public void attachContext(VoidConfiguration voidConfiguration, TrainingDriver<? extends TrainingMessage> trainer,
                Clipboard clipboard, Transport transport, Storage storage, NodeRole role, short shardIndex) {
    this.transport = transport;
    this.trainer = trainer;
    this.voidConfiguration = voidConfiguration;
}
 
Example #17
Source File: Frame.java    From nd4j with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches the runtime context to this frame by storing every argument in the
 * corresponding field.
 *
 * @param voidConfiguration parameter server configuration; must not be null
 * @param trainer           training driver; must not be null
 * @param clipboard         clipboard; must not be null
 * @param transport         transport; must not be null
 * @param storage           storage; must not be null
 * @param role              node role; must not be null
 * @param shardIndex        shard index value to store
 */
@Override
public void attachContext(@NonNull VoidConfiguration voidConfiguration,
                @NonNull TrainingDriver<? extends TrainingMessage> trainer, @NonNull Clipboard clipboard,
                @NonNull Transport transport, @NonNull Storage storage, @NonNull NodeRole role, short shardIndex) {
    // assignments follow the parameter order
    this.voidConfiguration = voidConfiguration;
    this.trainer = trainer;
    this.clipboard = clipboard;
    this.transport = transport;
    this.storage = storage;
    this.role = role;
    this.shardIndex = shardIndex;
}
 
Example #18
Source File: AeronUdpTransport.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a UDP transport: builds the own and root {@code aeron:udp?endpoint=ip:port}
 * channel strings for the parent constructor, validates both ports, launches an embedded
 * media driver, connects an Aeron client to that driver's directory, and registers a JVM
 * shutdown hook that calls {@link #shutdown()}.
 *
 * @param ownIp         IP of this node; must not be null
 * @param ownPort       UDP port of this node, 1..65535
 * @param rootIp        IP of the root (master) node; must not be null
 * @param rootPort      UDP port of the root (master) node, 1..65535
 * @param configuration configuration passed to the parent constructor; must not be null
 */
public AeronUdpTransport(@NonNull String ownIp, int ownPort, @NonNull String rootIp, int rootPort, @NonNull VoidConfiguration configuration) {
    super("aeron:udp?endpoint=" + ownIp + ":" + ownPort, "aeron:udp?endpoint=" + rootIp + ":" + rootPort, configuration);

    // the checks accept 1..65535 inclusive, so the messages state that same upper bound
    Preconditions.checkArgument(ownPort > 0 && ownPort < 65536, "Own UDP port should be positive value in range of 1 and 65535");
    Preconditions.checkArgument(rootPort > 0 && rootPort < 65536, "Master node UDP port should be positive value in range of 1 and 65535");

    // NOTE(review): the unit of this timeout value is not visible here - confirm against Aeron docs
    setProperty("aeron.client.liveness.timeout", "30000000000");

    // setting this property to try to increase maxmessage length, not sure if it still works though
    //Term buffer length: must be power of 2 and in range 64kB to 1GB: https://github.com/real-logic/aeron/wiki/Configuration-Options
    // the default is applied only when the system property has not been set already
    String p = System.getProperty(ND4JSystemProperties.AERON_TERM_BUFFER_PROP);
    if(p == null){
        System.setProperty(ND4JSystemProperties.AERON_TERM_BUFFER_PROP, String.valueOf(DEFAULT_TERM_BUFFER_PROP));
    }

    splitter = MessageSplitter.getInstance();

    context = new Aeron.Context().driverTimeoutMs(30000)
            .keepAliveInterval(100000000);
    AeronUtil.setDaemonizedThreadFactories(context);

    final MediaDriver.Context mediaDriverCtx = new MediaDriver.Context();
    AeronUtil.setDaemonizedThreadFactories(mediaDriverCtx);

    // the client must use the embedded driver's directory, so the driver is launched first
    driver = MediaDriver.launchEmbedded(mediaDriverCtx);
    context.aeronDirectoryName(driver.aeronDirectoryName());
    aeron = Aeron.connect(context);

    Runtime.getRuntime().addShutdownHook(new Thread(() -> {
        this.shutdown();
    }));
}
 
Example #19
Source File: BaseTransport.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the node ids and configuration, installs a new mesh organizer built with the
 * configured mesh build mode, and enables master mode when the own id equals the root id
 * (compared case-insensitively). In master mode the mesh root node is given the root id.
 *
 * @param ownId             id of this node; must not be null
 * @param rootId            id of the root node; must not be null
 * @param voidConfiguration configuration supplying the mesh build mode; must not be null
 */
protected BaseTransport(@NonNull String ownId, @NonNull String rootId, @NonNull VoidConfiguration voidConfiguration) {
    this.id = ownId;
    this.rootId = rootId;
    this.voidConfiguration = voidConfiguration;
    this.mesh.set(new MeshOrganizer(voidConfiguration.getMeshBuildMode()));

    masterMode = ownId.equalsIgnoreCase(rootId);
    if (!masterMode) {
        return;
    }

    // this node is the root: label the mesh root node with the root id
    this.mesh.get().getRootNode().setId(rootId);
}
 
Example #20
Source File: PartitionTrainingFunction.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the three broadcast handles used by this function.
 *
 * @param vocabCacheBroadcast               broadcast vocabulary cache; must not be null
 * @param vectorsConfigurationBroadcast     broadcast vectors configuration, kept in
 *                                          {@code configurationBroadcast}; must not be null
 * @param paramServerConfigurationBroadcast broadcast parameter server configuration; must not be null
 */
public PartitionTrainingFunction(@NonNull Broadcast<VocabCache<ShallowSequenceElement>> vocabCacheBroadcast,
                @NonNull Broadcast<VectorsConfiguration> vectorsConfigurationBroadcast,
                @NonNull Broadcast<VoidConfiguration> paramServerConfigurationBroadcast) {
    this.paramServerConfigurationBroadcast = paramServerConfigurationBroadcast;
    this.configurationBroadcast = vectorsConfigurationBroadcast;
    this.vocabCacheBroadcast = vocabCacheBroadcast;
}
 
Example #21
Source File: DummyTransport.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a dummy transport with its own id, a connector and a new message splitter.
 *
 * @param id            id assigned to this transport
 * @param connector     connector stored by this transport
 * @param rootId        root node id passed to the parent constructor; must not be null
 * @param configuration configuration passed to the parent constructor; must not be null
 */
public DummyTransport(String id, Connector connector, @NonNull String rootId, @NonNull VoidConfiguration configuration) {
    super(rootId, configuration);

    this.splitter = new MessageSplitter();
    this.connector = connector;
    this.id = id;
}
 
Example #22
Source File: BaseVoidMessage.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches the runtime context to this message by storing every argument in the
 * corresponding field.
 *
 * @param voidConfiguration parameter server configuration; must not be null
 * @param trainer           training driver; must not be null
 * @param clipboard         clipboard; must not be null
 * @param transport         transport; must not be null
 * @param storage           storage; must not be null
 * @param role              node role; must not be null
 * @param shardIndex        shard index value to store
 */
@Override
public void attachContext(@NonNull VoidConfiguration voidConfiguration,
                @NonNull TrainingDriver<? extends TrainingMessage> trainer, @NonNull Clipboard clipboard,
                @NonNull Transport transport, @NonNull Storage storage, @NonNull NodeRole role, short shardIndex) {
    // assignments follow the parameter order
    this.voidConfiguration = voidConfiguration;
    this.trainer = trainer;
    this.clipboard = clipboard;
    this.transport = transport;
    this.storage = storage;
    this.role = role;
    this.shardIndex = shardIndex;
}
 
Example #23
Source File: Frame.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches the runtime context to this frame by storing every argument in the
 * corresponding field.
 *
 * @param voidConfiguration parameter server configuration; must not be null
 * @param trainer           training driver; must not be null
 * @param clipboard         clipboard; must not be null
 * @param transport         transport; must not be null
 * @param storage           storage; must not be null
 * @param role              node role; must not be null
 * @param shardIndex        shard index value to store
 */
@Override
public void attachContext(@NonNull VoidConfiguration voidConfiguration,
                @NonNull TrainingDriver<? extends TrainingMessage> trainer, @NonNull Clipboard clipboard,
                @NonNull Transport transport, @NonNull Storage storage, @NonNull NodeRole role, short shardIndex) {
    // assignments follow the parameter order
    this.voidConfiguration = voidConfiguration;
    this.trainer = trainer;
    this.clipboard = clipboard;
    this.transport = transport;
    this.storage = storage;
    this.role = role;
    this.shardIndex = shardIndex;
}
 
Example #24
Source File: VoidParameterServerTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes ten parameter server nodes concurrently from one shared configuration and
 * checks that every thread finished and none of the nodes got a role other than
 * {@link NodeRole#SHARD}.
 *
 * @throws Exception if the test thread is interrupted while joining the workers
 */
@Test(timeout = 60000L)
public void testNodeInitialization1() throws Exception {
    final AtomicInteger failCnt = new AtomicInteger(0);
    final AtomicInteger passCnt = new AtomicInteger(0);

    final VoidConfiguration conf = VoidConfiguration.builder().multicastPort(45678)
                    .numberOfShards(10).shardAddresses(localIPs).multicastNetwork("224.0.1.1").ttl(4).build();
    conf.setUnicastControllerPort(34567);

    Thread[] threads = new Thread[10];
    for (int t = 0; t < threads.length; t++) {
        threads[t] = new Thread(new Runnable() {
            @Override
            public void run() {
                VoidParameterServer node = new VoidParameterServer();
                node.init(conf, transport, new SkipGramTrainer());

                try {
                    if (node.getNodeRole() != NodeRole.SHARD) {
                        failCnt.incrementAndGet();
                    }

                    // counts threads that got through the role check, whatever the role was
                    passCnt.incrementAndGet();
                } finally {
                    // release the node even if the role check throws
                    node.shutdown();
                }
            }
        });

        threads[t].start();
    }

    // wait for every worker before inspecting the counters
    for (Thread worker : threads) {
        worker.join();
    }

    assertEquals(0, failCnt.get());
    assertEquals(threads.length, passCnt.get());
}
 
Example #25
Source File: VoidParameterServerTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a node initialized with {@code badIPs} as both shard and backup addresses
 * reports the {@link NodeRole#CLIENT} role.
 *
 * @throws Exception if initialization or shutdown of the parameter server fails
 */
@Test(timeout = 30000L)
public void testNodeRole3() throws Exception {
    final VoidConfiguration conf = VoidConfiguration.builder().multicastPort(45678)
                    .numberOfShards(10).shardAddresses(badIPs).backupAddresses(badIPs).multicastNetwork("224.0.1.1")
                    .ttl(4).build();
    conf.setUnicastControllerPort(34567);

    VoidParameterServer node = new VoidParameterServer();
    node.init(conf, transport, new SkipGramTrainer());

    try {
        assertEquals(NodeRole.CLIENT, node.getNodeRole());
    } finally {
        // shut the node down even when the assertion fails, so it is not left running
        node.shutdown();
    }
}
 
Example #26
Source File: VoidParameterServerTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a node initialized with {@code badIPs} as shard addresses and
 * {@code localIPs} as backup addresses reports the {@link NodeRole#BACKUP} role.
 *
 * @throws Exception if initialization or shutdown of the parameter server fails
 */
@Test(timeout = 30000L)
public void testNodeRole2() throws Exception {
    final VoidConfiguration conf = VoidConfiguration.builder().multicastPort(45678)
                    .numberOfShards(10).shardAddresses(badIPs).backupAddresses(localIPs)
                    .multicastNetwork("224.0.1.1").ttl(4).build();
    conf.setUnicastControllerPort(34567);

    VoidParameterServer node = new VoidParameterServer();
    node.init(conf, transport, new SkipGramTrainer());

    try {
        assertEquals(NodeRole.BACKUP, node.getNodeRole());
    } finally {
        // shut the node down even when the assertion fails, so it is not left running
        node.shutdown();
    }
}
 
Example #27
Source File: TrainingFunction.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the three broadcast handles used by this function.
 *
 * @param vocabCacheBroadcast               broadcast vocabulary cache; must not be null
 * @param vectorsConfigurationBroadcast     broadcast vectors configuration, kept in
 *                                          {@code configurationBroadcast}; must not be null
 * @param paramServerConfigurationBroadcast broadcast parameter server configuration; must not be null
 */
public TrainingFunction(@NonNull Broadcast<VocabCache<ShallowSequenceElement>> vocabCacheBroadcast,
                @NonNull Broadcast<VectorsConfiguration> vectorsConfigurationBroadcast,
                @NonNull Broadcast<VoidConfiguration> paramServerConfigurationBroadcast) {
    this.paramServerConfigurationBroadcast = paramServerConfigurationBroadcast;
    this.configurationBroadcast = vectorsConfigurationBroadcast;
    this.vocabCacheBroadcast = vocabCacheBroadcast;
}
 
Example #28
Source File: BaseTrainer.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the context objects this trainer works with.
 *
 * @param voidConfiguration parameter server configuration; must not be null
 * @param transport         transport to keep; must not be null
 * @param storage           storage to keep; must not be null
 * @param clipboard         clipboard to keep; must not be null
 */
@Override
public void init(@NonNull VoidConfiguration voidConfiguration, @NonNull Transport transport,
                @NonNull Storage storage, @NonNull Clipboard clipboard) {
    this.voidConfiguration = voidConfiguration;
    this.transport = transport;
    this.storage = storage;
    this.clipboard = clipboard;
}
 
Example #29
Source File: TrainerProvider.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the shared context objects and passes the same four objects to the
 * {@code init} method of every registered trainer.
 *
 * @param voidConfiguration parameter server configuration; must not be null
 * @param transport         transport handed to each trainer; must not be null
 * @param storage           storage handed to each trainer; must not be null
 * @param clipboard         clipboard handed to each trainer; must not be null
 */
public void init(@NonNull VoidConfiguration voidConfiguration, @NonNull Transport transport,
                @NonNull Storage storage, @NonNull Clipboard clipboard) {
    this.storage = storage;
    this.clipboard = clipboard;
    this.transport = transport;
    this.voidConfiguration = voidConfiguration;

    // propagate the context to every trainer held in the map
    for (TrainingDriver<?> driver : trainers.values()) {
        driver.init(voidConfiguration, transport, storage, clipboard);
    }
}
 
Example #30
Source File: InterleavedRouter.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the router and, when no target shard index has been chosen yet
 * (the index is negative), picks one at random.
 *
 * @param voidConfiguration configuration supplying the number of shards; must not be null
 * @param transport         transport passed on to the parent initializer; must not be null
 */
@Override
public void init(@NonNull VoidConfiguration voidConfiguration, @NonNull Transport transport) {
    super.init(voidConfiguration, transport);

    // a non-negative target index is left untouched
    if (targetIndex >= 0) {
        return;
    }

    // by default messages are being routed to any random shard
    final int shardCount = voidConfiguration.getNumberOfShards();
    targetIndex = (short) RandomUtils.nextInt(0, shardCount);
}