Python provider.shuffle_data() Examples

The following are 9 code examples of provider.shuffle_data(), drawn from open-source projects. You can go to the original project or source file by following the links above each example, or browse the other available functions and classes of the provider module.
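Most of these projects inherit their provider module from PointNet's provider.py, where shuffle_data permutes the data and labels along the first (batch) axis and also returns the permutation it used, so that any auxiliary per-sample arrays can be reordered to match. A minimal sketch of such an implementation, assuming the PointNet-style signature (check your project's provider.py for the exact variant):

import numpy as np

# Sketch of a PointNet-style shuffle_data; the exact variant differs per project.
def shuffle_data(data, labels):
    """ Shuffle data and labels along the batch axis.
        Input:
          data: B,N,... numpy array
          labels: B,... numpy array
        Return:
          shuffled data, shuffled labels, and the shuffle indices
    """
    idx = np.arange(len(labels))
    np.random.shuffle(idx)
    return data[idx, ...], labels[idx], idx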
Example #1
Source File: train.py    From ASIS with MIT License
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    
    log_string('----')
    current_data, current_label, shuffled_idx = provider.shuffle_data(train_data[:, 0:NUM_POINT, :], train_group)
    current_sem = train_sem[shuffled_idx]
    
    file_size = current_data.shape[0]
    num_batches = file_size // BATCH_SIZE

    loss_sum = 0
    
    for batch_idx in range(num_batches):
        if batch_idx % 100 == 0:
            print('Current batch/total batch num: %d/%d' % (batch_idx, num_batches))
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx+1) * BATCH_SIZE
        
        feed_dict = {ops['pointclouds_pl']: current_data[start_idx:end_idx, :, :],
                     ops['labels_pl']: current_label[start_idx:end_idx],
                     ops['sem_labels_pl']: current_sem[start_idx:end_idx],
                     ops['is_training_pl']: is_training,}
        summary, step, _, loss_val, sem_loss_val, disc_loss_val, l_var_val, l_dist_val, l_reg_val = sess.run(
            [ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['sem_loss'],
             ops['disc_loss'], ops['l_var'], ops['l_dist'], ops['l_reg']],
            feed_dict=feed_dict)
        train_writer.add_summary(summary, step)
        loss_sum += loss_val
        
        if batch_idx % 50 == 0:
            log_string("loss: {:.2f}; sem_loss: {:.2f}; disc_loss: {:.2f}; l_var: {:.2f}; l_dist: {:.2f}; l_reg: {:.3f}.".format(loss_val, sem_loss_val, disc_loss_val, l_var_val, l_dist_val, l_reg_val))
    
    log_string('mean loss: %f' % (loss_sum / float(num_batches))) 
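Note how the third return value is used in this example: the permutation shuffled_idx is applied to train_sem so that the semantic labels stay aligned with the shuffled point clouds and instance labels. The same pattern works for any number of per-sample arrays (Example #5 below applies it to surface normals). A hypothetical sketch:

# Hypothetical illustration: reuse the returned permutation to keep
# auxiliary per-sample arrays aligned with the shuffled data.
data, labels, idx = provider.shuffle_data(data, labels)
normals = normals[idx, ...]
colors = colors[idx, ...]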
Example #2
Source File: train.py    From PointCNN.Pytorch with MIT License
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    
    log_string('----')
    current_data, current_label, _ = provider.shuffle_data(train_data[:,0:NUM_POINT,:], train_label) 
    
    file_size = current_data.shape[0]
    num_batches = file_size // BATCH_SIZE
    
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    
    for batch_idx in range(num_batches):
        if batch_idx % 100 == 0:
            print('Current batch/total batch num: %d/%d' % (batch_idx, num_batches))
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx+1) * BATCH_SIZE
        
        feed_dict = {ops['pointclouds_pl']: current_data[start_idx:end_idx, :, :],
                     ops['labels_pl']: current_label[start_idx:end_idx],
                     ops['is_training_pl']: is_training,}
        summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
                                         feed_dict=feed_dict)
        train_writer.add_summary(summary, step)
        pred_val = np.argmax(pred_val, 2)
        correct = np.sum(pred_val == current_label[start_idx:end_idx])
        total_correct += correct
        total_seen += (BATCH_SIZE*NUM_POINT)
        loss_sum += loss_val
    
    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
    log_string('accuracy: %f' % (total_correct / float(total_seen))) 
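Unlike the classification examples that follow, the predictions here are per point: np.argmax(pred_val, 2) reduces a (batch, point, class) score tensor to one label per point, so accuracy is accumulated over BATCH_SIZE * NUM_POINT points rather than over BATCH_SIZE samples.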
Example #3
Source File: train.py    From ldgcnn with MIT License
def train_classifier_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    
    for fn in range(len(TRAIN_FILES_CLS)):    
        # Shuffle train files
        current_data, current_label = provider.loadDataFile(TRAIN_FILES_CLS[fn]) 
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))
        current_label = np.squeeze(current_label)
        # Empirically, padding with zero vectors increases the accuracy by
        # about 0.2%, though the reason is unknown.
        current_data = np.concatenate([current_data, np.zeros((
                current_data.shape[0], NUM_FEATURE_CLS - current_data.shape[1]))], axis=-1)
        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        
        total_correct = 0
        total_seen = 0
        loss_sum = 0
        
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE
            
            # Input the features and labels to the graph.
            feed_dict = {ops['pointclouds_pl']: current_data[start_idx:end_idx,...],
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training,}
            # Calculate the loss and classification scores.
            summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'],
                ops['train_op'], ops['loss'], ops['pred']], feed_dict=feed_dict)
                    
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val 
Example #4
Source File: train_xyz.py    From SpiderCNN with MIT License
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    
    # Shuffle train files
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)
    
    for fn in range(len(TRAIN_FILES)):
        log_string('----' + str(fn) + '-----')
        current_data, current_label, _ = provider.loadDataFile_with_normal(TRAIN_FILES[train_file_idxs[fn]])
        current_data = current_data[:,0:NUM_POINT,:]
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))           
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        
        total_correct = 0
        total_seen = 0
        loss_sum = 0
       
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE
            
            # Augment batched point clouds by rotation and jittering
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)

            feed_dict = {ops['pointclouds_pl']: jittered_data,
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training,}
            summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'],
                ops['train_op'], ops['loss'], ops['pred']], feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val
        
        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen))) 
Example #5
Source File: train.py    From SpiderCNN with MIT License
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    
    # Shuffle train files
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)
    
    for fn in range(len(TRAIN_FILES)):
        log_string('----' + str(fn) + '-----')
        current_data, current_label, normal_data = provider.loadDataFile_with_normal(TRAIN_FILES[train_file_idxs[fn]])
        normal_data = normal_data[:,0:NUM_POINT,:]
        current_data = current_data[:,0:NUM_POINT,:]
        current_data, current_label, shuffle_idx = provider.shuffle_data(current_data, np.squeeze(current_label))           
        current_label = np.squeeze(current_label)
        normal_data = normal_data[shuffle_idx, ...]

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        
        total_correct = 0
        total_seen = 0
        loss_sum = 0
       
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE
            
            # Augment batched point clouds by rotation and jittering
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)
            input_data = np.concatenate((jittered_data, normal_data[start_idx:end_idx, :, :]), 2)
            # Random point dropout
            input_data = provider.random_point_dropout(input_data)

            feed_dict = {ops['pointclouds_pl']: input_data,
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training,}
            summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'],
                ops['train_op'], ops['loss'], ops['pred']], feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val
        
        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen))) 
Example #6
Source File: train.py    From PointCNN.Pytorch with MIT License
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    
    # Shuffle train files
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)
    
    for fn in range(len(TRAIN_FILES)):
        log_string('----' + str(fn) + '-----')
        current_data, current_label = provider.loadDataFile(TRAIN_FILES[train_file_idxs[fn]])
        current_data = current_data[:,0:NUM_POINT,:]
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))            
        current_label = np.squeeze(current_label)
        
        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        
        total_correct = 0
        total_seen = 0
        loss_sum = 0
       
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE
            
            # Augment batched point clouds by rotation and jittering
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)
            feed_dict = {ops['pointclouds_pl']: jittered_data,
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training,}
            summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'],
                ops['train_op'], ops['loss'], ops['pred']], feed_dict=feed_dict)
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val
        
        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen))) 
Example #7
Source File: train.py    From deep_gcns with MIT License
def train_one_epoch(sess, ops, train_writer):
  """ ops: dict mapping from string to tf ops """
  is_training = True

  sem_seg_util.log_string(LOG_FOUT, '----')
  current_data, current_label, _ = provider.shuffle_data(train_data[:,0:NUM_POINTS,:], train_label)

  file_size = current_data.shape[0]
  num_batches = file_size // (NUM_GPU * BATCH_SIZE)

  total_correct = 0
  total_seen = 0
  loss_sum = 0

  for batch_idx in range(num_batches):

    if batch_idx % 100 == 0:
      print('Current batch/total batch num: %d/%d' % (batch_idx, num_batches))

    start_idx = []
    end_idx = []

    for gpu_idx in range(NUM_GPU):
      start_idx.append((batch_idx + gpu_idx) * BATCH_SIZE)
      end_idx.append((batch_idx + gpu_idx + 1) * BATCH_SIZE)

    feed_dict = dict()
    for gpu_idx in range(NUM_GPU):
      feed_dict[ops['inputs_phs'][gpu_idx]] = current_data[start_idx[gpu_idx]:end_idx[gpu_idx], :, :]
      feed_dict[ops['labels_phs'][gpu_idx]] = current_label[start_idx[gpu_idx]:end_idx[gpu_idx]]
      feed_dict[ops['is_training_phs'][gpu_idx]] = is_training

    summary, step, _, loss_val, pred_val = sess.run([ops['merged'],
                                                     ops['step'],
                                                     ops['train_op'],
                                                     ops['loss'],
                                                     ops['pred']],
                                                    feed_dict=feed_dict)

    train_writer.add_summary(summary, step)
    pred_val = np.argmax(pred_val, 2)
    correct = np.sum(pred_val == current_label[start_idx[-1]:end_idx[-1]])
    total_correct += correct
    total_seen += (BATCH_SIZE*NUM_POINTS)
    loss_sum += loss_val

  sem_seg_util.log_string(LOG_FOUT, 'mean loss: %f' % (loss_sum / float(num_batches)))
  sem_seg_util.log_string(LOG_FOUT, 'accuracy: %f' % (total_correct / float(total_seen))) 
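This is a multi-GPU variant: the shuffled data is sliced once per GPU, and each slice is fed through that GPU's own placeholders (inputs_phs, labels_phs, is_training_phs). Note that the running accuracy is computed only against the last GPU's slice of labels.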
Example #8
Source File: train.py    From ldgcnn with MIT License
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    
    # Shuffle train files
    train_file_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_file_idxs)
    
    for fn in range(len(TRAIN_FILES)):
        log_string('----' + str(fn) + '-----')
        # Load data and labels from the files.
        current_data, current_label = provider.loadDataFile(TRAIN_FILES[train_file_idxs[fn]])
        current_data = current_data[:,0:NUM_POINT,:]
        # Shuffle the data in the training set.
        current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))            
        current_label = np.squeeze(current_label)
        
        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        
        total_correct = 0
        total_seen = 0
        loss_sum = 0
       
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE
            
            # Augment batched point clouds by rotating, jittering, shifting, 
            # and scaling.
            rotated_data = provider.rotate_point_cloud(current_data[start_idx:end_idx, :, :])
            jittered_data = provider.jitter_point_cloud(rotated_data)
            jittered_data = provider.random_scale_point_cloud(jittered_data)
            jittered_data = provider.rotate_perturbation_point_cloud(jittered_data)
            jittered_data = provider.shift_point_cloud(jittered_data)
            
            # Input the augmented point cloud and labels to the graph.
            feed_dict = {ops['pointclouds_pl']: jittered_data,
                         ops['labels_pl']: current_label[start_idx:end_idx],
                         ops['is_training_pl']: is_training,}
            
            # Calculate the loss and accuracy of the input batch data.            
            summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'],
                ops['train_op'], ops['loss'], ops['pred']], feed_dict=feed_dict)
            
            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 1)
            correct = np.sum(pred_val == current_label[start_idx:end_idx])
            total_correct += correct
            total_seen += BATCH_SIZE
            loss_sum += loss_val
        
        log_string('mean loss: %f' % (loss_sum / float(num_batches)))
        log_string('accuracy: %f' % (total_correct / float(total_seen))) 
Example #9
Source File: dataset_s3dis.py    From JSNet with MIT License
def data_sample(data_sample_queue, input_list, split, epoch, num_works, block_points=4096,
                block_size=1.0, stride=0.5, random_sample=False, sample_num=None, sample_aug=1):
    assert (input_list[0].endswith('npy') or input_list[0].endswith('h5')), "data format must be .npy or .h5"

    input_list_length = len(input_list)
    num_work = min(min(num_works, multiprocessing.cpu_count()), input_list_length // 4)

    if input_list_length > 4:
        num_work = max(num_work, 4)

    chunksize = input_list_length // num_work
    print("num input_list: {}, num works: {}, chunksize: {}".format(input_list_length, num_work, chunksize))

    if input_list[0].endswith('npy'):
        data_sample_func = functools_partial(
            indoor3d_util.room2blocks_wrapper_normalized, num_point=block_points, block_size=block_size,
            stride=stride, random_sample=random_sample, sample_num=sample_num, sample_aug=sample_aug)
    elif input_list[0].endswith('h5'):
        def load_data_file(input_file):
            cur_data, cur_group, _, cur_sem = provider.loadDataFile_with_groupseglabel_stanfordindoor(input_file)
            return cur_data, cur_sem, cur_group
        data_sample_func = load_data_file

    def data_sample_single(input_file):
        datalabel = data_sample_func(input_file)
        if split == 'train':
            datalabel = provider.shuffle_data(*datalabel)
        return datalabel

    for _ in range(epoch):
        np.random.shuffle(input_list)
        for idx in range(chunksize + 1):
            start_idx = min(idx * num_work, input_list_length)
            end_idx = min((idx + 1) * num_work, input_list_length)
            if start_idx >= input_list_length or end_idx > input_list_length:
                continue

            with futures.ThreadPoolExecutor(num_work) as pool:
                data_sem_ins = list(pool.map(data_sample_single, input_list[start_idx:end_idx], chunksize=1))

                for dsi in data_sem_ins:
                    shuffle_dsi = provider.shuffle_data(*dsi)
                    data_sample_queue.put(shuffle_dsi)
                    del dsi
                    gc.collect()

                pool.shutdown()
                gc.collect()
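Two details are worth noting here: shuffle_data is called with argument unpacking (provider.shuffle_data(*datalabel)), so it forwards whatever tuple of aligned arrays the loader produced; and on the training split each block is shuffled twice, once inside data_sample_single and again just before the result is pushed onto the queue.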