Python random.sample() Examples

The following are 29 code examples showing how to use random.sample(). These examples are extracted from open source projects; the project, author, source file, and license are listed above each example.


You may also want to check out all available functions and classes of the random module.
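
For quick reference: random.sample(population, k) returns a new k-length list of unique elements chosen from the population without replacement, leaving the population itself unchanged. The population must be a sequence (list, tuple, range, str); passing a set was deprecated in Python 3.9 and removed in 3.11, so convert sets and dict views with list() first. A minimal sketch (the names are illustrative):

import random

deck = list(range(52))         # the population must be a sequence
hand = random.sample(deck, 5)  # five distinct elements, drawn without replacement

print(hand)       # e.g. [40, 7, 1, 47, 17]
print(len(deck))  # 52 -- sampling does not mutate the population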

Example 1
Project: svviz   Author: svviz   File: pairfinder.py    License: MIT License
def getToMatchWithSampling(self):
        readIDs = set()

        logging.info("  exceeded number of reads required to begin sampling; performing sampling")
        for region in self.regions:
            for read in self.loadRegion(region.chr(), region.start(), region.end()):
                readIDs.add(read.qname)

        readIDs = random.sample(list(readIDs), self.sampleReads)  # random.sample no longer accepts sets (Python 3.11+)

        tomatch = set()
        readsByID = collections.defaultdict(ReadSet)

        for region in self.regions:
            for read in self.loadRegion(region.chr(), region.start(), region.end()):
                if read.qname in readIDs:
                    tomatch.add(read)
                    readsByID[read.qname].add(read)

        return tomatch, readsByID 
Example 2
Project: CAMISIM   Author: CAMI-challenge   File: create_joint_gs.py    License: Apache License 2.0
def get_samples(root_paths, samples):
    """
    Given the root paths of the CAMISIM runs and the subset of samples, returns a dict from sample number to folders.
    Assumes the sample folders to be in the format YYYY.MM.DD_HH.MM.SS_sample_#
    """
    used_samples = {}
    for path in root_paths:
        if not os.path.exists(path):
            raise IOError("No such file or directory: %s" % path)
        files = os.listdir(path)
        for f in files:
            try:
                date, time, sample, nr = f.split("_")
            except ValueError:
                continue
            if samples is None or int(nr) in samples:
                if nr in used_samples:
                    used_samples[nr].append(os.path.join(path,f))
                else:
                    used_samples[nr] = [os.path.join(path,f)]
    return used_samples 
Example 3
Project: CAMISIM   Author: CAMI-challenge   File: create_joint_gs.py    License: Apache License 2.0
def merge_bam_files(bams_per_genome, out, threads):
    """
    Merges (+sort +index) all given bam files per genome (exact paths, single sample/multiple runs or multiple samples)
    """
    out_path = os.path.join(out,"bam")
    os.mkdir(out_path)
    for genome in bams_per_genome:
        list_of_bam = " ".join(bams_per_genome[genome]) # can be used as input to samtools immediately
        header = fix_headers(genome, bams_per_genome[genome], out_path)
        if header is not None:
            for bam in bams_per_genome[genome]: # add new header to all bam files
                cmd = "samtools reheader {header} {bam} >> {out}/out.bam; mv {out}/out.bam {bam}".format(
                    header = header,
                    out = out_path,
                    bam = bam
                )
                subprocess.call([cmd],shell=True)
        cmd = "samtools merge -@ {threads} - {bam_files} | samtools sort -@ {threads} - {path}/{genome}; samtools index {path}/{genome}.bam".format(
            threads = threads,
            bam_files = list_of_bam,
            path = out_path,
            genome = genome
        )
        subprocess.call([cmd],shell=True) # this runs a single command at a time (but that one multi threaded)
    return out_path 
Example 4
Project: discomll   Author: romanorac   File: decision_tree.py    License: Apache License 2.0
def rand_indices(x, rand_attr):
    """
    Randomly selects features without replacement; used with random forests. Selected features must have
    more than one distinct value.
    x: numpy array - dataset
    rand_attr: number of features to select at random
    """
    loop = True
    indices = list(range(len(x[0])))  # a list (not a range) so that indices.remove(i) below works

    while loop:
        loop = False
        # randomly selected features without replacement
        rand_list = random.sample(indices, rand_attr)
        for i in rand_list:
            if len(np.unique(x[:, i])) == 1:
                loop = True
                indices.remove(i)
                if len(indices) == rand_attr - 1:
                    return -1  # all features in dataset have one distinct value
                break
    return rand_list 
Example 5
Project: DOTA_models   Author: ringringyi   File: replay_buffer.py    License: Apache License 2.0
def remove_n(self, n):
    """Get n items for removal."""
    assert self.init_length + n <= self.cur_size

    if self.eviction_strategy == 'rand':
      # random removal
      idxs = random.sample(range(self.init_length, self.cur_size), n)
    elif self.eviction_strategy == 'fifo':
      # overwrite elements in cyclical fashion
      idxs = [
          self.init_length +
          (self.remove_idx + i) % (self.max_size - self.init_length)
          for i in range(n)]
      self.remove_idx = idxs[-1] + 1 - self.init_length
    elif self.eviction_strategy == 'rank':
      # remove lowest-priority indices
      idxs = np.argpartition(self.priorities, n)[:n]

    return idxs 
Example 6
Project: OpenNRE   Author: thunlp   File: data_loader.py    License: MIT License
def __getitem__(self, index):
        bag = self.bag_scope[index]
        if self.bag_size > 0:
            if self.bag_size <= len(bag):
                resize_bag = random.sample(bag, self.bag_size)
            else:
                resize_bag = bag + list(np.random.choice(bag, self.bag_size - len(bag)))
            bag = resize_bag
            
        seqs = None
        rel = self.rel2id[self.data[bag[0]]['relation']]
        for sent_id in bag:
            item = self.data[sent_id]
            seq = list(self.tokenizer(item))
            if seqs is None:
                seqs = []
                for i in range(len(seq)):
                    seqs.append([])
            for i in range(len(seq)):
                seqs[i].append(seq[i])
        for i in range(len(seqs)):
            seqs[i] = torch.cat(seqs[i], 0) # (n, L), n is the size of bag
        return [rel, self.bag_name[index], len(bag)] + seqs 
Example 7
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    License: MIT License
def reset(self):
        """
        Logic for sampling a state from the demonstration and resetting
        the simulation to that state. 
        """
        state = self.sample()
        if state is None:
            # None indicates that a normal env reset should occur
            return self.env.reset()
        else:
            if self.need_xml:
                # reset the simulation from the model if necessary
                state, xml = state
                self.env.reset_from_xml_string(xml)

            if isinstance(state, tuple):
                state = state[0]

            # force simulator state to one from the demo
            self.sim.set_state_from_flattened(state)
            self.sim.forward()

            return self.env._get_observation() 
Example 8
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    License: MIT License
def sample(self):
        """
        This is the core sampling method. Samples a state from a
        demonstration, in accordance with the configuration.
        """

        # chooses a sampling scheme randomly based on the mixing ratios
        seed = random.uniform(0, 1)
        ratio = np.cumsum(self.scheme_ratios)
        ratio = ratio > seed
        for i, v in enumerate(ratio):
            if v:
                break

        sample_method = getattr(self, self.sample_method_dict[self.sampling_schemes[i]])
        return sample_method() 
Example 9
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    License: MIT License
def _uniform_sample(self):
        """
        Sampling method.

        First uniformly sample a demonstration from the set of demonstrations.
        Then uniformly sample a state from the selected demonstration.
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # select a flattened mujoco state uniformly from this episode
        states = self.demo_file["data/{}/states".format(ep_ind)][()]  # dataset[()] replaces the .value attribute removed in h5py 3.x
        state = random.choice(states)

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml
        return state 
Example 10
Project: trees   Author: gdanezis   File: forests.py    License: Apache License 2.0
def build_tree(train, features, levels=5, numfeatures=100):
    'Train a decision tree based on labeled data and features'
    if levels == 0:
        C1 = Counter([b for _, b in train])
        Leaf = (None, C1)
        return Leaf
    else:
        try:
            X = (split(train, F) for F in random.sample(features, numfeatures))
            H, L1, L2, F = max(X)
            M1 = build_tree(L1, features, levels - 1, numfeatures)
            M2 = build_tree(L2, features, levels - 1, numfeatures)
            Branch = (F, M1, M2)
            return Branch
        except Exception:
            return build_tree(train, features, levels=0) 
Example 11
Project: tensorflow_RL   Author: RLOpensource   File: ddpg.py    License: MIT License
def train_model(self):
        batch = random.sample(self.memory,self.batch_size)
        states = np.asarray([e[0] for e in batch])
        actions = np.asarray([e[1] for e in batch])
        rewards = np.asarray([e[2] for e in batch])
        next_states = np.asarray([e[3] for e in batch])
        dones = np.asarray([e[4] for e in batch])
        target_action_input = self.sess.run(self.target_actor.actor,feed_dict={self.target_actor.state:next_states})
        target_q_value = self.sess.run(self.target_critic.critic,feed_dict={self.target_critic.state:next_states,
                                                                            self.target_critic.action:target_action_input})
        targets = np.asarray([r + self.gamma * (1-d) * tv for r,tv,d in zip(rewards,target_q_value,dones)])
        self.sess.run(self.ctrain_op,feed_dict=
        {
            self.critic.state:states,
            self.critic.action:actions,
            self.target_value:np.squeeze(targets)
        })
        action_for_train = self.sess.run(self.actor.actor,feed_dict={self.actor.state:states})
        self.sess.run(self.atrain_op,feed_dict=
        {
            self.actor.state:states,
            self.critic.state:states,
            self.critic.action:action_for_train
        })
        self.sess.run(self.update_target_soft) 
Example 12
Project: tensorflow_RL   Author: RLOpensource   File: dqn.py    License: MIT License
def train_model(self):
        minibatch = random.sample(self.memory, self.batch_size)
        state = [mini[0] for mini in minibatch]
        next_state = [mini[1] for mini in minibatch]
        action = [mini[2] for mini in minibatch]
        reward = [mini[3] for mini in minibatch]
        done = [mini[4] for mini in minibatch]

        nextQ = self.sess.run(self.targetNet.Q, feed_dict={self.targetNet.input: next_state})
        max_nextQ = np.max(nextQ, axis=1)
        targets = [r + self.gamma * (1-d) * mQ for r, d, mQ in zip(reward, done, max_nextQ)]
        _, l = self.sess.run([self.train_op, self.loss], feed_dict={self.mainNet.input: state,
                                                               self.target: targets,
                                                               self.action: action})

        return l 
Example 13
Project: tensorflow_RL   Author: RLOpensource   File: qrdqn.py    License: MIT License
def train_model(self):
        minibatch = random.sample(self.memory, self.batch_size)
        state_stack = [mini[0] for mini in minibatch]
        next_state_stack = [mini[1] for mini in minibatch]
        action_stack = [mini[2] for mini in minibatch]
        reward_stack = [mini[3] for mini in minibatch]
        done_stack = [mini[4] for mini in minibatch]
        done_stack = [int(i) for i in done_stack]
        onehotaction = np.zeros([self.batch_size, self.output_size])
        for i, j in zip(onehotaction, action_stack):
            i[j] = 1
        action_stack = np.stack(onehotaction)

        Q_next_state = self.sess.run(self.target_network, feed_dict={self.targetNet.input: next_state_stack})
        next_action = np.argmax(np.mean(Q_next_state, axis=2), axis=1)
        Q_next_state_next_action = [Q_next_state[i, action, :] for i, action in enumerate(next_action)]
        Q_next_state_next_action = np.sort(Q_next_state_next_action)
        T_theta = [np.ones(self.num_support) * reward if done else reward + self.gamma * Q for reward, Q, done in
                    zip(reward_stack, Q_next_state_next_action, done_stack)]
        _, l = self.sess.run([self.train_op, self.loss],
                                feed_dict={self.mainNet.input: state_stack, self.action: action_stack, self.Y: T_theta})
        return l 
Example 14
Project: ICDAR-2019-SROIE   Author: zzzDavid   File: my_data.py    License: MIT License
def get_train_data(self, batch_size=8):
        samples = random.sample(list(self.train_dict.keys()), batch_size)  # dict views must be converted to a sequence (Python 3.11+)

        texts = [self.train_dict[k][0] for k in samples]
        labels = [self.train_dict[k][1] for k in samples]

        robust_padding(texts, labels)

        maxlen = max(len(t) for t in texts)

        text_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, text in enumerate(texts):
            text_tensor[:, i] = torch.LongTensor([VOCAB.find(c) for c in text])

        truth_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, label in enumerate(labels):
            truth_tensor[:, i] = torch.LongTensor(label)

        return text_tensor.to(self.device), truth_tensor.to(self.device) 
Example 15
Project: ICDAR-2019-SROIE   Author: zzzDavid   File: my_data.py    License: MIT License
def get_val_data(self, batch_size=8, device="cpu"):
        keys = random.sample(list(self.val_dict.keys()), batch_size)  # dict views must be converted to a sequence (Python 3.11+)

        texts = [self.val_dict[k][0] for k in keys]
        labels = [self.val_dict[k][1] for k in keys]

        maxlen = max(len(s) for s in texts)
        texts = [s.ljust(maxlen, " ") for s in texts]
        labels = [
            numpy.pad(a, (0, maxlen - len(a)), mode="constant", constant_values=0)
            for a in labels
        ]

        text_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, text in enumerate(texts):
            text_tensor[:, i] = torch.LongTensor([VOCAB.find(c) for c in text])

        truth_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, label in enumerate(labels):
            truth_tensor[:, i] = torch.LongTensor(label)

        return keys, text_tensor.to(self.device), truth_tensor.to(self.device) 
Example 16
Project: YaYaGen   Author: jimmy-sonny   File: optimizer.py    License: BSD 2-Clause "Simplified" License
def basic_optimizer(rule):
    """
    Randomly remove attributes until reaching UPPER_THRESHOLD
    """
    from . import rule as rule_class
    if rule.evaluate() < rule_class.YaraRule.values['UPPER_THRESHOLD']:
        return rule

    optimized = False
    opt_rule = rule_class.YaraRule(rule)

    while not optimized:
        candidate = random.sample(opt_rule, 1)[0]
        temp = rule_class.YaraRule(opt_rule - {candidate})
        weight = temp.evaluate()
        c1 = weight > rule_class.YaraRule.values['UPPER_THRESHOLD']
        c2 = weight > rule_class.YaraRule.values['THRESHOLD']
        if c1 and c2:
            opt_rule = temp
        else:
            optimized = True
    return opt_rule 
Example 17
Project: NTM-One-Shot-TF   Author: hmishra2250   File: Generator.py    License: MIT License
def sample(self, nb_samples):
        sampled_character_folders = random.sample(self.character_folders, nb_samples)
        random.shuffle(sampled_character_folders)

        example_inputs = np.zeros((self.batch_size, nb_samples * self.nb_samples_per_class, np.prod(self.img_size)), dtype=np.float32)
        example_outputs = np.zeros((self.batch_size, nb_samples * self.nb_samples_per_class), dtype=np.float32)     #notice hardcoded np.float32 here and above, change it to something else in tf

        for i in range(self.batch_size):
            labels_and_images = get_shuffled_images(sampled_character_folders, range(nb_samples), nb_samples=self.nb_samples_per_class)
            sequence_length = len(labels_and_images)
            labels, image_files = zip(*labels_and_images)

            angles = np.random.uniform(-self.max_rotation, self.max_rotation, size=sequence_length)
            shifts = np.random.uniform(-self.max_shift, self.max_shift, size=sequence_length)

            example_inputs[i] = np.asarray([load_transform(filename, angle=angle, s=shift, size=self.img_size).flatten() \
                                            for (filename, angle, shift) in zip(image_files, angles, shifts)], dtype=np.float32)
            example_outputs[i] = np.asarray(labels, dtype=np.int32)

        return example_inputs, example_outputs 
Example 18
Project: me-ica   Author: ME-ICA   File: classifier_nodes.py    License: GNU Lesser General Public License v2.1
def __init__(self, k=1, execute_method=None,
                 input_dim=None, output_dim=None, dtype=None):
        """Initialize classifier.
        
        k -- Number of closest sample points that are taken into account.
        """
        super(KNNClassifier, self).__init__(execute_method=execute_method,
                                            input_dim=input_dim,
                                            output_dim=output_dim,
                                            dtype=dtype)
        self.k = k
        self._label_samples = {}  # temporary variable during training
        self.n_samples = None
        # initialized after training:
        self.samples = None  # 2d array with all samples
        self.sample_label_indices = None  # 1d array for label indices
        self.ordered_labels = [] 
Example 19
Project: chainerrl   Author: chainer   File: test_random.py    License: MIT License
def _test(self):
        t = self.get_timeit(
            "from chainerrl.misc.random import sample_n_k")

        # faster than random.sample
        t1 = self.get_timeit("""
import random
def sample_n_k(n, k):
    return random.sample(range(n), k)
""")
        self.assertLess(t, t1)

        # faster than np.random.choice(..., replace=False)
        t2 = self.get_timeit("""
import numpy as np
def sample_n_k(n, k):
    return np.random.choice(n, k, replace=False)
""")
        self.assertLess(t, t2) 
Example 20
Project: hadrian   Author: modelop   File: kmeans.py    License: Apache License 2.0
def randomSubset(self, subsetSize):
        """Return a (dataset, weights) that are randomly chosen to have ``subsetSize`` records.

        :type subsetSize: positive integer
        :param subsetSize: size of the sample
        :rtype: (2-d Numpy array, 1-d Numpy array)
        :return: (dataset, weights) sampled without replacement (if the original dataset is unique, the new one will be, too)
        """

        if subsetSize <= self.numberOfClusters:
            raise TypeError("subsetSize must be strictly greater than the numberOfClusters")

        indexes = random.sample(range(self.dataset.shape[0]), subsetSize)
        dataset = self.dataset[indexes,:]
        if self.weights is None:
            weights = None
        else:
            weights = self.weights[indexes]

        return dataset, weights 
Example 21
Project: reinforcement_learning   Author: yrlu   File: exp_replay.py    License: MIT License
def sample(self, num=None):
    """Randomly draw [num] samples"""
    if num is None:
      num = self.batch_size
    if len(self.mem) < self.start_mem:
      return []
    sampled_idx = random.sample(range(abs(self.kth),len(self.mem)), num)
    samples = []
    for idx in sampled_idx:
      steps = self.mem[idx-abs(self.kth):idx]
      cur_state = np.stack([s.cur_step for s in steps], axis=len(self.state_size))
      next_state = np.stack([s.next_step for s in steps], axis=len(self.state_size))
      # handle special cases
      if self.kth == -1:
        cur_state = steps[0].cur_step
        next_state = steps[0].next_step
      elif len(self.state_size) == 1:
        cur_state = [steps[0].cur_step]
        next_state = [steps[0].next_step]
      reward = steps[-1].reward
      action = steps[-1].action
      done = steps[-1].done
      samples.append(Step(cur_step=cur_state, action=action, next_step=next_state, reward=reward, done=done))
    return samples 
Example 22
Project: indras_net   Author: gcallah   File: dealer_factory.py    License: GNU General Public License v3.0
def generate_dealer(unused1, unused2, *kwargs):
    dealer = Agent("dealer" + str(random.randint(0, MAX_DEALERS)),
                   action=dealer_action)
    num_emojis = random.randint(1, len(DEF_CORRELATIONS) // 2)
    dealer.attrs["emojis"] = random.sample(emoji_list, num_emojis)
    dealer.attrs["avg_car_life"] = avg_life_from_emojis(dealer.attrs["emojis"])
    return dealer 
Example 23
Project: mutatest   Author: EvanKepner   File: run.py    License: MIT License
def get_sample(ggrp: GenomeGroup, ignore_coverage: bool) -> List[GenomeGroupTarget]:
    """Get the sample space for the mutation trials.

    This will attempt to use covered-targets as the default unless ``ignore_coverage`` is set
    to True. If the configured .coverage file is not found, the total targets are returned instead.

    Args:
        ggrp: the Genome Group to generate the sample space of targets
        ignore_coverage: flag to ignore coverage if present

    Returns:
        Sorted list of Path-LocIndex pairs as complete sample space from the ``GenomeGroup``.
    """
    if ignore_coverage:
        LOGGER.info("Ignoring coverage file for sample space creation.")

    try:
        sample = ggrp.targets if ignore_coverage else ggrp.covered_targets

    except FileNotFoundError:
        LOGGER.info("Coverage file does not exist, proceeding to sample from all targets.")
        sample = ggrp.targets

    # sorted list used for repeat trials using random seed instead of set
    sort_by_keys = attrgetter(
        "source_path",
        "loc_idx.lineno",
        "loc_idx.col_offset",
        "loc_idx.end_lineno",
        "loc_idx.end_col_offset",
    )
    return sorted(sample, key=sort_by_keys) 
Example 24
Project: ALF   Author: blackberry   File: BinaryFuzz.py    License: Apache License 2.0
def _select_active_fuzz_types(self):
        """
        This method is used to randomly disable different fuzz types on a per iteration basis.
        """
        type_count = len(self.fuzz_types)
        if type_count < 2:
            return
        self.active_fuzz_types = random.sample(self.fuzz_types, random.randint(1, type_count)) 
Example 25
Project: wechatpy   Author: wechatpy   File: utils.py    License: MIT License
def random_string(length=16):
    rule = string.ascii_letters + string.digits
    rand_list = random.sample(rule, length)
    return "".join(rand_list) 
Example 26
Project: comet-commonsense   Author: atcbosselut   File: atomic.py    License: Apache License 2.0
def select_partial_dataset(data_opts, data):
    num_selections = math.ceil(data_opts.kr * len(data))
    return random.sample(data, num_selections) 
Example 27
Project: BiblioPixelAnimations   Author: ManiacalLabs   File: Random.py    License: MIT License
def step(self, amt=1):
        if not (self.every and self.cur_step % self.every):
            indexes = range(len(self.color_list))
            if self.count:
                indexes = random.sample(indexes, self.count)
            d = self.distribution
            r, g, b = self.levels
            for i in indexes:
                self.color_list[i] = (r * d(), g * d(), b * d()) 
Example 28
Project: subword-qac   Author: clovaai   File: generate.py    License: MIT License
def main(args):
    logger.info(f"Args: {json.dumps(args.__dict__, indent=2, sort_keys=True)}")

    spm_path = os.path.join('spm', args.spm, "spm.model")
    logger.info(f"Loading tokenizer from {spm_path}")
    tokenizer = Tokenizer(spm_path)
    args.ntoken = ntoken = len(tokenizer)
    args.branching_factor = min([args.branching_factor, args.ntoken])
    logger.info(f"  Vocab size: {ntoken}")

    n_queries_str = f"{f'only {args.n_queries} samples' if args.n_queries else 'all'} queries from"
    logger.info(f"Reading a dataset ({n_queries_str} test.query.txt)")
    seen_set = set(read_data(os.path.join(args.data_dir, "train.query.txt"), min_len=args.min_len))
    test_data = read_data(os.path.join(args.data_dir, "test.query.txt"), min_len=args.min_len)
    if args.n_queries:
        random.seed(args.seed)
        test_data = random.sample(test_data, args.n_queries)
    n_seen_test_data = len([x for x in test_data if x in seen_set])
    n_unseen_test_data = len(test_data) - n_seen_test_data
    logger.info(f"  Number of test data: {len(test_data):8d} (seen {n_seen_test_data}, unseen {n_unseen_test_data})")

    logger.info(f"Loading model from {args.model_dir}")
    model = model_load(args.model_dir)
    model = model.to(device)

    logger.info('Generation starts!')
    with torch.no_grad():
        generate(model, tokenizer, test_data, args, seen_set=seen_set, calc_mrl=args.calc_mrl) 
Example 29
Project: CAMISIM   Author: CAMI-challenge   File: create_joint_gs.py    License: Apache License 2.0
def create_gold_standards(bamtogold, used_samples, metadata, out, threads, shuffle, name="S"):
    """
    Creation of the gold standards per sample. Uses the helper script bamToGold and merges all bam files of the same genome per sample across runs
    """
    for sample in used_samples:
        runs = used_samples[sample]
        bam_per_genome = add_to_bam_per_genome({}, runs)
        contig_name = name + str(sample) + "C"
        sample_path = os.path.join(out,"sample_%s" % sample) # creating a folder for every sample
        os.mkdir(sample_path)
        merged = merge_bam_files(bam_per_genome, sample_path, threads)
        bamToGold(bamtogold, merged, sample_path, metadata, threads)
        create_gsa_mapping(sample_path, metadata, contig_name, shuffle)