Python random.sample() Examples

The following are 29 code examples showing how to use random.sample(). These examples are extracted from open source projects; the project, author, source file, and license are listed above each example.


You may also want to check out all available functions and classes of the random module.
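
For quick reference: random.sample(population, k) returns a new k-length list of unique elements chosen from the population without replacement, leaving the population itself unchanged. The population must be a sequence (list, tuple, range, str); passing a set was deprecated in Python 3.9 and removed in 3.11, so convert sets and dict views with list() first. A minimal sketch (the names are illustrative):

import random

deck = list(range(52))         # the population must be a sequence
hand = random.sample(deck, 5)  # five distinct elements, drawn without replacement

print(hand)       # e.g. [40, 7, 1, 47, 17]
print(len(deck))  # 52 -- sampling does not mutate the population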

Example 1
Project: svviz   Author: svviz   File: pairfinder.py    License: MIT License
def getToMatchWithSampling(self):
        readIDs = set()

        logging.info("  exceeded number of reads required to begin sampling; performing sampling")
        for region in self.regions:
            for read in self.loadRegion(region.chr(), region.start(), region.end()):
                readIDs.add(read.qname)

        readIDs = random.sample(list(readIDs), self.sampleReads)  # random.sample no longer accepts sets (Python 3.11+)

        tomatch = set()
        readsByID = collections.defaultdict(ReadSet)

        for region in self.regions:
            for read in self.loadRegion(region.chr(), region.start(), region.end()):
                if read.qname in readIDs:
                    tomatch.add(read)
                    readsByID[read.qname].add(read)

        return tomatch, readsByID 
Example 2
Project: CAMISIM   Author: CAMI-challenge   File: create_joint_gs.py    License: Apache License 2.0
def get_samples(root_paths, samples):
    """
    Given the root paths of the CAMISIM runs and the subset of samples, returns a dict from sample number to folders.
    Assumes the sample folders to be in the format YYYY.MM.DD_HH.MM.SS_sample_#
    """
    used_samples = {}
    for path in root_paths:
        if not os.path.exists(path):
            raise IOError("No such file or directory: %s" % path)
        files = os.listdir(path)
        for f in files:
            try:
                date, time, sample, nr = f.split("_")
            except ValueError:
                continue
            if samples is None or int(nr) in samples:
                if nr in used_samples:
                    used_samples[nr].append(os.path.join(path,f))
                else:
                    used_samples[nr] = [os.path.join(path,f)]
    return used_samples 
Example 3
Project: CAMISIM   Author: CAMI-challenge   File: create_joint_gs.py    License: Apache License 2.0
def merge_bam_files(bams_per_genome, out, threads):
    """
    Merges (+sort +index) all given bam files per genome (exact paths, single sample/multiple runs or multiple samples)
    """
    out_path = os.path.join(out,"bam")
    os.mkdir(out_path)
    for genome in bams_per_genome:
        list_of_bam = " ".join(bams_per_genome[genome]) # can be used as input to samtools immediately
        header = fix_headers(genome, bams_per_genome[genome], out_path)
        if header is not None:
            for bam in bams_per_genome[genome]: # add new header to all bam files
                cmd = "samtools reheader {header} {bam} >> {out}/out.bam; mv {out}/out.bam {bam}".format(
                    header = header,
                    out = out_path,
                    bam = bam
                )
                subprocess.call([cmd],shell=True)
        cmd = "samtools merge -@ {threads} - {bam_files} | samtools sort -@ {threads} - {path}/{genome}; samtools index {path}/{genome}.bam".format(
            threads = threads,
            bam_files = list_of_bam,
            path = out_path,
            genome = genome
        )
        subprocess.call([cmd],shell=True) # this runs a single command at a time (but that one multi threaded)
    return out_path 
Example 4
Project: discomll   Author: romanorac   File: decision_tree.py    License: Apache License 2.0
def rand_indices(x, rand_attr):
    """
    Randomly selects features without replacement; used with random forests. Selected features must have
    more than one distinct value.
    x: numpy array - dataset
    rand_attr: number of features to select at random
    """
    loop = True
    indices = list(range(len(x[0])))  # a list (not a range) so that indices.remove(i) below works

    while loop:
        loop = False
        # randomly selected features without replacement
        rand_list = random.sample(indices, rand_attr)
        for i in rand_list:
            if len(np.unique(x[:, i])) == 1:
                loop = True
                indices.remove(i)
                if len(indices) == rand_attr - 1:
                    return -1  # all features in dataset have one distinct value
                break
    return rand_list 
Example 5
Project: DOTA_models   Author: ringringyi   File: replay_buffer.py    License: Apache License 2.0
def remove_n(self, n):
    """Get n items for removal."""
    assert self.init_length + n <= self.cur_size

    if self.eviction_strategy == 'rand':
      # random removal
      idxs = random.sample(range(self.init_length, self.cur_size), n)
    elif self.eviction_strategy == 'fifo':
      # overwrite elements in cyclical fashion
      idxs = [
          self.init_length +
          (self.remove_idx + i) % (self.max_size - self.init_length)
          for i in range(n)]
      self.remove_idx = idxs[-1] + 1 - self.init_length
    elif self.eviction_strategy == 'rank':
      # remove lowest-priority indices
      idxs = np.argpartition(self.priorities, n)[:n]

    return idxs 
Example 6
Project: OpenNRE   Author: thunlp   File: data_loader.py    License: MIT License
def __getitem__(self, index):
        bag = self.bag_scope[index]
        if self.bag_size > 0:
            if self.bag_size <= len(bag):
                resize_bag = random.sample(bag, self.bag_size)
            else:
                resize_bag = bag + list(np.random.choice(bag, self.bag_size - len(bag)))
            bag = resize_bag
            
        seqs = None
        rel = self.rel2id[self.data[bag[0]]['relation']]
        for sent_id in bag:
            item = self.data[sent_id]
            seq = list(self.tokenizer(item))
            if seqs is None:
                seqs = []
                for i in range(len(seq)):
                    seqs.append([])
            for i in range(len(seq)):
                seqs[i].append(seq[i])
        for i in range(len(seqs)):
            seqs[i] = torch.cat(seqs[i], 0) # (n, L), n is the size of bag
        return [rel, self.bag_name[index], len(bag)] + seqs 
Example 7
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    License: MIT License
def reset(self):
        """
        Logic for sampling a state from the demonstration and resetting
        the simulation to that state. 
        """
        state = self.sample()
        if state is None:
            # None indicates that a normal env reset should occur
            return self.env.reset()
        else:
            if self.need_xml:
                # reset the simulation from the model if necessary
                state, xml = state
                self.env.reset_from_xml_string(xml)

            if isinstance(state, tuple):
                state = state[0]

            # force simulator state to one from the demo
            self.sim.set_state_from_flattened(state)
            self.sim.forward()

            return self.env._get_observation() 
Example 8
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    License: MIT License
def sample(self):
        """
        This is the core sampling method. Samples a state from a
        demonstration, in accordance with the configuration.
        """

        # chooses a sampling scheme randomly based on the mixing ratios
        seed = random.uniform(0, 1)
        ratio = np.cumsum(self.scheme_ratios)
        ratio = ratio > seed
        for i, v in enumerate(ratio):
            if v:
                break

        sample_method = getattr(self, self.sample_method_dict[self.sampling_schemes[i]])
        return sample_method() 
Example 9
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    License: MIT License
def _uniform_sample(self):
        """
        Sampling method.

        First uniformly sample a demonstration from the set of demonstrations.
        Then uniformly sample a state from the selected demonstration.
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # select a flattened mujoco state uniformly from this episode
        states = self.demo_file["data/{}/states".format(ep_ind)][()]  # dataset[()] replaces the .value attribute removed in h5py 3.x
        state = random.choice(states)

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml
        return state 
Example 10
Project: trees   Author: gdanezis   File: forests.py    License: Apache License 2.0
def build_tree(train, features, levels=5, numfeatures=100):
    'Train a decision tree based on labeled data and features'
    if levels == 0:
        C1 = Counter([b for _, b in train])
        Leaf = (None, C1)
        return Leaf
    else:
        try:
            X = (split(train, F) for F in random.sample(features, numfeatures))
            H, L1, L2, F = max(X)
            M1 = build_tree(L1, features, levels - 1, numfeatures)
            M2 = build_tree(L2, features, levels - 1, numfeatures)
            Branch = (F, M1, M2)
            return Branch
        except Exception:
            return build_tree(train, features, levels=0) 
Example 11
Project: tensorflow_RL   Author: RLOpensource   File: ddpg.py    License: MIT License
def train_model(self):
        batch = random.sample(self.memory,self.batch_size)
        states = np.asarray([e[0] for e in batch])
        actions = np.asarray([e[1] for e in batch])
        rewards = np.asarray([e[2] for e in batch])
        next_states = np.asarray([e[3] for e in batch])
        dones = np.asarray([e[4] for e in batch])
        target_action_input = self.sess.run(self.target_actor.actor,feed_dict={self.target_actor.state:next_states})
        target_q_value = self.sess.run(self.target_critic.critic,feed_dict={self.target_critic.state:next_states,
                                                                            self.target_critic.action:target_action_input})
        targets = np.asarray([r + self.gamma * (1-d) * tv for r,tv,d in zip(rewards,target_q_value,dones)])
        self.sess.run(self.ctrain_op,feed_dict=
        {
            self.critic.state:states,
            self.critic.action:actions,
            self.target_value:np.squeeze(targets)
        })
        action_for_train = self.sess.run(self.actor.actor,feed_dict={self.actor.state:states})
        self.sess.run(self.atrain_op,feed_dict=
        {
            self.actor.state:states,
            self.critic.state:states,
            self.critic.action:action_for_train
        })
        self.sess.run(self.update_target_soft) 
Example 12
Project: tensorflow_RL   Author: RLOpensource   File: dqn.py    License: MIT License
def train_model(self):
        minibatch = random.sample(self.memory, self.batch_size)
        state = [mini[0] for mini in minibatch]
        next_state = [mini[1] for mini in minibatch]
        action = [mini[2] for mini in minibatch]
        reward = [mini[3] for mini in minibatch]
        done = [mini[4] for mini in minibatch]

        nextQ = self.sess.run(self.targetNet.Q, feed_dict={self.targetNet.input: next_state})
        max_nextQ = np.max(nextQ, axis=1)
        targets = [r + self.gamma * (1-d) * mQ for r, d, mQ in zip(reward, done, max_nextQ)]
        _, l = self.sess.run([self.train_op, self.loss], feed_dict={self.mainNet.input: state,
                                                               self.target: targets,
                                                               self.action: action})

        return l 
Example 13
Project: tensorflow_RL   Author: RLOpensource   File: qrdqn.py    License: MIT License
def train_model(self):
        minibatch = random.sample(self.memory, self.batch_size)
        state_stack = [mini[0] for mini in minibatch]
        next_state_stack = [mini[1] for mini in minibatch]
        action_stack = [mini[2] for mini in minibatch]
        reward_stack = [mini[3] for mini in minibatch]
        done_stack = [mini[4] for mini in minibatch]
        done_stack = [int(i) for i in done_stack]
        onehotaction = np.zeros([self.batch_size, self.output_size])
        for i, j in zip(onehotaction, action_stack):
            i[j] = 1
        action_stack = np.stack(onehotaction)

        Q_next_state = self.sess.run(self.target_network, feed_dict={self.targetNet.input: next_state_stack})
        next_action = np.argmax(np.mean(Q_next_state, axis=2), axis=1)
        Q_next_state_next_action = [Q_next_state[i, action, :] for i, action in enumerate(next_action)]
        Q_next_state_next_action = np.sort(Q_next_state_next_action)
        T_theta = [np.ones(self.num_support) * reward if done else reward + self.gamma * Q for reward, Q, done in
                    zip(reward_stack, Q_next_state_next_action, done_stack)]
        _, l = self.sess.run([self.train_op, self.loss],
                                feed_dict={self.mainNet.input: state_stack, self.action: action_stack, self.Y: T_theta})
        return l 
Example 14
Project: ICDAR-2019-SROIE   Author: zzzDavid   File: my_data.py    License: MIT License
def get_train_data(self, batch_size=8):
        samples = random.sample(list(self.train_dict.keys()), batch_size)  # dict views must be converted to a sequence (Python 3.11+)

        texts = [self.train_dict[k][0] for k in samples]
        labels = [self.train_dict[k][1] for k in samples]

        robust_padding(texts, labels)

        maxlen = max(len(t) for t in texts)

        text_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, text in enumerate(texts):
            text_tensor[:, i] = torch.LongTensor([VOCAB.find(c) for c in text])

        truth_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, label in enumerate(labels):
            truth_tensor[:, i] = torch.LongTensor(label)

        return text_tensor.to(self.device), truth_tensor.to(self.device) 
Example 15
Project: ICDAR-2019-SROIE   Author: zzzDavid   File: my_data.py    License: MIT License
def get_val_data(self, batch_size=8, device="cpu"):
        keys = random.sample(list(self.val_dict.keys()), batch_size)  # dict views must be converted to a sequence (Python 3.11+)

        texts = [self.val_dict[k][0] for k in keys]
        labels = [self.val_dict[k][1] for k in keys]

        maxlen = max(len(s) for s in texts)
        texts = [s.ljust(maxlen, " ") for s in texts]
        labels = [
            numpy.pad(a, (0, maxlen - len(a)), mode="constant", constant_values=0)
            for a in labels
        ]

        text_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, text in enumerate(texts):
            text_tensor[:, i] = torch.LongTensor([VOCAB.find(c) for c in text])

        truth_tensor = torch.zeros(maxlen, batch_size, dtype=torch.long)
        for i, label in enumerate(labels):
            truth_tensor[:, i] = torch.LongTensor(label)

        return keys, text_tensor.to(self.device), truth_tensor.to(self.device) 
Example 16
Project: YaYaGen   Author: jimmy-sonny   File: optimizer.py    License: BSD 2-Clause "Simplified" License
def basic_optimizer(rule):
    """
    Randomly remove attributes until reaching UPPER_THRESHOLD
    """
    from . import rule as rule_class
    if rule.evaluate() < rule_class.YaraRule.values['UPPER_THRESHOLD']:
        return rule

    optimized = False
    opt_rule = rule_class.YaraRule(rule)

    while not optimized:
        candidate = random.sample(opt_rule, 1)[0]
        temp = rule_class.YaraRule(opt_rule - {candidate})
        weight = temp.evaluate()
        c1 = weight > rule_class.YaraRule.values['UPPER_THRESHOLD']
        c2 = weight > rule_class.YaraRule.values['THRESHOLD']
        if c1 and c2:
            opt_rule = temp
        else:
            optimized = True
    return opt_rule 
Example 17
Project: NTM-One-Shot-TF   Author: hmishra2250   File: Generator.py    License: MIT License
def sample(self, nb_samples):
        sampled_character_folders = random.sample(self.character_folders, nb_samples)
        random.shuffle(sampled_character_folders)

        example_inputs = np.zeros((self.batch_size, nb_samples * self.nb_samples_per_class, np.prod(self.img_size)), dtype=np.float32)
        example_outputs = np.zeros((self.batch_size, nb_samples * self.nb_samples_per_class), dtype=np.float32)     #notice hardcoded np.float32 here and above, change it to something else in tf

        for i in range(self.batch_size):
            labels_and_images = get_shuffled_images(sampled_character_folders, range(nb_samples), nb_samples=self.nb_samples_per_class)
            sequence_length = len(labels_and_images)
            labels, image_files = zip(*labels_and_images)

            angles = np.random.uniform(-self.max_rotation, self.max_rotation, size=sequence_length)
            shifts = np.random.uniform(-self.max_shift, self.max_shift, size=sequence_length)

            example_inputs[i] = np.asarray([load_transform(filename, angle=angle, s=shift, size=self.img_size).flatten() \
                                            for (filename, angle, shift) in zip(image_files, angles, shifts)], dtype=np.float32)
            example_outputs[i] = np.asarray(labels, dtype=np.int32)

        return example_inputs, example_outputs 
Example 18
Project: me-ica   Author: ME-ICA   File: classifier_nodes.py    License: GNU Lesser General Public License v2.1
def __init__(self, k=1, execute_method=None,
                 input_dim=None, output_dim=None, dtype=None):
        """Initialize classifier.
        
        k -- Number of closest sample points that are taken into account.
        """
        super(KNNClassifier, self).__init__(execute_method=execute_method,
                                            input_dim=input_dim,
                                            output_dim=output_dim,
                                            dtype=dtype)
        self.k = k
        self._label_samples = {}  # temporary variable during training
        self.n_samples = None
        # initialized after training:
        self.samples = None  # 2d array with all samples
        self.sample_label_indices = None  # 1d array for label indices
        self.ordered_labels = [] 
Example 19
Project: chainerrl   Author: chainer   File: test_random.py    License: MIT License
def _test(self):
        t = self.get_timeit(
            "from chainerrl.misc.random import sample_n_k")

        # faster than random.sample
        t1 = self.get_timeit("""
import random
def sample_n_k(n, k):
    return random.sample(range(n), k)
""")
        self.assertLess(t, t1)

        # faster than np.random.choice(..., replace=False)
        t2 = self.get_timeit("""
import numpy as np
def sample_n_k(n, k):
    return np.random.choice(n, k, replace=False)
""")
        self.assertLess(t, t2) 
Example 20
Project: hadrian   Author: modelop   File: kmeans.py    License: Apache License 2.0
def randomSubset(self, subsetSize):
        """Return a (dataset, weights) that are randomly chosen to have ``subsetSize`` records.

        :type subsetSize: positive integer
        :param subsetSize: size of the sample
        :rtype: (2-d Numpy array, 1-d Numpy array)
        :return: (dataset, weights) sampled without replacement (if the original dataset is unique, the new one will be, too)
        """

        if subsetSize <= self.numberOfClusters:
            raise TypeError("subsetSize must be strictly greater than the numberOfClusters")

        indexes = random.sample(range(self.dataset.shape[0]), subsetSize)
        dataset = self.dataset[indexes,:]
        if self.weights is None:
            weights = None
        else:
            weights = self.weights[indexes]

        return dataset, weights 
Example 21
Project: reinforcement_learning   Author: yrlu   File: exp_replay.py    License: MIT License
def sample(self, num=None):
    """Randomly draw [num] samples"""
    if num is None:
      num = self.batch_size
    if len(self.mem) < self.start_mem:
      return []
    sampled_idx = random.sample(range(abs(self.kth),len(self.mem)), num)
    samples = []
    for idx in sampled_idx:
      steps = self.mem[idx-abs(self.kth):idx]
      cur_state = np.stack([s.cur_step for s in steps], axis=len(self.state_size))
      next_state = np.stack([s.next_step for s in steps], axis=len(self.state_size))
      # handle special cases
      if self.kth == -1:
        cur_state = steps[0].cur_step
        next_state = steps[0].next_step
      elif len(self.state_size) == 1:
        cur_state = [steps[0].cur_step]
        next_state = [steps[0].next_step]
      reward = steps[-1].reward
      action = steps[-1].action
      done = steps[-1].done
      samples.append(Step(cur_step=cur_state, action=action, next_step=next_state, reward=reward, done=done))
    return samples 
Example 22
Project: indras_net   Author: gcallah   File: dealer_factory.py    License: GNU General Public License v3.0
def generate_dealer(unused1, unused2, *kwargs):
    dealer = Agent("dealer" + str(random.randint(0, MAX_DEALERS)),
                   action=dealer_action)
    num_emojis = random.randint(1, len(DEF_CORRELATIONS) // 2)
    dealer.attrs["emojis"] = random.sample(emoji_list, num_emojis)
    dealer.attrs["avg_car_life"] = avg_life_from_emojis(dealer.attrs["emojis"])
    return dealer 
Example 23
Project: mutatest   Author: EvanKepner   File: run.py    License: MIT License
def get_sample(ggrp: GenomeGroup, ignore_coverage: bool) -> List[GenomeGroupTarget]:
    """Get the sample space for the mutation trials.

    This will attempt to use covered-targets as the default unless ``ignore_coverage`` is set
    to True. If the configured .coverage file is not found, the total targets are returned instead.

    Args:
        ggrp: the Genome Group to generate the sample space of targets
        ignore_coverage: flag to ignore coverage if present

    Returns:
        Sorted list of Path-LocIndex pairs as complete sample space from the ``GenomeGroup``.
    """
    if ignore_coverage:
        LOGGER.info("Ignoring coverage file for sample space creation.")

    try:
        sample = ggrp.targets if ignore_coverage else ggrp.covered_targets

    except FileNotFoundError:
        LOGGER.info("Coverage file does not exist, proceeding to sample from all targets.")
        sample = ggrp.targets

    # sorted list used for repeat trials using random seed instead of set
    sort_by_keys = attrgetter(
        "source_path",
        "loc_idx.lineno",
        "loc_idx.col_offset",
        "loc_idx.end_lineno",
        "loc_idx.end_col_offset",
    )
    return sorted(sample, key=sort_by_keys) 
Example 24
Project: ALF   Author: blackberry   File: BinaryFuzz.py    License: Apache License 2.0
def _select_active_fuzz_types(self):
        """
        This method is used to randomly disable different fuzz types on a per iteration basis.
        """
        type_count = len(self.fuzz_types)
        if type_count < 2:
            return
        self.active_fuzz_types = random.sample(self.fuzz_types, random.randint(1, type_count)) 
Example 25
Project: wechatpy   Author: wechatpy   File: utils.py    License: MIT License
def random_string(length=16):
    rule = string.ascii_letters + string.digits
    rand_list = random.sample(rule, length)
    return "".join(rand_list) 
Example 26
Project: comet-commonsense   Author: atcbosselut   File: atomic.py    License: Apache License 2.0
def select_partial_dataset(data_opts, data):
    num_selections = math.ceil(data_opts.kr * len(data))
    return random.sample(data, num_selections) 
Example 27
Project: BiblioPixelAnimations   Author: ManiacalLabs   File: Random.py    License: MIT License
def step(self, amt=1):
        if not (self.every and self.cur_step % self.every):
            indexes = range(len(self.color_list))
            if self.count:
                indexes = random.sample(indexes, self.count)
            d = self.distribution
            r, g, b = self.levels
            for i in indexes:
                self.color_list[i] = (r * d(), g * d(), b * d()) 
Example 28
Project: subword-qac   Author: clovaai   File: generate.py    License: MIT License
def main(args):
    logger.info(f"Args: {json.dumps(args.__dict__, indent=2, sort_keys=True)}")

    spm_path = os.path.join('spm', args.spm, "spm.model")
    logger.info(f"Loading tokenizer from {spm_path}")
    tokenizer = Tokenizer(spm_path)
    args.ntoken = ntoken = len(tokenizer)
    args.branching_factor = min([args.branching_factor, args.ntoken])
    logger.info(f"  Vocab size: {ntoken}")

    n_queries_str = f"{f'only {args.n_queries} samples' if args.n_queries else 'all'} queries from"
    logger.info(f"Reading a dataset ({n_queries_str} test.query.txt)")
    seen_set = set(read_data(os.path.join(args.data_dir, "train.query.txt"), min_len=args.min_len))
    test_data = read_data(os.path.join(args.data_dir, "test.query.txt"), min_len=args.min_len)
    if args.n_queries:
        random.seed(args.seed)
        test_data = random.sample(test_data, args.n_queries)
    n_seen_test_data = len([x for x in test_data if x in seen_set])
    n_unseen_test_data = len(test_data) - n_seen_test_data
    logger.info(f"  Number of test data: {len(test_data):8d} (seen {n_seen_test_data}, unseen {n_unseen_test_data})")

    logger.info(f"Loading model from {args.model_dir}")
    model = model_load(args.model_dir)
    model = model.to(device)

    logger.info('Generation starts!')
    with torch.no_grad():
        generate(model, tokenizer, test_data, args, seen_set=seen_set, calc_mrl=args.calc_mrl) 
Example 29
Project: CAMISIM   Author: CAMI-challenge   File: create_joint_gs.py    License: Apache License 2.0
def create_gold_standards(bamtogold, used_samples, metadata, out, threads, shuffle, name="S"):
    """
    Creation of the gold standards per sample. Uses the helper script bamToGold and merges all bam files of the same genome per sample across runs
    """
    for sample in used_samples:
        runs = used_samples[sample]
        bam_per_genome = add_to_bam_per_genome({}, runs)
        contig_name = name + str(sample) + "C"
        sample_path = os.path.join(out,"sample_%s" % sample) # creating a folder for every sample
        os.mkdir(sample_path)
        merged = merge_bam_files(bam_per_genome, sample_path, threads)
        bamToGold(bamtogold, merged, sample_path, metadata, threads)
        create_gsa_mapping(sample_path, metadata, contig_name, shuffle)