Python random.sample() Examples

The following code examples show how to use random.sample(). They are drawn from open source Python projects.
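
Before diving in, a quick refresher (a minimal sketch based on the documented behavior of the standard library): random.sample(population, k) returns a new k-length list of unique elements drawn without replacement, leaves the population itself unchanged, and raises ValueError if k exceeds the population size.

import random

random.seed(42)                     # optional: makes the draw reproducible
print(random.sample(range(10), 3))  # three unique values from 0..9
letters = ['a', 'b', 'c', 'd']
pair = random.sample(letters, 2)    # a new 2-element list; `letters` is unchanged
try:
    random.sample(letters, 10)      # k larger than the population
except ValueError:
    print("sample larger than population")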

Example 1
Project: meta-transfer-learning   Author: erfaneshrati   File: reptile.py    MIT License 6 votes
def _sample_mini_dataset(dataset, num_classes, num_shots, metatransfer=False):
    """
    Sample a few shot task from a dataset.

    Returns:
      An iterable of (input, label) pairs, or of (input, label, real_label)
      triples when metatransfer is True.
    """
    shuffled = list(dataset)
    if metatransfer:
        indices = np.random.randint(64, size=num_classes)
        for class_idx, class_real_idx in enumerate(indices):
            for sample in shuffled[class_real_idx].sample(num_shots):
                yield (sample, class_idx, class_real_idx)
    else:
        random.shuffle(shuffled)
        for class_idx, class_obj in enumerate(shuffled[:num_classes]):
            for sample in class_obj.sample(num_shots):
                yield (sample, class_idx) 
Example 2
Project: model-api-sequence   Author: evandowning   File: sim_stats.py    GNU General Public License v3.0 6 votes
def get_labels(folder,fn):
    rv = dict()

    with open(fn,'r') as fr:
        for line in fr:
            line = line.strip('\n')
            h = line.split('\t')[0]
            label = line.split('\t')[-1]

            # If this sample contains no data, ignore it
            if os.path.getsize(os.path.join(folder,h)) == 0:
                continue

            if label not in rv:
                rv[label] = list()

            rv[label].append(h)

    return rv 
Example 3
Project: f5go   Author: f5devcentral   File: go.py    MIT License 6 votes
def clicked(self, n=1):
        """
        :param n: The number of clicks to record
        :return:
        """
        todayord = today()
        if todayord not in self.clickData:
            # partition clickdata around 30 days ago
            archival = []
            recent = []
            for od, nclicks in list(self.clickData.items()):
                if todayord - 30 > od:
                    archival.append((od, nclicks))
                else:
                    recent.append((od, nclicks))

            # archive older samples
            if archival:
                self.archivedClicks += sum(nclicks for od, nclicks in archival)

            # recent will have at least one sample if it was ever clicked
            recent.append((todayord, n))
            self.clickData = dict(recent)
        else:
            self.clickData[todayord] += n 
Example 4
Project: pyblish-win   Author: pyblish   File: test_random.py    GNU Lesser General Public License v3.0 6 votes
def test_sample_distribution(self):
        # For the entire allowable range of 0 <= k <= N, validate that
        # sample generates all possible permutations
        n = 5
        pop = range(n)
        trials = 10000  # large num prevents false negatives without slowing normal case
        def factorial(n):
            # product 1*2*...*n; xrange's upper bound is exclusive, hence n+1
            return reduce(int.__mul__, xrange(1, n+1), 1)
        for k in xrange(n):
            expected = factorial(n) // factorial(n-k)
            perms = {}
            for i in xrange(trials):
                perms[tuple(self.gen.sample(pop, k))] = None
                if len(perms) == expected:
                    break
            else:
                self.fail() 
Example 5
Project: pyblish-win   Author: pyblish   File: test_random.py    GNU Lesser General Public License v3.0 6 votes
def test_sample_on_dicts(self):
        self.gen.sample(dict.fromkeys('abcdefghijklmnopqrst'), 2)

        # SF bug #1460340 -- random.sample can raise KeyError
        a = dict.fromkeys(range(10)+range(10,100,2)+range(100,110))
        self.gen.sample(a, 3)

        # A followup to bug #1460340:  sampling from a dict could return
        # a subset of its keys or of its values, depending on the size of
        # the subset requested.
        N = 30
        d = dict((i, complex(i, i)) for i in xrange(N))
        for k in xrange(N+1):
            samp = self.gen.sample(d, k)
            # Verify that we got ints back (keys); the values are complex.
            for x in samp:
                self.assertTrue(type(x) is int)
        samp.sort()
        self.assertEqual(samp, range(N)) 
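
Note for modern readers: this test exercises Python 2 behavior. In Python 3, random.sample() requires its population to be a sequence, so passing a dict raises TypeError, and set support was deprecated in 3.9 and removed in 3.11. A minimal sketch of the Python 3 equivalent, materializing the keys first:

import random

d = dict.fromkeys('abcdefghijklmnopqrst')
two_keys = random.sample(list(d), 2)  # list(d) yields the keys as a sequence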
Example 6
Project: claxon   Author: vanatteveldt   File: ml.py    GNU General Public License v3.0 6 votes
def get_todo(session: Session, model: Language, n=10) -> OrderedDict:
    """Populate the queue of documents to code"""
    done = {a.document_id for a in Annotation.objects.filter(document__gold=False, label=session.label)}
    todo = Document.objects.filter(gold=False).exclude(pk__in=done)
    if session.query:
        todo = todo.filter(text__icontains=session.query)
    todo = list(todo.values_list("id", flat=True))
    logging.debug("{ntodo} documents in todo (query: {q}, done={ndone})"
                  .format(ntodo=len(todo), ndone=len(done), q=session.query))
    if len(todo) > settings.N_SAMPLE:
        todo = sample(todo, settings.N_SAMPLE)

    tc = model.get_pipe("textcat")
    tokens = [get_tokens(model, doc_id) for doc_id in todo]
    scores = [d.cats[session.label.label] for d in tc.pipe(tokens)]
    uncertainty = [abs(score - 0.5) for score in scores]
    index = list(argsort(uncertainty))[:n]

    return OrderedDict((todo[i], scores[i]) for i in index) 
Example 7
Project: ieml   Author: IEMLdev   File: tools.py    GNU General Public License v3.0 6 votes
def _build_graph_object(self, primitive, mode, max_nodes=6):
        nodes = {primitive()}
        modes = set()

        if max_nodes < 2:
            raise ValueError('Max nodes >= 2.')

        result = set()

        for i in range(random.randint(2, max_nodes)):
            while True:
                s, a, m = random.sample(nodes, 1)[0], primitive(), mode()
                if a in nodes or m in nodes or a in modes:
                    continue

                nodes.add(a)
                modes.add(m)

                result.add((s, a, m))
                break
        return result 
Example 8
Project: programsynthesishunting   Author: flexgp   File: agent.py    GNU General Public License v3.0 6 votes
def sense(self, agents):
        # This part makes the GE algorithm useful for multi-agent systems. This method is
        # responsible for sensing information from the environment.
        # Actual robots would override this method with their own logic for nearby-agent discovery.

        import random

        # Logic that defines how an agent discovers nearby agents:
        # a random value greater than the interaction probability parameter denotes that
        # the agent has found some nearby agents. The higher the probability, the better
        # the chance of the agent sharing its genome with other agents.
        if random.random() > self.interaction_probability:
            # Turn the flag to True
            self.agents_found = True

            # Compute bounds on how many agents to sample for interaction
            range_min = int((self.interaction_probability * len(agents)) / 3)
            range_max = int((self.interaction_probability * len(agents)) / 2)
            range_avg = int((range_min + range_max) / 2)

            # Sample agent indices from the list of agents; the sample size is chosen
            # randomly from the three values above
            no_agents_found = random.sample(range(len(agents)), random.choice([range_min, range_max, range_avg]))

            # Extract the individuals from the nearby agents and store them in the class variable
            self.nearby_agents = [agents[id].individual[0] for id in no_agents_found] 
Example 9
Project: Ansible-Example-AB2018   Author: umit-ozturk   File: linode.py    MIT License 6 votes
def randompass():
    '''
    Generate a long random password that comply to Linode requirements
    '''
    # Linode API currently requires the following:
    # It must contain at least two of these four character classes:
    # lower case letters - upper case letters - numbers - punctuation
    # we play it safe :)
    import random
    import string
    # as of python 2.4, this reseeds the PRNG from urandom
    random.seed()
    lower = ''.join(random.choice(string.ascii_lowercase) for x in range(6))
    upper = ''.join(random.choice(string.ascii_uppercase) for x in range(6))
    number = ''.join(random.choice(string.digits) for x in range(6))
    punct = ''.join(random.choice(string.punctuation) for x in range(6))
    p = lower + upper + number + punct
    return ''.join(random.sample(p, len(p))) 
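
The last line is a common idiom: because random.sample(p, len(p)) draws every character of p exactly once, it returns a shuffled copy of the string without mutating anything. An equivalent sketch (not from the project) using random.shuffle:

import random

def shuffled(s):
    chars = list(s)        # random.shuffle needs a mutable sequence
    random.shuffle(chars)  # in-place Fisher-Yates shuffle
    return ''.join(chars)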
Example 10
Project: flasky   Author: RoseOu   File: lorem_ipsum.py    MIT License 6 votes
def words(quantity=10, as_list=False):
    """Random words."""
    global _words

    if not _words:
        _words = ' '.join(get_dictionary('lorem_ipsum')).lower().\
            replace('\n', '')
        _words = re.sub(r'\.|,|;/', '', _words)
        _words = _words.split(' ')

    result = random.sample(_words, quantity)

    if as_list:
        return result
    else:
        return ' '.join(result) 
Example 11
Project: DOTA_models   Author: ringringyi   File: replay_buffer.py    Apache License 2.0 6 votes
def remove_n(self, n):
    """Get n items for removal."""
    assert self.init_length + n <= self.cur_size

    if self.eviction_strategy == 'rand':
      # random removal
      idxs = random.sample(xrange(self.init_length, self.cur_size), n)
    elif self.eviction_strategy == 'fifo':
      # overwrite elements in cyclical fashion
      idxs = [
          self.init_length +
          (self.remove_idx + i) % (self.max_size - self.init_length)
          for i in xrange(n)]
      self.remove_idx = idxs[-1] + 1 - self.init_length
    elif self.eviction_strategy == 'rank':
      # remove lowest-priority indices
      idxs = np.argpartition(self.priorities, n)[:n]

    return idxs 
Example 12
Project: OpenNRE   Author: thunlp   File: data_loader.py    MIT License 6 votes
def __getitem__(self, index):
        bag = self.bag_scope[index]
        if self.bag_size > 0:
            if self.bag_size <= len(bag):
                resize_bag = random.sample(bag, self.bag_size)
            else:
                resize_bag = bag + list(np.random.choice(bag, self.bag_size - len(bag)))
            bag = resize_bag
            
        seqs = None
        rel = self.rel2id[self.data[bag[0]]['relation']]
        for sent_id in bag:
            item = self.data[sent_id]
            seq = list(self.tokenizer(item))
            if seqs is None:
                seqs = []
                for i in range(len(seq)):
                    seqs.append([])
            for i in range(len(seq)):
                seqs[i].append(seq[i])
        for i in range(len(seqs)):
            seqs[i] = torch.cat(seqs[i], 0) # (n, L), n is the size of bag
        return [rel, self.bag_name[index], len(bag)] + seqs 
Example 13
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    MIT License 6 votes
def reset(self):
        """
        Logic for sampling a state from the demonstration and resetting
        the simulation to that state. 
        """
        state = self.sample()
        if state is None:
            # None indicates that a normal env reset should occur
            return self.env.reset()
        else:
            if self.need_xml:
                # reset the simulation from the model if necessary
                state, xml = state
                self.env.reset_from_xml_string(xml)

            if isinstance(state, tuple):
                state = state[0]

            # force simulator state to one from the demo
            self.sim.set_state_from_flattened(state)
            self.sim.forward()

            return self.env._get_observation() 
Example 14
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    MIT License 6 votes
def sample(self):
        """
        This is the core sampling method. Samples a state from a
        demonstration, in accordance with the configuration.
        """

        # chooses a sampling scheme randomly based on the mixing ratios
        seed = random.uniform(0, 1)
        ratio = np.cumsum(self.scheme_ratios)
        ratio = ratio > seed
        for i, v in enumerate(ratio):
            if v:
                break

        sample_method = getattr(self, self.sample_method_dict[self.sampling_schemes[i]])
        return sample_method() 
Example 15
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    MIT License 6 votes
def _uniform_sample(self):
        """
        Sampling method.

        First uniformly sample a demonstration from the set of demonstrations.
        Then uniformly sample a state from the selected demonstration.
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # select a flattened mujoco state uniformly from this episode
        states = self.demo_file["data/{}/states".format(ep_ind)].value
        state = random.choice(states)

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml
        return state 
Example 16
Project: douyin   Author: luocaiwei   File: douyin.py    BSD 3-Clause "New" or "Revised" License 6 votes
def gen_device_data():
    client_uuid = "".join(random.sample(string.digits * 2, 15))
    serial_number = "".join(random.sample(string.digits + "abcdef", 16))
    openudid = "".join(random.sample(string.digits * 2, 16))
    data = {"time_sync": {"local_time": str(int(time())), "server_time": str(int(time()))},
            "magic_tag": "ss_app_log",
            "header": {"sdk_version": 1132, "language": "zh",
                       "user_agent": "okhttp/2.9.0",
                       "app_name": "aweme", "app_version": "2.9.0", "is_upgrade_user": 0, "region": "CN",
                       "vendor_id": serial_number, "app_region": "CN",
                       "channel": "App Store", "mcc_mnc": "46001",
                       "custom": {"app_region": "CN", "build_number": "29001", "app_language": "zh"},
                       "resolution": "1125*2436", "aid": "1128", "os": "Android", "tz_offset": 28800,
                       "access": "WIFI", "openudid": openudid,
                       "carrier": "%D6%D0%B9%FA%D2%C6%B6%AF", "is_jailbroken": 0, "os_version": "11.4",
                       "app_language": "zh", "device_model": "OnePlus",
                       "display_name": "%B6%B6%D2%F4%B6%CC%CA%D3%C6%B5", "mc": "02:00:00:00:00:00",
                       "package": "com.ss.android.ugc.Aweme", "timezone": 8, "tz_name": "Asia\/Shanghai",
                       "idfa": client_uuid}, "fingerprint": ""}
    return data,(openudid,serial_number,client_uuid) 
Example 17
Project: ngo-addons-backport   Author: camptocamp   File: test_serving_base.py    GNU Affero General Public License v3.0 6 votes
def test_topological_sort(self):
        random.shuffle(self.mods)
        modules = [
            (k, sample(self.mods[:i]))
            for i, k in enumerate(self.mods)]
        random.shuffle(modules)
        ms = dict(modules)

        seen = set()
        sorted_modules = sort(ms)
        for module in sorted_modules:
            deps = ms[module]
            self.assertGreaterEqual(
                seen, set(deps),
                'Module %s (index %d), missing dependencies %s from loaded modules %s' % (
                    module, sorted_modules.index(module), deps, seen))
            seen.add(module) 
Example 18
Project: mrbait   Author: tkchafin   File: alignment_tools.py    GNU General Public License v3.0 6 votes
def get_iupac(char):
	iupac = {
		"A"	: ["A", "A"],
		"G"	: ["G", "G"],
		"C"	: ["C", "C"],
		"T"	: ["T", "T"],
		"N"	: ["N", "N"],
		"-"	: ["-", "-"],
		"R"	: ["A","G"],
		"Y"	: ["C","T"],
		"S"	: ["G","C"],
		"W"	: ["A","T"],
		"K"	: ["G","T"],
		"M"	: ["A","C"],
		"B"	: random.sample(["C","G","T"], 2),
		"D"	: random.sample(["A","G","T"], 2),
		"H"	: random.sample(["A","C","T"], 2),
		"V"	: random.sample(["A","C","G"], 2)
	}
	return iupac[char]

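Because the dict literal above is evaluated eagerly, all four random.sample() calls run on every invocation of get_iupac, even when the requested code is unambiguous. A minimal sketch (a hypothetical refactor, not from the project) that samples only on demand:

import random

AMBIGUITY = {
    "R": ["A", "G"], "Y": ["C", "T"], "S": ["G", "C"], "W": ["A", "T"],
    "K": ["G", "T"], "M": ["A", "C"],
    "B": ["C", "G", "T"], "D": ["A", "G", "T"], "H": ["A", "C", "T"], "V": ["A", "C", "G"],
}

def get_iupac_lazy(char):
    if char in "AGCTN-":
        return [char, char]
    bases = AMBIGUITY[char]
    # three-base codes draw 2 of 3 at random; two-base codes return both
    return random.sample(bases, 2) if len(bases) == 3 else list(bases)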
Example 19
Project: mutatest   Author: EvanKepner   File: run.py    MIT License 5 votes
def get_sample(ggrp: GenomeGroup, ignore_coverage: bool) -> List[GenomeGroupTarget]:
    """Get the sample space for the mutation trials.

    This will attempt to use covered-targets as the default unless ``ignore_coverage`` is set
    to True. If the .coverage file is not found, the total targets are returned instead.

    Args:
        ggrp: the Genome Group to generate the sample space of targets
        ignore_coverage: flag to ignore coverage if present

    Returns:
        Sorted list of Path-LocIndex pairs as complete sample space from the ``GenomeGroup``.
    """
    if ignore_coverage:
        LOGGER.info("Ignoring coverage file for sample space creation.")

    try:
        sample = ggrp.targets if ignore_coverage else ggrp.covered_targets

    except FileNotFoundError:
        LOGGER.info("Coverage file does not exist, proceeding to sample from all targets.")
        sample = ggrp.targets

    # sorted list used for repeat trials using random seed instead of set
    sort_by_keys = attrgetter(
        "source_path",
        "loc_idx.lineno",
        "loc_idx.col_offset",
        "loc_idx.end_lineno",
        "loc_idx.end_col_offset",
    )
    return sorted(sample, key=sort_by_keys) 
Example 20
Project: meta-transfer-learning   Author: erfaneshrati   File: reptile.py    MIT License 5 votes
def _mini_batches(samples, batch_size, num_batches, replacement):
    """
    Generate mini-batches from some data.

    Returns:
      An iterable of sequences of (input, label) pairs,
        where each sequence is a mini-batch.
    """
    samples = list(samples)
    if replacement:
        for _ in range(num_batches):
            yield random.sample(samples, batch_size)
        return
    cur_batch = []
    batch_count = 0
    while True:
        random.shuffle(samples)
        for sample in samples:
            cur_batch.append(sample)
            if len(cur_batch) < batch_size:
                continue
            yield cur_batch
            cur_batch = []
            batch_count += 1
            if batch_count == num_batches:
                return 
Example 21
Project: pyblish-win   Author: pyblish   File: test_dict.py    GNU Lesser General Public License v3.0 5 votes
def test_literal_constructor(self):
        # check literal constructor for different sized dicts
        # (to exercise the BUILD_MAP oparg).
        for n in (0, 1, 6, 256, 400):
            items = [(''.join(random.sample(string.letters, 8)), i)
                     for i in range(n)]
            random.shuffle(items)
            formatted_items = ('{!r}: {:d}'.format(k, v) for k, v in items)
            dictliteral = '{' + ', '.join(formatted_items) + '}'
            self.assertEqual(eval(dictliteral), dict(items)) 
Example 22
Project: pyblish-win   Author: pyblish   File: test_random.py    GNU Lesser General Public License v3.0 5 votes
def test_sample(self):
        # For the entire allowable range of 0 <= k <= N, validate that
        # the sample is of the correct length and contains only unique items
        N = 100
        population = xrange(N)
        for k in xrange(N+1):
            s = self.gen.sample(population, k)
            self.assertEqual(len(s), k)
            uniq = set(s)
            self.assertEqual(len(uniq), k)
            self.assertTrue(uniq <= set(population))
        self.assertEqual(self.gen.sample([], 0), [])  # test edge case N==k==0 
Example 23
Project: pyblish-win   Author: pyblish   File: test_random.py    GNU Lesser General Public License v3.0 5 votes
def test_sample_inputs(self):
        # SF bug #801342 -- population can be any iterable defining __len__()
        self.gen.sample(set(range(20)), 2)
        self.gen.sample(range(20), 2)
        self.gen.sample(xrange(20), 2)
        self.gen.sample(str('abcdefghijklmnopqrst'), 2)
        self.gen.sample(tuple('abcdefghijklmnopqrst'), 2) 
Example 24
Project: python-samples   Author: dek-odoo   File: dek_program079.py    Apache License 2.0 5 votes
def main(startLimit, endLimit):
    print random.sample([number for number in range(startLimit, endLimit + 1)
                         if number % 2 == 0], 5) 
Example 25
Project: python-samples   Author: dek-odoo   File: dek_program080.py    Apache License 2.0 5 votes
def main(startLimit, endLimit):
    print random.sample([number for number in range(startLimit, endLimit + 1)
                         if number % 5 == 0 and number % 7 == 0], 5) 
Example 26
Project: python-samples   Author: dek-odoo   File: dek_program078.py    Apache License 2.0 5 votes
def main(startLimit, endLimit):
    print random.sample(range(startLimit, endLimit + 1), 5) 
Example 27
Project: claxon   Author: vanatteveldt   File: ml.py    GNU General Public License v3.0 5 votes
def retrain(project: Project, iterations=10):
    annotations = list(Annotation.objects.filter(session__project_id=project.id, document__gold=False))
    train_eval = sample(annotations, 500) if len(annotations) > 500 else annotations
    logging.info("Retraining model using {} annotations".format(len(annotations)))
    def log_performance(i, model):
        evals = evaluate(project, model)
        evals_t = evaluate(project, model, train_eval)
        logging.info("It.{:2}: {} (train: {})".format(i, combine(evals).eval_str(label=""), combine(evals_t).eval_str(label="")))
    model = train(project, annotations, iterations, callback=log_performance)
    evals = evaluate(project, model)

    save_model(model, project)
    project.model_evaluation = json.dumps([e.to_dict() for e in evals])
    project.save()
    logging.info("Done retraining!") 
Example 28
Project: comet-commonsense   Author: atcbosselut   File: atomic.py    Apache License 2.0 5 votes
def select_partial_dataset(data_opts, data):
    num_selections = math.ceil(data_opts.kr * len(data))
    return random.sample(data, num_selections) 
Example 29
Project: ieml   Author: IEMLdev   File: tools.py    GNU General Public License v3.0 5 votes
def word(self):
        return Word(random.sample(self.scripts, 1)[0]) 
Example 30
Project: ieml   Author: IEMLdev   File: tools.py    GNU General Public License v3.0 5 votes
def uniterm_topic(self):
        return topic([random.sample(self.scripts, 1)]) 
Example 31
Project: ieml   Author: IEMLdev   File: tools.py    GNU General Public License v3.0 5 votes
def topic(self):
        return topic([Word(t) for t in random.sample(self.scripts, 3)],
                     [Word(t) for t in random.sample(self.scripts, 2)]) 
Example 32
Project: ieml   Author: IEMLdev   File: tools.py    GNU General Public License v3.0 5 votes
def fact(self, max_clause=6):
        def p():
            return random.sample(self.topics_pool, 1)[0]

        return fact(self._build_graph_object(p, p, max_nodes=max_clause)) 
Example 33
Project: ieml   Author: IEMLdev   File: tools.py    GNU General Public License v3.0 5 votes
def text(self):
        return text(random.sample(self.propositions_pool, random.randint(1, 8))) 
Example 34
Project: ieml   Author: IEMLdev   File: test_descriptor.py    GNU General Public License v3.0 5 votes
def rand_entry():
    return {l: sample(ascii_lowercase, 15) for l in LANGUAGES} 
Example 35
Project: ieml   Author: IEMLdev   File: test_usl.py    GNU General Public License v3.0 5 votes
def test_deference_path(self):
        u = random_usl(rank_type=Text)
        p = random.sample(tuple(u.paths.items()), 1)
        self.assertEqual(u[p[0][0]], p[0][1]) 
Example 36
Project: kuaa   Author: rafaelwerneck   File: subset.py    GNU General Public License v3.0 5 votes
def random_selection(dataset, subset_size):
	l = sum(1 for line in open(dataset,'r'))
	return sorted(random.sample(xrange(l), subset_size)) 
Example 37
Project: kuaa   Author: rafaelwerneck   File: subset.py    GNU General Public License v3.0 5 votes
def stratified_selection(dataset, subset_size):
	labels = [line.split(None,1)[0] for line in open(dataset)]
	label_linenums = defaultdict(list)
	for i, label in enumerate(labels):
		label_linenums[label] += [i]

	l = len(labels)
	remaining = subset_size
	ret = []

	# classes with fewer data are sampled first; otherwise
	# some rare classes may not be selected
	for label in sorted(label_linenums, key=lambda x: len(label_linenums[x])):
		linenums = label_linenums[label]
		label_size = len(linenums) 
		# at least one instance per class
		s = int(min(remaining, max(1, math.ceil(label_size*(float(subset_size)/l)))))
		if s == 0:
			sys.stderr.write('''\
Error: failed to have at least one instance per class
    1. You may have regression data.
    2. Your classification data is unbalanced or too small.
Please use -s 1.
''')
			sys.exit(-1)
		remaining -= s
		ret += [linenums[i] for i in random.sample(xrange(label_size), s)]
	return sorted(ret) 
Example 38
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_MODS.py    MIT License 5 votes
def Dset(self, ndataset=7, name='MODS_data.pkl'):
         '''
         Function to build datasets. ndataset: number of datasets wanted;
         name: pkl file where the data from DSetGlobal is stored. The code makes sure
         that each dataset has the same ratio of positive/negative images.
         This is done by setting a tolerance lmda; if you set a very low lmda, you may have
         to stop the program and rerun it a few times.
         Returns a pkl with a segmented dataset. seg_data is a list of n lists, where n
         is the number of datasets desired. Each of these n lists consists of 2 lists: the data
         and its corresponding labels.
         '''
         f = file(name, 'rb')
         datapapa = cPickle.load(f)
         f.close()    
         w = datapapa[0]
         x = datapapa[1]
         y = range(len(x))
         seg_data = []
         counter = 0
         size = int(len(y)/float(ndataset))
         while counter < ndataset:
             z = random.sample(y, size)
             lmda = 0.0005
             ratio = float(sum([x[i] for i in z]))/(len([x[i] for i in z if x[i]==0]))
             print(ratio)
             dif = math.fabs(ratio-1) #ratio of positive to negatives
             if dif < lmda:
                 print('BINGO!', counter, dif)
                 y = [i for i in y if i not in z]
                 current_label = [x[i] for i in z]
                 current_data = [w[i] for i in z]
                 seg_data.append([current_data, current_label])
                 counter+=1
             else:
                 #print('Does not have an acceptable ratio', ratio, dif)
                 #fun+= 1
                 pass 
         f = file('seg_MODS_data.pkl', 'wb')
         cPickle.dump(seg_data, f, protocol=cPickle.HIGHEST_PROTOCOL)
         f.close() 
Example 39
Project: subword-qac   Author: clovaai   File: generate.py    MIT License 5 votes
def main(args):
    logger.info(f"Args: {json.dumps(args.__dict__, indent=2, sort_keys=True)}")

    spm_path = os.path.join('spm', args.spm, "spm.model")
    logger.info(f"Loading tokenizer from {spm_path}")
    tokenizer = Tokenizer(spm_path)
    args.ntoken = ntoken = len(tokenizer)
    args.branching_factor = min([args.branching_factor, args.ntoken])
    logger.info(f"  Vocab size: {ntoken}")

    n_queries_str = f"{f'only {args.n_queries} samples' if args.n_queries else 'all'} queries from"
    logger.info(f"Reading a dataset ({n_queries_str} test.query.txt)")
    seen_set = set(read_data(os.path.join(args.data_dir, "train.query.txt"), min_len=args.min_len))
    test_data = read_data(os.path.join(args.data_dir, "test.query.txt"), min_len=args.min_len)
    if args.n_queries:
        random.seed(args.seed)
        test_data = random.sample(test_data, args.n_queries)
    n_seen_test_data = len([x for x in test_data if x in seen_set])
    n_unseen_test_data = len(test_data) - n_seen_test_data
    logger.info(f"  Number of test data: {len(test_data):8d} (seen {n_seen_test_data}, unseen {n_unseen_test_data})")

    logger.info(f"Loading model from {args.model_dir}")
    model = model_load(args.model_dir)
    model = model.to(device)

    logger.info('Generation starts!')
    with torch.no_grad():
        generate(model, tokenizer, test_data, args, seen_set=seen_set, calc_mrl=args.calc_mrl) 
Example 40
Project: programsynthesishunting   Author: flexgp   File: crossover.py    GNU General Public License v3.0 5 votes
def crossover(parents):
    """
    Perform crossover on a population of individuals. The size of the crossover
    population is defined as params['GENERATION_SIZE'] rather than
    params['POPULATION_SIZE']. This saves on wasted evaluations and prevents
    search from evaluating too many individuals.
    
    :param parents: A population of parent individuals on which crossover is to
    be performed.
    :return: A population of fully crossed over individuals.
    """

    # Initialise an empty population.
    cross_pop = []

    breakout_counter = 0
    while len(cross_pop) < params['GENERATION_SIZE']:
        # Randomly choose two parents from the parent population.
        inds_in = sample(parents, 2)

        # Perform crossover on chosen parents.
        inds_out = crossover_inds(inds_in[0], inds_in[1])

        if breakout_counter > 500:
            inds_out = [inds_in[0], inds_in[1]]

        if inds_out is None:
            # Crossover failed.
            breakout_counter += 1

        else:
            # Extend the new population.
            cross_pop.extend(inds_out)
            breakout_counter = 0

    return cross_pop 
Example 41
Project: programsynthesishunting   Author: flexgp   File: selection.py    GNU General Public License v3.0 5 votes
def tournament(population) -> List[Individual]:
    """
    Given an entire population, draw <tournament_size> competitors randomly and
    return the best. Unless INVALID_SELECTION is true, only valid individuals
    can be selected for tournaments.

    :param population: A population from which to select individuals.
    :return: A population of the winners from tournaments.
    """

    # Initialise list of tournament winners.
    winners = []

    # The flag "INVALID_SELECTION" allows for selection of invalid individuals.
    if params['INVALID_SELECTION']:
        available = population
    else:
        available = [i for i in population if not i.invalid]

    while len(winners) < params['GENERATION_SIZE']:
        # Randomly choose TOURNAMENT_SIZE competitors from the given
        # population. Allows for re-sampling of individuals.
        competitors = sample(available, params['TOURNAMENT_SIZE'])

        # Return the single best competitor.
        winners.append(max(competitors))

    # Return the population of tournament winners.
    return winners 
Example 42
Project: programsynthesishunting   Author: flexgp   File: selection.py    GNU General Public License v3.0 5 votes
def pareto_tournament(population, pareto, tournament_size) -> Individual:
    """
    The Pareto tournament selection uses both the pareto front of the
    individual and the crowding distance.

    :param population: A population from which to select individuals.
    :param pareto: The pareto front information.
    :param tournament_size: The size of the tournament.
    :return: The single best individual from the tournament.
    """
    
    # Initialise no best solution.
    best = None
    
    # Randomly sample *tournament_size* participants.
    participants = sample(population, tournament_size)
    
    for participant in participants:
        if best is None or crowded_comparison_operator(participant, best,
                                                       pareto):
            best = participant
    
    return best


Example 43
Project: Ansible-Example-AB2018   Author: umit-ozturk   File: ec2_vpc_nat_gateway.py    MIT License 5 votes
def allocate_eip_address(client, check_mode=False):
    """Release an EIP from your EIP Pool
    Args:
        client (botocore.client.EC2): Boto3 client

    Kwargs:
        check_mode (bool): if set to true, do not run anything and
            falsify the results.

    Basic Usage:
        >>> client = boto3.client('ec2')
        >>> allocate_eip_address(client)
        True

    Returns:
        Tuple (bool, str, str)
    """
    ip_allocated = False
    new_eip = None
    err_msg = ''
    params = {
        'Domain': 'vpc',
    }
    try:
        if check_mode:
            ip_allocated = True
            random_numbers = (
                ''.join(str(x) for x in random.sample(range(0, 9), 7))
            )
            new_eip = 'eipalloc-{0}'.format(random_numbers)
        else:
            new_eip = client.allocate_address(**params)['AllocationId']
            ip_allocated = True
        err_msg = 'eipalloc id {0} created'.format(new_eip)

    except botocore.exceptions.ClientError as e:
        err_msg = str(e)

    return ip_allocated, err_msg, new_eip 
Example 44
Project: flasky   Author: RoseOu   File: address.py    MIT License 5 votes
def street_number():
    """Random street number."""
    length = int(random.choice(string.digits[1:6]))
    return ''.join(random.sample(string.digits, length)) 
Example 45
Project: flasky   Author: RoseOu   File: lorem_ipsum.py    MIT License 5 votes
def sentences(quantity=2, as_list=False):
    """Random sentences."""
    result = [sntc.strip() for sntc in
              random.sample(get_dictionary('lorem_ipsum'), quantity)]

    if as_list:
        return result
    else:
        return ' '.join(result) 
Example 46
Project: flasky   Author: RoseOu   File: basic.py    MIT License 5 votes
def hex_color():
    """Random HEX color."""
    return ''.join(random.sample(HEX_DIGITS, 6)) 
Example 47
Project: flasky   Author: RoseOu   File: basic.py    MIT License 5 votes
def hex_color_short():
    """Random short (e.g. `FFF` color)."""
    return ''.join(random.sample(HEX_DIGITS, 3)) 
Example 48
Project: rl_3d   Author: avdmitry   File: agent_dqn.py    MIT License 5 votes
def Get(self, sample_size):

        idx = random.sample(xrange(0, self.size-2), sample_size)
        idx2 = []
        for i in idx:
            idx2.append(i + 1)
        return self.s[idx], self.a[idx], self.s[idx2], self.isterminal[idx], self.r[idx] 
Example 49
Project: fuku-ml   Author: fukuball   File: PocketPLA.py    MIT License 5 votes
def train(self):

        '''
        Train Pocket Perceptron Learning Algorithm
        From f(x) = WX
        Find best h(x) = WX similar to f(x)
        Output W
        '''

        if (self.status != 'init'):
            print("Please load train data and init W first.")
            return self.W

        self.status = 'train'

        new_W = self.W

        self.temp_avg_error = self.calculate_avg_error(self.train_X, self.train_Y, new_W)

        for _ in range(self.updates):
            if (self.loop_mode == 'naive_cycle'):  # string comparison needs ==, not 'is'
                data_check_order = range(self.data_num)
            elif (self.loop_mode == 'random'):
                data_check_order = range(self.data_num)
                data_check_order = random.sample(data_check_order, self.data_num)
            else:
                data_check_order = range(self.data_num)
                data_check_order = random.sample(data_check_order, self.data_num)
            for i in data_check_order:

                if self.error_function(self.score_function(self.train_X[i], new_W), self.train_Y[i]):
                    self.tune_times += 1
                    new_W = new_W + self.step_alpha * (self.train_Y[i] * self.train_X[i])
                    new_avg_error = self.calculate_avg_error(self.train_X, self.train_Y, new_W)
                    if new_avg_error < self.temp_avg_error:
                        self.put_in_pocket_times += 1
                        self.temp_avg_error = new_avg_error
                        self.W = new_W
                    break

        return self.W 
Example 50
Project: Scene-Understanding   Author: foamliu   File: data_generator.py    MIT License 5 votes
def split_data():
    filename = os.path.join(folder_metadata, 'SUNRGBDMeta.mat')
    meta = hdf5storage.loadmat(filename)
    names = []
    for item in meta['SUNRGBDMeta'][0]:
        name = item[0][0]
        names.append(name)

    num_samples = len(names)  # 10335
    print('num_samples: ' + str(num_samples))

    num_train_samples = int(num_samples * 0.8)
    print('num_train_samples: ' + str(num_train_samples))
    num_valid_samples = num_samples - num_train_samples
    print('num_valid_samples: ' + str(num_valid_samples))
    valid_ids = random.sample(range(num_train_samples), num_valid_samples)
    valid_id_set = set(valid_ids)
    # compare ids as ints here; the original compared int n against stringified ids,
    # which never matched, so validation samples leaked into the training split
    train_ids = [str(n) for n in range(num_train_samples) if n not in valid_id_set]
    valid_ids = list(map(str, valid_ids))
    shuffle(valid_ids)
    shuffle(train_ids)

    with open('names.txt', 'w') as file:
        file.write('\n'.join(names))

    with open('valid_ids.txt', 'w') as file:
        file.write('\n'.join(valid_ids))

    with open('train_ids.txt', 'w') as file:
        file.write('\n'.join(train_ids)) 
Example 51
Project: DOTA_models   Author: ringringyi   File: trainer_lib.py    Apache License 2.0 5 votes
def run_training_step(sess, trainer, train_corpus, batch_size):
  """Runs a single iteration of train_op on a randomly sampled batch."""
  batch = random.sample(train_corpus, batch_size)
  sess.run(trainer['run'], feed_dict={trainer['input_batch']: batch}) 
Example 52
Project: DOTA_models   Author: ringringyi   File: replay_buffer.py    Apache License 2.0 5 votes
def remove_n(self, n):
    """Get n items for removal."""
    # random removal
    idxs = random.sample(xrange(self.init_length, self.cur_size), n)
    return idxs 
Example 53
Project: DOTA_models   Author: ringringyi   File: replay_buffer.py    Apache License 2.0 5 votes
def get_batch(self, n):
    """Get batch of episodes to train on."""
    # random batch
    idxs = random.sample(xrange(self.cur_size), n)
    return [self.buffer[idx] for idx in idxs], None 
Example 54
Project: suffix-trees   Author: ptrus   File: test3.py    MIT License 5 votes
def string_and_not_substring(draw):
    x = draw(text(min_size=2, alphabet=string.printable))
    i = draw(integers(min_value=1, max_value=len(x)-1))
    y = ''.join(sample(x, i))
    assume(x.find(y) == -1)
    return (x, y) 
Example 55
Project: deep-nn-car   Author: scope-lab-vu   File: preprocess.py    MIT License 5 votes
def load_batch_data(data):
    #Loading batch training data
    if(data == 'Trial10'):
        print("Loading {} shuffled images from {} dataset".format(batch_size, data))
        input1, output1 = load_data('Trial10')
        assert len(input1) == len(output1)
        n = len(input1)
        #print(n)
        #n1 = len(input1)
        #print(n1)

        value = random.sample(range(0, n), batch_size)
        #print(value)
        #assert len(value) == batch_size

        x, y = [], []
        for i in value:
            x.append(input1[i])
            y.append(output1[i])

    #Loading batch validation data
    if(data == 'Trial5'):
        print("Loading {} shuffled images from {} dataset".format(batch_size, data))
        input1, output1 = load_data('Trial5')
        #assert len(input1) == len(output1)
        n = len(input1)
        print(n)
        n1 = len(input1)
        print(n1)


        value = random.sample(range(0, n), batch_size)
        #print(value)
        assert len(value) == batch_size

        x, y = [], []
        for i in value:
            x.append(input1[i])
            y.append(output1[i])

    return x, y 
Example 56
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    MIT License 5 votes
def _reverse_sample_open_loop(self):
        """
        Sampling method.

        Open loop reverse sampling from demonstrations. Starts by 
        sampling from states near the end of the demonstrations.
        Increases the window backwards as the number of calls to
        this sampling method increases at a fixed rate.
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # sample uniformly in a window that grows backwards from the end of the demos
        states = self.demo_file["data/{}/states".format(ep_ind)].value
        eps_len = states.shape[0]
        index = np.random.randint(max(eps_len - self.open_loop_window_size, 0), eps_len)
        state = states[index]

        # increase window size at a fixed frequency (open loop)
        self.demo_sampled += 1
        if self.demo_sampled >= self.open_loop_increment_freq:
            if self.open_loop_window_size < eps_len:
                self.open_loop_window_size += self.open_loop_window_increment
            self.demo_sampled = 0

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml

        return state 
Example 57
Project: robosuite   Author: StanfordVL   File: demo_sampler_wrapper.py    MIT License 5 votes
def _forward_sample_open_loop(self):
        """
        Sampling method.

        Open loop forward sampling from demonstrations. Starts by
        sampling from states near the beginning of the demonstrations.
        Increases the window forwards as the number of calls to
        this sampling method increases at a fixed rate.
        """

        # get a random episode index
        ep_ind = random.choice(self.demo_list)

        # sample uniformly in a window that grows forwards from the beginning of the demos
        states = self.demo_file["data/{}/states".format(ep_ind)].value
        eps_len = states.shape[0]
        index = np.random.randint(0, min(self.open_loop_window_size, eps_len))
        state = states[index]

        # increase window size at a fixed frequency (open loop)
        self.demo_sampled += 1
        if self.demo_sampled >= self.open_loop_increment_freq:
            if self.open_loop_window_size < eps_len:
                self.open_loop_window_size += self.open_loop_window_increment
            self.demo_sampled = 0

        if self.need_xml:
            model_xml = self._xml_for_episode_index(ep_ind)
            xml = postprocess_model_xml(model_xml)
            return state, xml

        return state 
Example 58
Project: octapentaveega   Author: rakettitiede   File: ansitester.py    Apache License 2.0 5 votes
def rndclear(c = 32, fg = 7, bg = 0):
	serwrite("\x1B[3{0};4{1}m".format(fg,bg)) # Set colors
	for loc in random.sample(range(0, 512, 2), 256):
		move_to(int(loc  / 32) * 2, (loc / 2) % 16)
		serwrite(chr(c) + chr(c))
	serwrite("\x1B[H") # Move cursor to 0,0 
Example 59
Project: Pytorch-Project-Template   Author: moemen95   File: replay_memory.py    MIT License 5 votes
def sample_batch(self, batch_size):
        batch = random.sample(self.memory, batch_size)
        return batch 
Example 60
Project: cs294-112_hws   Author: xuwd11   File: replay.py    MIT License 5 votes
def sample(self, batch_size):
        random_batch = random.sample(self.memory, batch_size)
        return random_batch 
Example 61
Project: AshsSDK   Author: thehappydinoa   File: parser.py    MIT License 5 votes
def _free_cache_entries(self):
        for key in random.sample(self._CACHE.keys(), int(self._MAX_SIZE / 2)):
            del self._CACHE[key] 
Example 62
Project: GCN-VAE-opinion   Author: zxj32   File: preprocessing.py    MIT License 5 votes
def mask_test_edge_opinion(test_rat, index):
    b_all = np.load("/network/rit/lab/ceashpc/xujiang/project/GAE_ICDM2018/gae/traffic_data/pa_belief_T38_0.8.npy")
    u_all = np.load("/network/rit/lab/ceashpc/xujiang/project/GAE_ICDM2018/gae/traffic_data/pa_uncertain_T38_0.8.npy")
    # belief, uncertain = rb_data.get_dc_data()
    belief = b_all[index]
    uncertain = u_all[index]
    belief = np.reshape(belief, [len(belief), 1])
    uncertain = np.reshape(uncertain, [len(uncertain), 1])
    omega, a0, b0 = get_omega(belief, uncertain)
    random.seed(132)
    test_num = int(test_rat * len(belief))
    test_index = random.sample(range(len(belief)), test_num)
    train_mask = np.zeros_like(belief, dtype=bool)
    test_mask = np.zeros_like(belief, dtype=bool)
    y_train_belief = np.zeros_like(belief)
    y_test_belief = np.zeros_like(belief)
    y_train_un = np.zeros_like(belief)
    y_test_un = np.zeros_like(belief)
    for i in range(len(test_mask)):
        if i in test_index:
            y_test_belief[i] = belief[i]
            y_test_un[i] = uncertain[i]
            test_mask[i] = True
        else:
            y_train_belief[i] = belief[i]
            y_train_un[i] = uncertain[i]
            train_mask[i] = True
    # a1, b1 = get_omega_train(belief, uncertain, train_mask)
    return y_train_belief, y_test_belief, y_train_un, y_test_un, train_mask, test_mask, omega, a0, b0 
Example 63
Project: GCN-VAE-opinion   Author: zxj32   File: preprocessing.py    MIT License 5 votes
def mask_test_edge_epinion(test_rat, T):
    # b_all = np.load("/network/rit/lab/ceashpc/xujiang/project/GAE_TEST/gae/traffic_data/pa_belief_T38_0.8.npy")
    # u_all = np.load("/network/rit/lab/ceashpc/xujiang/project/GAE_TEST/gae/traffic_data/pa_uncertain_T38_0.8.npy")
    # # belief, uncertain = rb_data.get_dc_data()
    # belief = b_all[index]
    # uncertain = u_all[index]
    belief, uncertain, test_index = rb_data.get_epinion_data(T)

    belief = np.reshape(belief, [len(belief), 1])
    uncertain = np.reshape(uncertain, [len(uncertain), 1])
    omega, a0, b0 = get_omega(belief, uncertain)
    # random.seed(132)
    # test_num = int(test_rat * len(belief))
    # test_index = random.sample(range(len(belief)), test_num)
    train_mask = np.zeros_like(belief, dtype=bool)
    test_mask = np.zeros_like(belief, dtype=bool)
    y_train_belief = np.zeros_like(belief)
    y_test_belief = np.zeros_like(belief)
    y_train_un = np.zeros_like(belief)
    y_test_un = np.zeros_like(belief)
    for i in range(len(test_mask)):
        if i in test_index:
            y_test_belief[i] = belief[i]
            y_test_un[i] = uncertain[i]
            test_mask[i] = True
        else:
            y_train_belief[i] = belief[i]
            y_train_un[i] = uncertain[i]
            train_mask[i] = True
    return y_train_belief, y_test_belief, y_train_un, y_test_un, train_mask, test_mask, omega, a0, b0 
Example 64
Project: GCN-VAE-opinion   Author: zxj32   File: preprocessing.py    MIT License 5 votes
def mask_test_edge_opinion_beijing(test_ratio):
    # belief = np.load("/network/rit/lab/ceashpc/xujiang/project/GAE_TEST/gae/data/synthetic_belief_noise10.npy")
    # uncertain = np.load("/network/rit/lab/ceashpc/xujiang/project/GAE_TEST/gae/data/synthetic_uncertain_noise10.npy")
    # _, belief = generate_synthetic_belief2(500, noise)
    # _, uncertain = generate_synthetic_uncertain2(500, noise)
    belief = np.load("./traffic_data/belief_undirect_beijing.npy")
    uncertain = np.load("./traffic_data/uncertain_undirect_beijing.npy")
    belief = np.reshape(belief, [len(belief), 1])
    uncertain = np.reshape(uncertain, [len(uncertain), 1])
    omega = get_omega(belief, uncertain)
    random.seed(132)
    test_index = random.sample(range(len(belief)), int(len(belief) * test_ratio))
    train_mask = np.zeros_like(belief, dtype=bool)
    test_mask = np.zeros_like(belief, dtype=bool)
    y_train_belief = np.zeros_like(belief)
    y_test_belief = np.zeros_like(belief)
    y_train_un = np.zeros_like(belief)
    y_test_un = np.zeros_like(belief)
    for i in range(len(test_mask)):
        if i in test_index:
            y_test_belief[i] = belief[i]
            y_test_un[i] = uncertain[i]
            test_mask[i] = True
        else:
            y_train_belief[i] = belief[i]
            y_train_un[i] = uncertain[i]
            train_mask[i] = True
    return y_train_belief, y_test_belief, y_train_un, y_test_un, train_mask, test_mask, omega 
Example 65
Project: GCN-VAE-opinion   Author: zxj32   File: preprocessing.py    MIT License 5 votes
def mask_test_edge_opinion_f(test_num, noise):
    # belief = np.load("/network/rit/lab/ceashpc/xujiang/project/GAE_TEST/gae/data/synthetic_belief_noise10.npy")
    # uncertain = np.load("/network/rit/lab/ceashpc/xujiang/project/GAE_TEST/gae/data/synthetic_uncertain_noise10.npy")
    _, belief = generate_synthetic_belief2(500, noise)
    _, uncertain = generate_synthetic_uncertain2(500, noise)
    features = np.zeros([len(belief), 2])
    features[:, 0] = belief
    features[:, 1] = uncertain
    belief = np.reshape(belief, [len(belief), 1])
    uncertain = np.reshape(uncertain, [len(uncertain), 1])
    omega = get_omega(belief, uncertain)
    random.seed(132)
    test_index = random.sample(range(len(belief)), test_num)
    train_mask = np.zeros_like(belief, dtype=bool)
    test_mask = np.zeros_like(belief, dtype=bool)
    y_train_belief = np.zeros_like(belief)
    y_test_belief = np.zeros_like(belief)
    y_train_un = np.zeros_like(belief)
    y_test_un = np.zeros_like(belief)
    for i in range(len(test_mask)):
        if i in test_index:
            features[i][0] = 0.0
            features[i][1] = 0.0
            y_test_belief[i] = belief[i]
            y_test_un[i] = uncertain[i]
            test_mask[i] = True
        else:
            y_train_belief[i] = belief[i]
            y_train_un[i] = uncertain[i]
            train_mask[i] = True
    features = sparse.csr_matrix(features)
    return y_train_belief, y_test_belief, y_train_un, y_test_un, train_mask, test_mask, omega, features 
Example 66
Project: pytetra   Author: Tim---   File: convolutional.py    GNU General Public License v2.0 5 votes
def bench_correction():
        import random
        for numerrors in range(5):
            s = 0
            for i in range(1000):
                b3e = b3[:]
                for x in random.sample(range(len(b3)), numerrors):
                    b3e[x] ^= 1
                if c2(b3e) == b2:
                    s += 1
            print numerrors, s / 1000. 
Example 67
Project: wrangle   Author: autonomio   File: dic_resample_values.py    MIT License 5 votes
def dic_resample_values(params, n):

    import random

    for param in params.keys():
        try:
            params[param] = random.sample(params[param], k=n)
        except ValueError:
            pass

    return params 
Example 68
Project: ngo-addons-backport   Author: camptocamp   File: test_serving_base.py    GNU Affero General Public License v3.0 5 votes
def sample(population):
    return random.sample(
        population,
        random.randint(0, min(len(population), 5))) 
Example 69
Project: ngo-addons-backport   Author: camptocamp   File: share_wizard.py    GNU Affero General Public License v3.0 5 votes
def generate_random_pass():
    return ''.join(random.sample(RANDOM_PASS_CHARACTERS,10)) 
Example 70
Project: mrbait   Author: tkchafin   File: benchmark_lookups.py    GNU General Public License v3.0 5 votes
def make_random_df(size):
	ids = list(range(size))
	rands = random.sample(range(size*10),size)
	df = pd.DataFrame({'id' : ids,'weight' : rands})
	return df

Example 71
Project: mrbait   Author: tkchafin   File: benchmark_lookups.py    GNU General Public License v3.0 5 votes
def make_random_dict(size):
	ids = random.sample(range(size*10),size)
	rands = random.sample(range(size*10),size)
	d = {}
	counter = 0
	for i in ids:
		d[i] = rands[counter]
		counter +=1
	return d

Example 72
Project: mutatest   Author: EvanKepner   File: run.py    MIT License 4 votes
def get_mutation_sample_locations(
    sample_space: List[GenomeGroupTarget], n_locations: int
) -> List[GenomeGroupTarget]:
    """Create the mutation sample space and set n_locations to a correct value for reporting.

    ``n_locations`` will change if it is larger than the total sample_space.

    Args:
        sample_space: sample space to draw random locations from
        n_locations: number of locations to draw

    Returns:
        mutation sample
    """
    # set the mutation sample to the full sample space
    # then if max_trials is set and less than the size of the sample space
    # take a random sample without replacement
    mutation_sample = sample_space

    # zero or negative n_locations leaves nothing to sample
    if n_locations <= 0:
        raise ValueError("n_locations must be greater than zero.")

    if n_locations <= len(sample_space):
        LOGGER.info(
            "%s",
            colorize_output(
                f"Selecting {n_locations} locations from {len(sample_space)} potentials.", "green"
            ),
        )
        mutation_sample = random.sample(sample_space, k=n_locations)

    else:
        # set here for final reporting, though not used in rest of trial controls
        LOGGER.info(
            "%s",
            colorize_output(
                f"{n_locations} exceeds sample space, using full sample: {len(sample_space)}.",
                "yellow",
            ),
        )

    return mutation_sample 
Example 73
Project: meta-transfer-learning   Author: erfaneshrati   File: reptile.py    MIT License 4 votes
def train_step(self,
                   dataset,
                   input_ph,
                   label_ph,
                   minimize_op,
                   num_classes,
                   num_shots,
                   inner_batch_size,
                   inner_iters,
                   replacement,
                   meta_step_size,
                   meta_batch_size):
        """
        Perform a Reptile training step.

        Args:
          dataset: a sequence of data classes, where each data
            class has a sample(n) method.
          input_ph: placeholder for a batch of samples.
          label_ph: placeholder for a batch of labels.
          minimize_op: TensorFlow Op to minimize a loss on the
            batch specified by input_ph and label_ph.
          num_classes: number of data classes to sample.
          num_shots: number of examples per data class.
          inner_batch_size: batch size for every inner-loop
            training iteration.
          inner_iters: number of inner-loop iterations.
          replacement: sample with replacement.
          meta_step_size: interpolation coefficient.
          meta_batch_size: how many inner-loops to run.
        """
        old_vars = self._model_state.export_variables()
        new_vars = []
        for _ in range(meta_batch_size):
            mini_dataset = _sample_mini_dataset(dataset, num_classes, num_shots)
            for batch in _mini_batches(mini_dataset, inner_batch_size, inner_iters, replacement):
                inputs, labels = zip(*batch)
                if self._pre_step_op:
                    self.session.run(self._pre_step_op)
                self.session.run(minimize_op, feed_dict={input_ph: inputs, label_ph: labels})
            new_vars.append(self._model_state.export_variables())
            self._model_state.import_variables(old_vars)
        new_vars = average_vars(new_vars)
        self._model_state.import_variables(interpolate_vars(old_vars, new_vars, meta_step_size))
    # pylint: disable=R0913,R0914 
Example 75
Project: meta-transfer-learning   Author: erfaneshrati   File: reptile.py    MIT License 4 votes vote down vote up
def evaluate(self,
                 dataset,
                 input_ph,
                 label_ph,
                 minimize_op,
                 predictions,
                 num_classes,
                 num_shots,
                 inner_batch_size,
                 inner_iters,
                 replacement):
        """
        Run a single evaluation of the model.

        Samples a few-shot learning task and measures
        performance.

        Args:
          dataset: a sequence of data classes, where each data
            class has a sample(n) method.
          input_ph: placeholder for a batch of samples.
          label_ph: placeholder for a batch of labels.
          minimize_op: TensorFlow Op to minimize a loss on the
            batch specified by input_ph and label_ph.
          predictions: a Tensor of integer label predictions.
          num_classes: number of data classes to sample.
          num_shots: number of examples per data class.
          inner_batch_size: batch size for every inner-loop
            training iteration.
          inner_iters: number of inner-loop iterations.
          replacement: sample with replacement.

        Returns:
          The number of correctly predicted samples.
            This always ranges from 0 to num_classes.
        """
        train_set, test_set = _split_train_test(
            _sample_mini_dataset(dataset, num_classes, num_shots+1))
        old_vars = self._full_state.export_variables()
        for batch in _mini_batches(train_set, inner_batch_size, inner_iters, replacement):
            inputs, labels = zip(*batch)
            if self._pre_step_op:
                self.session.run(self._pre_step_op)
            self.session.run(minimize_op, feed_dict={input_ph: inputs, label_ph: labels})
        test_preds = self._test_predictions(train_set, test_set, input_ph, predictions)
        num_correct = sum([pred == sample[1] for pred, sample in zip(test_preds, test_set)])
        self._full_state.import_variables(old_vars)
        return num_correct 
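Both methods build their few-shot task through _sample_mini_dataset. A standalone sketch of that class-sampling step, assuming a "data class" is simply a list of examples:

import random

dataset = {'cat': [1, 2, 3], 'dog': [4, 5, 6], 'fox': [7, 8, 9]}
classes = random.sample(list(dataset), 2)             # num_classes = 2
task = [(x, idx)
        for idx, name in enumerate(classes)
        for x in random.sample(dataset[name], 2)]     # num_shots = 2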
Example 76
Project: MODS_ConvNet   Author: santiagolopezg   File: dataset_labs_MODS.py    MIT License 4 votes vote down vote up
def Dset(self, v, ndataset=5, name='MODS_data.pkl'):
    '''
    Function to build datasets. ndataset: number of datasets wanted;
    name: pkl file where the data from DSetGlobal is stored. The code
    makes sure that each dataset has the same ratio of positive to
    negative images. This is done by setting a tolerance lmda. If you
    set a really low lmda, you might have to stop the program and
    rerun it a few times.
    Returns a pkl with a segmented dataset. seg_data is a list of n
    lists, where n is the number of datasets desired. Each of these n
    lists consists of 2 lists: the data and its corresponding labels.
    '''
    ratios = {'train_1': 0.62234,
              'train_2': 0.8499,
              'train_3': 0.53817,
              'test_1': 0.8881987,
              'test_2': 0.51543,
              'test_3': 0.84473
              }
    f = file(name, 'rb')
    datapapa = cPickle.load(f)
    f.close()
    w = datapapa[0]
    x = datapapa[1]
    y = range(len(x))
    seg_data = []
    counter = 0
    size = int(len(y) / float(ndataset))
    print size, 'gamboozle'
    while counter < ndataset:
        z = random.sample(y, size)
        lmda = 0.02
        ratio = float(sum([x[i] for i in z])) / len([x[i] for i in z if x[i] == 0])
        print ratio, v
        #exit()
        dif = math.fabs(ratio - ratios[v])  # distance from the target positive/negative ratio
        if dif < lmda:
            print 'BINGO!', counter, dif
            y = [i for i in y if i not in z]
            current_label = [x[i] for i in z]
            current_data = [w[i] for i in z]
            seg_data.append([current_data, current_label])
            counter += 1
        else:
            #print 'Does not have an acceptable ratio', ratio, dif
            #fun += 1
            pass
    f = file('seg_MODS_data_{0}.pkl'.format(v), 'wb')
    cPickle.dump(seg_data, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close() 
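The loop above is rejection sampling: keep drawing random subsets until the positive/negative ratio falls within lmda of the target. A compact sketch of the same idea, assuming binary 0/1 labels; like the original, it can loop for a long time if lmda is very small.

import math
import random

def sample_with_ratio(labels, size, target_ratio, lmda=0.02):
    indices = list(range(len(labels)))
    while True:
        z = random.sample(indices, size)
        positives = sum(labels[i] for i in z)
        negatives = len(z) - positives
        # skip all-positive draws to avoid dividing by zero
        if negatives and math.fabs(positives / negatives - target_ratio) < lmda:
            return z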
Example 78
Project: programsynthesishunting   Author: flexgp   File: selection.py    GNU General Public License v3.0 4 votes vote down vote up
def novelty_tournament(population) -> List[Individual]:
    """
    Given an entire population, draw <tournament_size> competitors randomly and
    return the best based on how novel their phenotype is. Unless INVALID_SELECTION
    is true, only valid individuals can be selected for tournaments.

    :param population: A population from which to select individuals.
    :return: A population of the winners from tournaments.
    """

    # Initialise list of tournament winners.
    winners = []

    # Initialize novelty fitness evaluator
    novelty_eval = novelty()

    # The flag "INVALID_SELECTION" allows for selection of invalid individuals.
    if params['INVALID_SELECTION']:
        available = population
    else:
        available = [i for i in population if not i.invalid]

    while len(winners) < params['GENERATION_SIZE']:
        # Randomly choose TOURNAMENT_SIZE competitors from the given
        # population. sample() draws without replacement within a single
        # tournament, but individuals may compete in multiple tournaments.
        competitors = sample(available, params['TOURNAMENT_SIZE'])

        best_competitor = None
        best_novelty = None
        for competitor in competitors:
            # Calculate the novelty of each competitor
            comp_novelty = novelty_eval(competitor)

            # If first time
            if best_novelty is None:
                best_competitor = competitor
                best_novelty = comp_novelty

            # Want to maximize novelty
            if comp_novelty > best_novelty:
                best_competitor = competitor
                best_novelty = comp_novelty

        # Return the single best competitor.
        winners.append(best_competitor)

    # Return the population of novelty tournament winners.
    return winners 
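A standalone sketch of the tournament loop, with a plain scoring function standing in for the novelty evaluator (an assumption): sample() draws each tournament without replacement, yet the same individual can still win several tournaments across iterations.

import random

def tournament_select(population, tournament_size, n_winners, score):
    winners = []
    while len(winners) < n_winners:
        # one tournament: distinct competitors, best score wins
        competitors = random.sample(population, tournament_size)
        winners.append(max(competitors, key=score))
    return winners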
Example 79
Project: fuku-ml   Author: fukuball   File: PLA.py    MIT License 4 votes vote down vote up
def train(self):

        '''
        Train Perceptron Learning Algorithm
        From f(x) = WX
        Find best h(x) = WX similar to f(x)
        Output W
        '''

        if (self.status != 'init'):
            print("Please load train data and init W first.")
            return self.W

        self.status = 'train'

        if self.loop_mode == 'random':
            # visit each data point once, in a random order
            data_check_order = random.sample(range(self.data_num), self.data_num)
        elif self.loop_mode == 'naive_cycle':
            data_check_order = range(self.data_num)
        else:
            data_check_order = range(self.data_num)

        self.tune_times = 0
        k = 0
        flag = True

        while True:
            if (self.tune_times > (2 * self.data_num)):
                print("Dataset not linear separable.")
                break

            if k == self.data_num:
                if flag:
                    break
                k = 0
                flag = True

            point_wise_i = data_check_order[k]

            if self.error_function(self.score_function(self.train_X[point_wise_i], self.W), self.train_Y[point_wise_i]):
                flag = False
                self.tune_times += 1
                self.W = self.W + self.step_alpha * (self.train_Y[point_wise_i] * self.train_X[point_wise_i])
            k += 1

        return self.W 
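In the 'random' loop mode, sampling the entire population amounts to an out-of-place shuffle, so every data point is visited exactly once, just in a random order:

import random

data_num = 5
order = random.sample(range(data_num), data_num)
print(order)  # e.g. [3, 0, 4, 1, 2] -- a permutation of range(5)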
Example 80
Project: DOTA_models   Author: ringringyi   File: train.py    Apache License 2.0 4 votes vote down vote up
def sample_episode_batch(self, data,
                           episode_length, episode_width, batch_size):
    """Generates a random batch for training or validation.

    Structures each element of the batch as an 'episode'.
    Each episode contains episode_length examples and
    episode_width distinct labels.

    Args:
      data: A dictionary mapping label to list of examples.
      episode_length: Number of examples in each episode.
      episode_width: Distinct number of labels in each episode.
      batch_size: Batch size (number of episodes).

    Returns:
      A tuple (x, y) where x is a list of batches of examples
      with size episode_length and y is a list of batches of labels.
    """

    episodes_x = [[] for _ in xrange(episode_length)]
    episodes_y = [[] for _ in xrange(episode_length)]
    assert len(data) >= episode_width
    keys = data.keys()
    for b in xrange(batch_size):
      episode_labels = random.sample(keys, episode_width)
      remainder = episode_length % episode_width
      remainders = [0] * (episode_width - remainder) + [1] * remainder
      episode_x = [
          random.sample(data[lab],
                        r + (episode_length - remainder) / episode_width)
          for lab, r in zip(episode_labels, remainders)]
      episode = sum([[(x, i, ii) for ii, x in enumerate(xx)]
                     for i, xx in enumerate(episode_x)], [])
      random.shuffle(episode)
      # Arrange episode so that each distinct label is seen before moving to
      # 2nd showing
      episode.sort(key=lambda elem: elem[2])
      assert len(episode) == episode_length
      for i in xrange(episode_length):
        episodes_x[i].append(episode[i][0])
        episodes_y[i].append(episode[i][1] + b * episode_width)

    return ([np.array(xx).astype('float32') for xx in episodes_x],
            [np.array(yy).astype('int32') for yy in episodes_y])
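This example is Python 2 (xrange, integer division). On Python 3.11+, random.sample() no longer accepts dict views or sets, so the keys must be materialized into a sequence first, e.g.:

import random

data = {'cat': [1, 2], 'dog': [3, 4], 'fox': [5, 6]}
episode_labels = random.sample(list(data.keys()), 2)  # list, not dict_keys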