Python random.Random() Examples

The following code examples show how to use random.Random(). They are taken from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: pyblish-win   Author: pyblish   File: test_random.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_zeroinputs(self):
        # Verify that distributions can handle a series of zero inputs'
        g = random.Random()
        x = [g.random() for i in xrange(50)] + [0.0]*5
        g.random = x[:].pop; g.uniform(1,10)
        g.random = x[:].pop; g.paretovariate(1.0)
        g.random = x[:].pop; g.expovariate(1.0)
        g.random = x[:].pop; g.weibullvariate(1.0, 1.0)
        g.random = x[:].pop; g.vonmisesvariate(1.0, 1.0)
        g.random = x[:].pop; g.normalvariate(0.0, 1.0)
        g.random = x[:].pop; g.gauss(0.0, 1.0)
        g.random = x[:].pop; g.lognormvariate(0.0, 1.0)
        g.random = x[:].pop; g.vonmisesvariate(0.0, 1.0)
        g.random = x[:].pop; g.gammavariate(0.01, 1.0)
        g.random = x[:].pop; g.gammavariate(1.0, 1.0)
        g.random = x[:].pop; g.gammavariate(200.0, 1.0)
        g.random = x[:].pop; g.betavariate(3.0, 3.0)
        g.random = x[:].pop; g.triangular(0.0, 1.0, 1.0/3.0) 
Example 2
Project: pyblish-win   Author: pyblish   File: test_random.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_constant(self):
        g = random.Random()
        N = 100
        for variate, args, expected in [
                (g.uniform, (10.0, 10.0), 10.0),
                (g.triangular, (10.0, 10.0), 10.0),
                (g.triangular, (10.0, 10.0, 10.0), 10.0),
                (g.expovariate, (float('inf'),), 0.0),
                (g.vonmisesvariate, (3.0, float('inf')), 3.0),
                (g.gauss, (10.0, 0.0), 10.0),
                (g.lognormvariate, (0.0, 0.0), 1.0),
                (g.lognormvariate, (-float('inf'), 0.0), 0.0),
                (g.normalvariate, (10.0, 0.0), 10.0),
                (g.paretovariate, (float('inf'),), 1.0),
                (g.weibullvariate, (10.0, float('inf')), 10.0),
                (g.weibullvariate, (0.0, 10.0), 0.0),
            ]:
            for i in range(N):
                self.assertEqual(variate(*args), expected) 
Example 3
Project: DOTA_models   Author: ringringyi   File: neural_programmer.py    Apache License 2.0 6 votes vote down vote up
def __init__(self):
    """Set up special tokens, dtype lookup tables, vocabulary maps and the RNG.

    Reads the module-level ``FLAGS`` object, keeps a reference to it on the
    instance and seeds ``self.random`` from ``FLAGS.python_seed``.
    """
    global FLAGS
    self.FLAGS = FLAGS

    # Special tokens.
    self.unk_token = "UNK"
    self.entry_match_token = "entry_match"
    self.column_match_token = "column_match"
    self.dummy_token = "dummy_token"

    # Precision name -> framework dtype.
    self.tf_data_type = {"double": tf.float64, "float": tf.float32}
    self.np_data_type = {"double": np.float64, "float": np.float32}

    # Operation names, in the same order as before.
    self.operations_set = [
        "count",
        "prev", "next", "first_rs", "last_rs", "group_by_max", "greater",
        "lesser", "geq", "leq", "max", "min", "word-match",
        "reset_select",
        "print",
    ]

    # Vocabulary bookkeeping starts out empty.
    self.word_ids = {}
    self.reverse_word_ids = {}
    self.word_count = {}

    self.random = Random(FLAGS.python_seed)
Example 4
Project: Dumb-Cogs   Author: irdumbs   File: adventure.py    MIT License 6 votes vote down vote up
def __init__(self, seed=None):
        """Put a new game into its starting state.

        :param seed: optional seed for the game's private random generator;
            when ``None`` the generator keeps its default seeding.
        """
        Data.__init__(self)
        self.output = ''

        # Yes/no prompt state.
        self.yesno_callback = False
        self.yesno_casual = False       # whether to insist they answer

        # Countdown clocks.
        self.clock1 = 30                # counts down from finding last treasure
        self.clock2 = 50                # counts down until cave closes

        # Status flags, all initially off.
        self.is_closing = False         # is the cave closing?
        self.panic = False              # they tried to leave during closing?
        self.is_closed = False          # is the cave closed?
        self.is_done = False            # caller can check for "game over"
        self.could_fall_in_pit = False  # could the player fall into a pit?

        # Each game owns its own generator so games do not share random state.
        generator = random.Random()
        if seed is not None:
            generator.seed(seed)
        self.random_generator = generator
Example 5
Project: ProceduralSettlementsInMinecraft   Author: abrightmoore   File: FarmLand.py    GNU General Public License v3.0 6 votes vote down vote up
def create(generatorName,level,areas):
	"""Fill every area in ``areas`` with farmland plots.

	``areas`` is consumed (popped until empty).  Each popped entry is a
	``(box, flag)`` pair; the box footprint is split between one and five
	times by ``chopUpBoxes2D`` and a ``Farmland`` is built on each piece.
	"""
	log("Generating a "+generatorName)
	while areas:
		box, _availableFlag = areas.pop()
		width, _height, depth = getDimensionsFromBox(box)
		log("Generating a "+generatorName+" at "+str(box))

		minWidth = width-2
		minDepth = depth-2
		# Per-area generator, seeded from the shared random source.
		rng = Random(randint(1000000000,9999999999))
		plotHeight = box.maxy-box.miny
		# Footprint as (x, z, width, depth); the vertical extent is added back below.
		plots = [(box.minx,box.minz,box.maxx-box.minx,box.maxz-box.minz)]
		for _ in xrange(0,randint(1,5)):
			plots = chopUpBoxes2D(rng,plots,minWidth,minDepth)

		for (x,z,w,d) in plots:
			Farmland(level,BoundingBox((x,box.miny,z),(w,plotHeight,d)))
Example 6
Project: xblock-leaderboard   Author: open-craft   File: mock.py    GNU Affero General Public License v3.0 6 votes vote down vote up
def get_grades(self, target_block_id, limit_hint=None):
        """
        Generate random grades for target_block_id and descendants.

        The generator is seeded with ``target_block_id``, so repeated calls
        with the same arguments return the same list (handy in tests).

        :param target_block_id: id of the block to produce grades for; also
            used as the random seed.
        :param limit_hint: number of entries to return; when ``None`` a count
            between 5 and 80 is drawn from the seeded generator.
        :return: list of ``(grade, {"name": name})`` tuples with ``grade`` in
            0..100 and ``name`` of the form ``"<first> <initial>."``.
        """
        # Even though this is just a mock source, verify target_block_id and that the runtime can load blocks for us:
        self.host_block.runtime.get_block(target_block_id)
        # Seed a random generator consistently based on target_block_id:
        rand = random.Random(target_block_id)

        if limit_hint is None:
            limit_hint = rand.randint(5, 80)

        def random_name():
            return u"{first} {initial}.".format(
                first=rand.choice(self.NAMES),
                initial=rand.choice(self.ALPHABET)
            )

        def random_grade():
            return rand.randint(0, 100)

        # ``range`` instead of the Python 2-only ``xrange`` so this also runs
        # on Python 3; the grade is drawn before the name for every entry.
        return [(random_grade(), {"name": random_name()}) for _ in range(limit_hint)]
Example 7
Project: razzy-spinner   Author: rafasashi   File: hmm.py    GNU General Public License v3.0 6 votes vote down vote up
def demo_bw():
    """Demonstrate Baum-Welch training on the market example.

    Draws ten length-5 samples from the example model with a generator
    seeded to 0, keeps only the first element of every sampled token (the
    second slot is set to None) and runs unsupervised training starting
    from the model that produced the samples.
    """
    print()
    print("Baum-Welch demo for market example")
    print()

    model, states, symbols = _market_hmm_example()

    # Build the training sequences.
    import random
    rng = random.Random()
    rng.seed(0)
    training = [
        [(token[0], None) for token in model.random_sample(rng, 5)]
        for _ in range(10)
    ]

    # Train on those sequences, starting from the generating model.
    trainer = HiddenMarkovModelTrainer(states, symbols)
    hmm = trainer.train_unsupervised(training, model=model,
                                     max_iterations=1000)
Example 8
Project: dbscan   Author: aroques   File: dataset.py    MIT License 6 votes vote down vote up
def generate_random_points(num_points: int, bound_for_x: List[float], bound_for_y: List[float], seed: int):
    """Produce reproducible pseudo-random 2-D points inside a rectangle.

    Args:
        num_points: How many points to produce.
        bound_for_x: Two-element ``[min, max]`` range for the X coordinate.
        bound_for_y: Two-element ``[min, max]`` range for the Y coordinate.
        seed: Seed handed to ``Random``; equal seeds give equal points.

    Returns:
        A list of ``num_points`` ``(x, y)`` tuples.
    """
    rng = Random(seed)
    x_low, x_high = bound_for_x
    y_low, y_high = bound_for_y
    # Tuple items are evaluated left to right, so x is drawn before y.
    return [
        (
            x_low + (x_high - x_low) * rng.random(),
            y_low + (y_high - y_low) * rng.random(),
        )
        for _ in range(num_points)
    ]
Example 9
Project: OpenBottle   Author: xiaozhuchacha   File: hmm.py    MIT License 6 votes vote down vote up
def demo_bw():
    """Demonstrate Baum-Welch training on the market example.

    Draws ten length-5 samples from the example model with a generator
    seeded to 0, keeps only the first element of every sampled token (the
    second slot is set to None) and runs unsupervised training starting
    from the model that produced the samples.
    """
    print()
    print("Baum-Welch demo for market example")
    print()

    model, states, symbols = _market_hmm_example()

    # Build the training sequences.
    import random
    rng = random.Random()
    rng.seed(0)
    training = [
        [(token[0], None) for token in model.random_sample(rng, 5)]
        for _ in range(10)
    ]

    # Train on those sequences, starting from the generating model.
    trainer = HiddenMarkovModelTrainer(states, symbols)
    hmm = trainer.train_unsupervised(training, model=model,
                                     max_iterations=1000)
Example 10
Project: OpenBottle   Author: xiaozhuchacha   File: hmm.py    MIT License 6 votes vote down vote up
def demo_bw():
    """Demonstrate Baum-Welch training on the market example.

    Draws ten length-5 samples from the example model with a generator
    seeded to 0, keeps only the first element of every sampled token (the
    second slot is set to None) and runs unsupervised training starting
    from the model that produced the samples.
    """
    print()
    print("Baum-Welch demo for market example")
    print()

    model, states, symbols = _market_hmm_example()

    # Build the training sequences.
    import random
    rng = random.Random()
    rng.seed(0)
    training = [
        [(token[0], None) for token in model.random_sample(rng, 5)]
        for _ in range(10)
    ]

    # Train on those sequences, starting from the generating model.
    trainer = HiddenMarkovModelTrainer(states, symbols)
    hmm = trainer.train_unsupervised(training, model=model,
                                     max_iterations=1000)
Example 11
Project: NiujiaoDebugger   Author: MrSrc   File: test_tarfile.py    GNU General Public License v3.0 6 votes vote down vote up
def test_ignore_zeros(self):
        """Check that ``ignore_zeros=True`` skips leading junk blocks.

        An archive is written that starts with 1024 filler bytes, followed
        by a single member "foo" holding 512 pseudo-random bytes.  The
        filler is either NUL bytes (the EOFHeaderError case) or b'a' (the
        InvalidHeaderError case); both must be skipped when reading.
        """
        # 512 reproducible pseudo-random bytes for the member body.
        payload = Random(0).getrandbits(512*8).to_bytes(512, 'big')
        for filler in (b'\0', b'a'):
            with self.open(tmpname, "w") as archive:
                archive.write(filler * 1024)
                member = tarfile.TarInfo("foo")
                member.size = len(payload)
                archive.write(member.tobuf())
                archive.write(payload)

            reader = tarfile.open(tmpname, mode="r", ignore_zeros=True)
            try:
                found = reader.getnames()
                failure = ("ignore_zeros=True should have skipped the %r-blocks" %
                           filler)
                self.assertListEqual(found, ["foo"], failure)
            finally:
                reader.close()
Example 12
Project: NiujiaoDebugger   Author: MrSrc   File: test_random.py    GNU General Public License v3.0 6 votes vote down vote up
def test_zeroinputs(self):
        # Verify that distributions can handle a series of zero inputs'
        g = random.Random()
        x = [g.random() for i in range(50)] + [0.0]*5
        g.random = x[:].pop; g.uniform(1,10)
        g.random = x[:].pop; g.paretovariate(1.0)
        g.random = x[:].pop; g.expovariate(1.0)
        g.random = x[:].pop; g.weibullvariate(1.0, 1.0)
        g.random = x[:].pop; g.vonmisesvariate(1.0, 1.0)
        g.random = x[:].pop; g.normalvariate(0.0, 1.0)
        g.random = x[:].pop; g.gauss(0.0, 1.0)
        g.random = x[:].pop; g.lognormvariate(0.0, 1.0)
        g.random = x[:].pop; g.vonmisesvariate(0.0, 1.0)
        g.random = x[:].pop; g.gammavariate(0.01, 1.0)
        g.random = x[:].pop; g.gammavariate(1.0, 1.0)
        g.random = x[:].pop; g.gammavariate(200.0, 1.0)
        g.random = x[:].pop; g.betavariate(3.0, 3.0)
        g.random = x[:].pop; g.triangular(0.0, 1.0, 1.0/3.0) 
Example 13
Project: NiujiaoDebugger   Author: MrSrc   File: test_random.py    GNU General Public License v3.0 6 votes vote down vote up
def test_constant(self):
        g = random.Random()
        N = 100
        for variate, args, expected in [
                (g.uniform, (10.0, 10.0), 10.0),
                (g.triangular, (10.0, 10.0), 10.0),
                (g.triangular, (10.0, 10.0, 10.0), 10.0),
                (g.expovariate, (float('inf'),), 0.0),
                (g.vonmisesvariate, (3.0, float('inf')), 3.0),
                (g.gauss, (10.0, 0.0), 10.0),
                (g.lognormvariate, (0.0, 0.0), 1.0),
                (g.lognormvariate, (-float('inf'), 0.0), 0.0),
                (g.normalvariate, (10.0, 0.0), 10.0),
                (g.paretovariate, (float('inf'),), 1.0),
                (g.weibullvariate, (10.0, float('inf')), 10.0),
                (g.weibullvariate, (0.0, 10.0), 0.0),
            ]:
            for i in range(N):
                self.assertEqual(variate(*args), expected) 
Example 14
Project: NiujiaoDebugger   Author: MrSrc   File: test_random.py    GNU General Public License v3.0 6 votes vote down vote up
def test_after_fork(self):
        # Test the global Random instance gets reseeded in child:
        # parent and child each draw 128 bits from the module-level generator
        # after the fork, and the two values must differ.
        r, w = os.pipe()
        pid = os.fork()
        if pid == 0:
            # child process: report the drawn value through the pipe
            try:
                val = random.getrandbits(128)
                with open(w, "w") as f:
                    f.write(str(val))
            finally:
                # Always leave via os._exit so the child never returns into
                # the caller's code, even if the write above failed.
                os._exit(0)
        else:
            # parent process: close the unused write end before reading
            os.close(w)
            val = random.getrandbits(128)
            with open(r, "r") as f:
                # NOTE(review): eval() is applied to text written by our own
                # child (a decimal integer); int() would be a stricter parser.
                child_val = eval(f.read())
            self.assertNotEqual(val, child_val)

            # Reap the child and check the status reported by waitpid is 0.
            pid, status = os.waitpid(pid, 0)
            self.assertEqual(status, 0)
Example 15
Project: NiujiaoDebugger   Author: MrSrc   File: stringbench.py    GNU General Public License v3.0 6 votes vote down vote up
def _make_2000_lines():
    import random
    r = random.Random(100)
    chars = list(map(chr, range(32, 128)))
    i = 0
    while i < len(chars):
        chars[i] = " "
        i += r.randrange(9)
    s = "".join(chars)
    s = s*4
    words = []
    for i in range(2000):
        start = r.randrange(96)
        n = r.randint(5, 65)
        words.append(s[start:start+n])
    return "\n".join(words)+"\n" 
Example 16
Project: 0xbtc-discord-price-bot   Author: 0x1d00ffff   File: all_self_tests.py    MIT License 6 votes vote down vote up
def test_fuzzing_prettify_decimals(self):
        """Fuzz ``prettify_decimals`` with seeded random integers and floats.

        100000 integers and then 100000 floats between -1e30 and 1e30 are
        formatted; every result must be a ``str`` of at most 16 characters.
        """
        from formatting_helpers import prettify_decimals
        import random
        iterations = 100000  # 4 seconds on an i7-5700HQ
        min_value, max_value = -1e30, 1e30
        # fixed seed so randomness is repeatable
        myrandom = random.Random("myseed")
        # randint() needs real integers: float bounds were deprecated in
        # Python 3.10 and raise TypeError from 3.12 on.  int() of these
        # whole-valued floats yields the same bounds older versions used.
        int_min, int_max = int(min_value), int(max_value)
        generators = (
            lambda: myrandom.randint(int_min, int_max),      # integers first
            lambda: myrandom.uniform(min_value, max_value),  # then floats
        )
        for draw in generators:
            for _ in range(iterations):
                number = draw()
                with self.subTest(number=number):
                    formatted = prettify_decimals(number)
                    self.assertIsInstance(formatted, str)
                    self.assertLessEqual(len(formatted), 16)
Example 17
Project: Health-Checker   Author: KriAga   File: hmm.py    MIT License 6 votes vote down vote up
def demo_bw():
    """Demonstrate Baum-Welch training on the market example.

    Draws ten length-5 samples from the example model with a generator
    seeded to 0, keeps only the first element of every sampled token (the
    second slot is set to None) and runs unsupervised training starting
    from the model that produced the samples.
    """
    print()
    print("Baum-Welch demo for market example")
    print()

    model, states, symbols = _market_hmm_example()

    # Build the training sequences.
    import random
    rng = random.Random()
    rng.seed(0)
    training = [
        [(token[0], None) for token in model.random_sample(rng, 5)]
        for _ in range(10)
    ]

    # Train on those sequences, starting from the generating model.
    trainer = HiddenMarkovModelTrainer(states, symbols)
    hmm = trainer.train_unsupervised(training, model=model,
                                     max_iterations=1000)
Example 18
Project: BERT-Classification-Tutorial   Author: Socialbird-AILab   File: modeling_test.py    Apache License 2.0 5 votes vote down vote up
def ids_tensor(cls, shape, vocab_size, rng=None, name=None):
        """Build an int32 constant tensor of ``shape`` filled with random ids.

        Each id is drawn with ``rng.randint(0, vocab_size - 1)``; a fresh
        ``random.Random()`` is used when ``rng`` is None.
        """
        generator = random.Random() if rng is None else rng

        # Number of scalar entries = product of all dimensions.
        element_count = 1
        for extent in shape:
            element_count *= extent

        ids = [generator.randint(0, vocab_size - 1)
               for _ in range(element_count)]

        return tf.constant(value=ids, dtype=tf.int32, shape=shape, name=name)
Example 19
Project: pyblish-win   Author: pyblish   File: tempfile.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def rng(self):
        """Return this object's generator, replacing it when the PID changes.

        The generator is cached together with the process id it was created
        in; a different current PID (or no cached PID yet) triggers creation
        of a new one.
        """
        pid_now = _os.getpid()
        if getattr(self, '_rng_pid', None) == pid_now:
            return self._rng
        self._rng = _Random()
        self._rng_pid = pid_now
        return self._rng
Example 20
Project: pyblish-win   Author: pyblish   File: test_zlib.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_odd_flush(self):
        """Round-trip 17K of pseudo-random data through a sync flush.

        Regression test for odd flushing bugs noted in 2.0 (and hopefully
        fixed in 2.1): the data is compressed, sync-flushed and decompressed,
        and the result must equal the input.
        """
        import random

        compressor = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
        decompressor = zlib.decompressobj()

        # Choose the generator: WichmannHill where the random module has it
        # (the RNG of the bug report, 2.3 and later), else Random (2.2), else
        # the module itself for versions with a single RNG.
        try:
            source = random.WichmannHill()
        except AttributeError:
            try:
                source = random.Random()
            except AttributeError:
                source = random
        source.seed(1)
        data = genblock(1, 17 * 1024, generator=source)

        # compress, sync-flush, and decompress
        head = compressor.compress(data)
        tail = compressor.flush(zlib.Z_SYNC_FLUSH)
        restored = decompressor.decompress(head + tail)

        # Any difference from the input is a failure.
        self.assertEqual(restored, data, "17K random source doesn't match")
Example 21
Project: pyblish-win   Author: pyblish   File: test_random.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_avg_std(self):
        # Use integration to test distribution average and standard deviation.
        # Only works for distributions which do not consume variates in pairs
        g = random.Random()
        N = 5000
        x = [i/float(N) for i in xrange(1,N)]
        for variate, args, mu, sigmasqrd in [
                (g.uniform, (1.0,10.0), (10.0+1.0)/2, (10.0-1.0)**2/12),
                (g.triangular, (0.0, 1.0, 1.0/3.0), 4.0/9.0, 7.0/9.0/18.0),
                (g.expovariate, (1.5,), 1/1.5, 1/1.5**2),
                (g.vonmisesvariate, (1.23, 0), pi, pi**2/3),
                (g.paretovariate, (5.0,), 5.0/(5.0-1),
                                  5.0/((5.0-1)**2*(5.0-2))),
                (g.weibullvariate, (1.0, 3.0), gamma(1+1/3.0),
                                  gamma(1+2/3.0)-gamma(1+1/3.0)**2) ]:
            g.random = x[:].pop
            y = []
            for i in xrange(len(x)):
                try:
                    y.append(variate(*args))
                except IndexError:
                    pass
            s1 = s2 = 0
            for e in y:
                s1 += e
                s2 += (e - mu) ** 2
            N = len(y)
            self.assertAlmostEqual(s1/N, mu, places=2,
                                   msg='%s%r' % (variate.__name__, args))
            self.assertAlmostEqual(s2/(N-1), sigmasqrd, places=2,
                                   msg='%s%r' % (variate.__name__, args)) 
Example 22
Project: pyblish-win   Author: pyblish   File: test_random.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_random_subclass_with_kwargs(self):
        # SF bug #1486663 -- this used to erroneously raise a TypeError
        class Subclass(random.Random):
            def __init__(self, newarg=None):
                random.Random.__init__(self)
        Subclass(newarg=1) 
Example 23
Project: flasky   Author: RoseOu   File: flask_httpauth.py    MIT License 5 votes vote down vote up
def __init__(self, scheme=None, realm=None, use_ha1_pw=False):
        """Configure digest authentication with session-backed defaults.

        ``scheme`` and ``realm`` are passed to the base class and
        ``use_ha1_pw`` is stored on the instance.  ``self.random`` is a
        ``SystemRandom`` unless calling it raises ``NotImplementedError``,
        in which case a plain ``Random`` is used.  Default nonce/opaque
        generators and verifiers that keep their values in ``session`` are
        then registered.
        """
        super(HTTPDigestAuth, self).__init__(scheme, realm)
        self.use_ha1_pw = use_ha1_pw
        self.random = SystemRandom()
        try:
            self.random.random()
        except NotImplementedError:
            self.random = Random()

        def _generate_random():
            # MD5 hex digest of the text form of one random float.
            seed_text = str(self.random.random()).encode('utf-8')
            return md5(seed_text).hexdigest()

        def _store_fresh(session_key):
            # Save a new token under session_key and hand back the stored value.
            session[session_key] = _generate_random()
            return session[session_key]

        def default_generate_nonce():
            return _store_fresh("auth_nonce")

        def default_verify_nonce(nonce):
            return nonce == session.get("auth_nonce")

        def default_generate_opaque():
            return _store_fresh("auth_opaque")

        def default_verify_opaque(opaque):
            return opaque == session.get("auth_opaque")

        self.generate_nonce(default_generate_nonce)
        self.generate_opaque(default_generate_opaque)
        self.verify_nonce(default_verify_nonce)
        self.verify_opaque(default_verify_opaque)
Example 24
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: stt_datagenerator.py    Apache License 2.0 5 votes vote down vote up
def __init__(self, save_dir, model_name, step=10, window=20, max_freq=8000, desc_file=None):
        """
        Set up feature statistics, the RNG and configuration attributes.

        Params:
            save_dir: stored unchanged as ``self.save_dir``
            model_name: stored unchanged as ``self.model_name``
            step (int): Step size in milliseconds between windows
            window (int): FFT window size in milliseconds
            max_freq (int): Only FFT bins corresponding to frequencies between
                [0, max_freq] are returned
            desc_file (str, optional): Path to a JSON-line file that contains
                labels and paths to the audio files. When it is not None,
                metadata is loaded from it during construction; when None,
                nothing is loaded here.
        """
        # Original author's note: calc_feat_dim returns
        # int(0.001*window*max_freq)+1 (helper not shown here).
        super(DataGenerator, self).__init__()
        # With the default window/max_freq that note gives
        # feat_dim=0.001*20*8000+1=161
        self.feat_dim = calc_feat_dim(window, max_freq)
        # 1-D array of feat_dim zeros: initial per-feature mean
        self.feats_mean = np.zeros((self.feat_dim,))
        # 1-D array of feat_dim ones: initial per-feature standard deviation
        self.feats_std = np.ones((self.feat_dim,))
        self.max_input_length = 0
        self.max_length_list_in_batch = []
        # Private, unseeded random generator instance (not an array).
        self.rng = random.Random()
        # NOTE(review): metadata is loaded before step/window/max_freq/
        # save_dir/model_name are assigned below — confirm the loader does
        # not read those attributes.
        if desc_file is not None:
            self.load_metadata_from_desc_file(desc_file)
        self.step = step
        self.window = window
        self.max_freq = max_freq
        self.save_dir = save_dir
        self.model_name = model_name
Example 25
Project: sic   Author: Yanixos   File: tempfile.py    GNU General Public License v3.0 5 votes vote down vote up
def rng(self):
        """Return this object's generator, replacing it when the PID changes.

        The generator is cached together with the process id it was created
        in; a different current PID (or no cached PID yet) triggers creation
        of a new one.
        """
        pid_now = _os.getpid()
        if getattr(self, '_rng_pid', None) == pid_now:
            return self._rng
        self._rng = _Random()
        self._rng_pid = pid_now
        return self._rng
Example 26
Project: Dumb-Cogs   Author: irdumbs   File: adventure.py    MIT License 5 votes vote down vote up
def resume(self, obj):
        """Returns an Adventure game saved to the given file.

        :param obj: either a path (``str``), which is opened in binary mode
            and closed again here, or an already open binary file-like
            object, which is read but left open for the caller.
        :return: the unpickled game with a fresh ``random_generator`` that
            has been restored from the saved ``random_state``.
        """
        if isinstance(obj, str):
            # ``with`` closes the file even when reading fails.
            with open(obj, 'rb') as savefile:
                payload = savefile.read()
        else:
            payload = obj.read()
        # NOTE(review): pickle can execute arbitrary code while loading;
        # only resume save files that come from a trusted source.
        game = pickle.loads(zlib.decompress(payload))
        # Reinstate the random number generator.
        game.random_generator = random.Random()
        game.random_generator.setstate(game.random_state)
        del game.random_state
        return game
Example 27
Project: mpu   Author: MartinThoma   File: datetime.py    MIT License 5 votes vote down vote up
def generate(minimum, maximum, local_random=random.Random()):
    """
    Draw a date uniformly at random between two datetimes.

    The result is ``minimum`` plus a random fraction of the span up to
    ``maximum``.

    Parameters
    ----------
    minimum : datetime object
    maximum : datetime object
    local_random : random.Random
        Source of randomness. The default instance is created once, when the
        function is defined, and is shared by all calls that omit it.

    Returns
    -------
    generated_date : datetime object

    Raises
    ------
    ValueError
        If ``minimum`` is not strictly smaller than ``maximum``.

    Examples
    --------
    >>> import random; r = random.Random(); r.seed(0)
    >>> from datetime import datetime

    >>> generate(datetime(2018, 1, 1), datetime(2018, 1, 2), local_random=r)
    datetime.datetime(2018, 1, 1, 20, 15, 58, 47972)

    >>> generate(datetime(2018, 1, 1), datetime(2018, 1, 2), local_random=r)
    datetime.datetime(2018, 1, 1, 18, 11, 27, 260414)
    """
    if not (minimum < maximum):
        raise ValueError("{} is not smaller than {}".format(minimum, maximum))

    # timedelta * float is not available on Python 2.7, so the span is scaled
    # as a number of seconds and converted back to a timedelta.
    span_seconds = (maximum - minimum).total_seconds()
    offset = dt.timedelta(seconds=span_seconds * local_random.random())
    return minimum + offset
Example 28
Project: bert-sts   Author: swen128   File: modeling_test.py    Apache License 2.0 5 votes vote down vote up
def ids_tensor(cls, shape, vocab_size, rng=None, name=None):
    """Build an int32 constant tensor of ``shape`` filled with random ids.

    Each id is drawn with ``rng.randint(0, vocab_size - 1)``; a fresh
    ``random.Random()`` is used when ``rng`` is None.
    """
    generator = random.Random() if rng is None else rng

    # Number of scalar entries = product of all dimensions.
    element_count = 1
    for extent in shape:
      element_count *= extent

    ids = [generator.randint(0, vocab_size - 1)
           for _ in range(element_count)]

    return tf.constant(value=ids, dtype=tf.int32, shape=shape, name=name)
Example 29
Project: bert-sts   Author: swen128   File: create_pretraining_data.py    Apache License 2.0 5 votes vote down vote up
def main(_):
  """Create pretraining instances from the input files and write them out.

  Input patterns and output paths come from the comma-separated
  ``FLAGS.input_file`` and ``FLAGS.output_file``; the instance generator is
  seeded with ``FLAGS.random_seed``.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  # Expand every glob pattern, keeping the order of the patterns.
  input_files = [
      path
      for pattern in FLAGS.input_file.split(",")
      for path in tf.gfile.Glob(pattern)
  ]

  tf.logging.info("*** Reading from input files ***")
  for path in input_files:
    tf.logging.info("  %s", path)

  rng = random.Random(FLAGS.random_seed)
  instances = create_training_instances(
      input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor,
      FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq,
      rng)

  output_files = FLAGS.output_file.split(",")
  tf.logging.info("*** Writing to output files ***")
  for path in output_files:
    tf.logging.info("  %s", path)

  write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length,
                                  FLAGS.max_predictions_per_seq, output_files)
Example 30
Project: jawfish   Author: war-and-code   File: tempfile.py    MIT License 5 votes vote down vote up
def rng(self):
        """Return this object's generator, replacing it when the PID changes.

        The generator is cached together with the process id it was created
        in; a different current PID (or no cached PID yet) triggers creation
        of a new one.
        """
        pid_now = _os.getpid()
        if getattr(self, '_rng_pid', None) == pid_now:
            return self._rng
        self._rng = _Random()
        self._rng_pid = pid_now
        return self._rng
Example 31
Project: edit-bone-constraint   Author: HolisticCoders   File: misc.py    GNU General Public License v3.0 5 votes vote down vote up
def create_py_random_state(random_state=None):
    """Returns a random.Random instance depending on input.

    Parameters
    ----------
    random_state : int or random number generator or None (default=None)
        If int, return a random.Random instance set with seed=int.
        if random.Random instance, return it.
        if None or the `random` package, return the global random number
        generator used by `random`.
        if np.random package, return the global numpy random number
        generator wrapped in a PythonRandomInterface class.
        if np.random.RandomState instance, return it wrapped in
        PythonRandomInterface
        if a PythonRandomInterface instance, return it

    Raises
    ------
    ValueError
        If `random_state` is none of the accepted kinds.
    """
    import random
    try:
        import numpy as np
        if random_state is np.random:
            return PythonRandomInterface(np.random.mtrand._rand)
        if isinstance(random_state, np.random.RandomState):
            return PythonRandomInterface(random_state)
        if isinstance(random_state, PythonRandomInterface):
            return random_state
    except ImportError:
        # numpy is optional; without it only the stdlib cases below apply.
        pass

    if random_state is None or random_state is random:
        return random._inst
    if isinstance(random_state, random.Random):
        return random_state
    if isinstance(random_state, int):
        return random.Random(random_state)
    msg = '%r cannot be used to generate a random.Random instance'
    # Wrap the value in a 1-tuple: a bare tuple argument would be unpacked by
    # ``%`` and raise TypeError instead of the intended ValueError.
    raise ValueError(msg % (random_state,))


# fixture for nose tests 
Example 32
Project: razzy-spinner   Author: rafasashi   File: hmm.py    GNU General Public License v3.0 5 votes vote down vote up
def random_sample(self, rng, length):
        """
        Randomly sample the HMM to generate a sentence of a given length. This
        samples the prior distribution then the observation distribution and
        transition distribution for each subsequent observation and state.
        This will mostly generate unintelligible garbage, but can provide some
        amusement.

        @return:        the randomly created state/observation sequence,
                        generated according to the HMM's probability
                        distributions. Each item is a (symbol, state) tuple.
                        An empty list is returned when length is not
                        positive.
        @rtype:         list
        @param rng:     random number generator
        @type rng:      Random (or any object with a random() method)
        @param length:  desired output length
        @type length:   int
        """

        # Nothing to sample: without this guard a length of 0 would still
        # produce one token.
        if length <= 0:
            return []

        # sample the starting state and symbol prob dists
        tokens = []
        state = self._sample_probdist(self._priors, rng.random(), self._states)
        symbol = self._sample_probdist(self._outputs[state],
                                  rng.random(), self._symbols)
        tokens.append((symbol, state))

        for _ in range(1, length):
            # sample the state transition and symbol prob dists
            state = self._sample_probdist(self._transitions[state],
                                     rng.random(), self._states)
            symbol = self._sample_probdist(self._outputs[state],
                                      rng.random(), self._symbols)
            tokens.append((symbol, state))

        return tokens
Example 33
Project: razzy-spinner   Author: rafasashi   File: hmm.py    GNU General Public License v3.0 5 votes vote down vote up
def demo_bw():
    """Demonstrate Baum-Welch training on a three-state market model.

    Builds the model from hard-coded transition, output and prior tables,
    draws ten length-5 samples from it with an unseeded generator, keeps
    only the first element of every sampled token and runs unsupervised
    training starting from the generating model.
    """
    # example taken from page 381, Huang et al
    symbols = ['up', 'down', 'unchanged']
    states = ['bull', 'bear', 'static']

    def pd(values, samples):
        # Probability distribution mapping each sample to its value.
        table = dict((sample, value) for value, sample in zip(values, samples))
        return DictionaryProbDist(table)

    def cpd(rows, conditions, samples):
        # One distribution per condition, built from the matching row.
        table = dict((condition, pd(row, samples))
                     for row, condition in zip(rows, conditions))
        return DictionaryConditionalProbDist(table)

    # Transition, output and prior probabilities.
    A = cpd(array([[0.6, 0.2, 0.2], [0.5, 0.3, 0.2], [0.4, 0.1, 0.5]], float64),
            states, states)
    B = cpd(array([[0.7, 0.1, 0.2], [0.1, 0.6, 0.3], [0.3, 0.3, 0.4]], float64),
            states, symbols)
    pi = pd(array([0.5, 0.2, 0.3], float64), states)

    model = HiddenMarkovModel(symbols=symbols, states=states,
                              transitions=A, outputs=B, priors=pi)

    # generate some random sequences
    import random
    rng = random.Random()
    training = [
        [(token[0], None) for token in model.random_sample(rng, 5)]
        for _ in range(10)
    ]

    # train on those examples, starting with the model that generated them
    trainer = HiddenMarkovModelTrainer(states, symbols)
    hmm = trainer.train_unsupervised(training, model=model, max_iterations=1000)
Example 34
Project: razzy-spinner   Author: rafasashi   File: hmm.py    GNU General Public License v3.0 5 votes vote down vote up
def random_sample(self, rng, length):
        """
        Randomly sample the HMM to generate a sequence of a given length.

        The first state is drawn from the prior distribution and every later
        state from the transition distribution of the state before it. Each
        time a state is chosen, a symbol is drawn from that state's output
        distribution. Two numbers are taken from ``rng`` per token: first
        for the state, then for the symbol.

        :return:        the sampled sequence as a list of ``(symbol, state)``
                        tuples; empty when ``length`` is less than 1
        :rtype:         list
        :param rng:     random number generator
        :type rng:      Random (or any object with a random() method)
        :param length:  desired output length
        :type length:   int
        """
        tokens = []
        # Without this guard a request for zero tokens would still sample
        # (and return) the initial token.
        if length < 1:
            return tokens

        # sample the starting state and symbol prob dists
        state = self._sample_probdist(self._priors, rng.random(), self._states)
        symbol = self._sample_probdist(self._outputs[state],
                                       rng.random(), self._symbols)
        tokens.append((symbol, state))

        for _ in range(1, length):
            # sample the state transition and symbol prob dists
            state = self._sample_probdist(self._transitions[state],
                                          rng.random(), self._states)
            symbol = self._sample_probdist(self._outputs[state],
                                           rng.random(), self._symbols)
            tokens.append((symbol, state))

        return tokens
Example 35
Project: razzy-spinner   Author: rafasashi   File: kmeans.py    GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, num_means, distance, repeats=1,
                       conv_test=1e-6, initial_means=None,
                       normalise=False, svd_dimensions=None,
                       rng=None, avoid_empty_clusters=False):

        """
        Store the clustering configuration on the instance.

        :param  num_means:  the number of means to use (may use fewer)
        :type   num_means:  int
        :param  distance:   measure of distance between two vectors
        :type   distance:   function taking two vectors and returning a float
        :param  repeats:    number of randomised clustering trials to use;
                            must be at least 1, and exactly 1 when
                            ``initial_means`` is given
        :type   repeats:    int
        :param  conv_test:  maximum variation in mean differences before
                            deemed convergent
        :type   conv_test:  number
        :param  initial_means: set of k initial means; when given, its
                            length must equal ``num_means``
        :type   initial_means: sequence of vectors
        :param  normalise:  should vectors be normalised to length 1
        :type   normalise:  boolean
        :param svd_dimensions: number of dimensions to use in reducing vector
                               dimensionality with SVD
        :type svd_dimensions: int
        :param  rng:        random number generator (or None, in which case
                            a new ``random.Random()`` is created)
        :type   rng:        Random
        :param avoid_empty_clusters: include current centroid in computation
                                     of next one; avoids undefined behavior
                                     when clusters become empty
        :type avoid_empty_clusters: boolean
        """
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)
        self._num_means = num_means
        self._distance = distance
        self._max_difference = conv_test

        # Explicit starting means must supply one vector per requested mean.
        assert not (initial_means and len(initial_means) != num_means)
        self._means = initial_means

        assert repeats >= 1
        # Repeated trials are rejected when the starting means are fixed.
        assert not initial_means or not repeats > 1
        self._repeats = repeats

        self._rng = rng or random.Random()
        self._avoid_empty_clusters = avoid_empty_clusters
Example 36
Project: zun   Author: openstack   File: name_generator.py    Apache License 2.0 5 votes vote down vote up
def __init__(self):
        """Give the instance its own generator, stored as ``self.random``.

        NOTE(review): ``Random`` is presumably ``random.Random`` imported at
        module level -- confirm against the file's imports.
        """
        self.random = Random() 
Example 37
Project: robot-navigation   Author: ronaldahmed   File: dictionary.py    MIT License 5 votes vote down vote up
def __init__(self, dictionary, annealing_schedule, 
                 stemmer=None, has_pos=True, style='bag', seed=None):
        """Initialise the base Lesk tagger and store the annealing settings.

        ``dictionary``, ``stemmer``, ``has_pos`` and ``style`` are forwarded
        unchanged to ``LeskWordSenseTagger.__init__``; their meaning is
        defined there.

        :param annealing_schedule: stored as ``self._annealing_schedule``
        :param seed: seed for the private ``random.Random`` instance kept in
                     ``self._random``; ``None`` lets ``Random`` seed itself
        """
        # NOTE(review): the literal 0 is the base class's first positional
        # argument; its meaning is not visible here -- confirm.
        LeskWordSenseTagger.__init__(self, 0, dictionary,
                                     stemmer, has_pos, style)
        self._annealing_schedule = annealing_schedule
        self._random = random.Random(seed) 
Example 38
Project: PAN   Author: WANG-Chaoyue   File: rng.py    MIT License 5 votes vote down vote up
def set_seed(n):
    """Rebind the module-level seed and random generators using seed ``n``.

    Replaces the globals ``seed``, ``py_rng``, ``np_rng`` and ``t_rng`` with
    new objects built from ``n``. Existing references to the old generator
    objects are not affected.

    NOTE(review): ``Random``, ``RandomState`` and ``RandomStreams`` are
    imported elsewhere (presumably from ``random``, ``numpy.random`` and a
    Theano random-streams module) -- confirm.
    """
    global seed, py_rng, np_rng, t_rng
    
    seed = n
    py_rng = Random(seed)
    np_rng = RandomState(seed)
    t_rng = RandomStreams(seed) 
Example 39
Project: recipes-py   Author: luci   File: api.py    Apache License 2.0 5 votes vote down vote up
def __init__(self, module_properties, **kwargs):
    """Create the module's private ``random.Random`` instance.

    The seed is taken from the ``'seed'`` entry of ``module_properties``.
    When that entry is absent the seed is 1234 if test data is enabled,
    otherwise ``None`` (``Random`` then seeds itself).
    """
    super(RandomApi, self).__init__(**kwargs)
    fallback_seed = 1234 if self._test_data.enabled else None
    seed = module_properties.get('seed', fallback_seed)
    self._random = random.Random(seed)
Example 40
Project: recipes-py   Author: luci   File: api.py    Apache License 2.0 5 votes vote down vote up
def __getattr__(self, name):
    """Access a member of `random.Random`.

    Python calls ``__getattr__`` only after normal attribute lookup has
    failed, so this forwards every otherwise-unknown attribute to the
    wrapped ``self._random`` instance.
    """
    return getattr(self._random, name) 
Example 41
Project: dbscan   Author: aroques   File: dataviz.py    MIT License 5 votes vote down vote up
def get_markers(num_markers):
    """Return ``num_markers`` distinct marker symbols.

    A generator with the fixed seed 3 is created on every call, so the same
    ``num_markers`` always yields the same list. ``random.sample`` raises
    ``ValueError`` when more than the five available markers are requested.
    """
    available = ['*', 'o', '^', '+', 'x']
    return Random(3).sample(population=available, k=num_markers)
Example 42
Project: dbscan   Author: aroques   File: dataviz.py    MIT License 5 votes vote down vote up
def get_palette(num_colors):
    """Return ``num_colors`` distinct colour names.

    A generator with the fixed seed 3 is created on every call, so the same
    ``num_colors`` always yields the same list. ``random.sample`` raises
    ``ValueError`` when more than the four available colours are requested.
    """
    available = ['blue', 'darkgreen', 'purple', 'red']
    return Random(3).sample(population=available, k=num_colors)
Example 43
Project: raspiblitz   Author: rootzoll   File: memo.py    MIT License 5 votes vote down vote up
def adjective_noun_pair():
    """
    Pick one random entry from ``adjectives`` and one from ``nouns``.

    ``random.SystemRandom`` is used when the ``random`` module provides it;
    otherwise a warning is written to stderr and ``random.Random`` is used.
    Returns the tuple ``(adjective, noun)``.

    taken from https://github.com/aaronbassett/Pass-phrase/blob/master/pass_phrase.py

    The MIT License (MIT)
    Copyright (c) 2012 Aaron Bassett, http://aaronbassett.com
    Permission is hereby granted, free of charge, to any person
    obtaining a copy of this software and associated documentation
    files (the "Software"), to deal in the Software without restriction,
    including without limitation the rights to use, copy, modify,
    merge, publish, distribute, sublicense, and/or sell copies of the
    Software, and to permit persons to whom the Software is furnished
    to do so, subject to the following conditions:
    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
    IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
    IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    """

    # random.SystemRandom() should be cryptographically secure
    make_rng = getattr(random, 'SystemRandom', None)
    if make_rng is None:
        sys.stderr.write("WARNING: System does not support cryptographically "
                         "secure random number generator or you are using Python "
                         "version < 2.4.\n"
                         "Continuing with less-secure generator.\n")
        make_rng = random.Random

    # A separate generator instance is created for each draw.
    return make_rng().choice(adjectives), make_rng().choice(nouns)
Example 44
Project: DRCOG_Urbansim   Author: apdjustino   File: synthesizer_create_persons_input_table.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def get_random_index_name():
    """Return ``'indx_'`` followed by 8 distinct random ASCII letters.

    The letters are sampled without replacement from A-Z and a-z using a
    freshly created, self-seeded ``Random`` instance.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    suffix = ''.join(Random().sample(alphabet, 8))
    return 'indx_' + suffix
Example 45
Project: lattice   Author: tensorflow   File: separately_calibrated_rtl.py    Apache License 2.0 5 votes vote down vote up
def _create_structure(self, input_dim, num_lattices, lattice_rank, rtl_seed):
    """Create the rtl structure.

    Returns a list with one entry per lattice; each entry is a list of
    ``lattice_rank`` distinct indices sampled from ``range(input_dim)`` by a
    generator seeded with ``rtl_seed``. Nothing is written to disk here.
    """
    rng = random.Random(rtl_seed)
    return [
        rng.sample(six.moves.xrange(input_dim), lattice_rank)
        for _ in range(num_lattices)
    ]
Example 46
Project: lattice   Author: tensorflow   File: calibrated_rtl.py    Apache License 2.0 5 votes vote down vote up
def _create_structure(self, input_dim, num_lattices, lattice_rank, rtl_seed):
    """Create the rtl structure.

    Returns a list with one entry per lattice; each entry is a list of
    ``lattice_rank`` distinct indices sampled from ``range(input_dim)`` by a
    generator seeded with ``rtl_seed``. Nothing is written to disk here.
    """
    rng = random.Random(rtl_seed)
    return [
        rng.sample(six.moves.xrange(input_dim), lattice_rank)
        for _ in range(num_lattices)
    ]
Example 47
Project: bot   Author: python-discord   File: superstarify.py    MIT License 5 votes vote down vote up
def get_nick(infraction_id: int, member_id: int) -> str:
        """
        Pick a nickname from the Superstarify nickname list.

        The generator is seeded with the two ids concatenated as strings, so
        the same pair of ids selects the same entry of ``STAR_NAMES`` for as
        long as that list is unchanged.
        """
        log.trace(f"Choosing a random nickname for superstar #{infraction_id}.")

        seed = "".join((str(infraction_id), str(member_id)))
        return random.Random(seed).choice(STAR_NAMES)

    # This cannot be static (must have a __func__ attribute). 
Example 48
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: graph.py    MIT License 5 votes vote down vote up
def random_walk(self, path_length, alpha=0, rand=random.Random(), start=None):
    """ Returns a truncated random walk.

        path_length: Length of the random walk.
        alpha: probability of restarts.
        rand: random number generator. The default instance is created once,
              when the function is defined, and shared by every call that
              does not pass its own.
        start: the start node of the random walk; None picks a node
               uniformly at random from the graph's keys.

        The walk stops early when it reaches a node without neighbours.
    """
    G = self
    # Compare against None explicitly: a falsy node id such as 0 is a
    # legitimate start node and must not trigger a random start.
    if start is not None:
        path = [start]
    else:
        # Sampling is uniform w.r.t V, and not w.r.t E.
        # list() is required because Python 3 key views are not indexable.
        path = [rand.choice(list(G.keys()))]

    while len(path) < path_length:
        cur = path[-1]
        if len(G[cur]) == 0:
            break
        if rand.random() >= alpha:
            path.append(rand.choice(G[cur]))
        else:
            # Restart: jump back to the first node of the walk.
            path.append(path[0])
    return path

# TODO add build_walks in here 
Example 49
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: graph.py    MIT License 5 votes vote down vote up
def build_deepwalk_corpus(G, num_paths, path_length, alpha=0,
                      rand=random.Random(0)):
  """Return a list of random walks: ``num_paths`` passes over all nodes.

  On every pass the node list is shuffled in place with ``rand`` and one
  walk is started from each node via ``G.random_walk``. The default ``rand``
  is a single generator created at definition time, so its state carries
  over between calls that rely on the default.
  """
  node_order = list(G.nodes())
  corpus = []

  for _ in range(num_paths):
    rand.shuffle(node_order)
    corpus.extend(
        G.random_walk(path_length, rand=rand, alpha=alpha, start=origin)
        for origin in node_order)

  return corpus
Example 50
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: graph.py    MIT License 5 votes vote down vote up
def build_deepwalk_corpus_iter(G, num_paths, path_length, alpha=0,
                      rand=random.Random(0)):
  """Lazily yield random walks: ``num_paths`` passes over all nodes.

  This is a generator; nothing runs until it is iterated. On every pass the
  node list is shuffled in place with ``rand`` and one walk per node is
  yielded from ``G.random_walk``. The default ``rand`` is a single generator
  created at definition time and shared between calls.
  """
  node_order = list(G.nodes())

  for _ in range(num_paths):
    rand.shuffle(node_order)
    for origin in node_order:
      yield G.random_walk(path_length, rand=rand, alpha=alpha, start=origin)
Example 51
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: walks.py    MIT License 5 votes vote down vote up
def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0, rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
  """Write random-walk files for graph ``G`` using worker processes.

  One candidate file name ``<filebase>.<i>`` is produced for every ``i`` in
  ``range(num_paths)`` and each is paired with a per-file path count. A file
  is (re)generated when ``always_rebuild`` is true or when its line count,
  as reported by ``count_lines``, differs from ``path count * len(G)``;
  otherwise the existing file is reused.

  Each generation job receives its own ``random.Random`` seeded from
  ``rand``. The defaults for ``rand`` and ``num_workers`` are evaluated
  once, when the function is defined.

  Returns the list of file names: reused files first, followed by the names
  returned by ``_write_walks_to_disk``.
  """
  # NOTE(review): these module globals are presumably read by the worker
  # function _write_walks_to_disk -- confirm.
  global __current_graph
  global __vertex2str
  __current_graph = G
  __vertex2str = {v:str(v) for v in G.nodes()}
  files_list = ["{}.{}".format(filebase, str(x)) for x in range(num_paths)]
  expected_size = len(G)
  args_list = []
  files = []

  if num_paths <= num_workers:
    paths_per_worker = [1 for x in range(num_paths)]
  else:
    # Count the real entries of every group; None entries are excluded
    # (presumably the padding grouper adds to the last group).
    # sum() over a generator works on both Python 2 and 3, unlike
    # len(filter(...)).
    paths_per_worker = [sum(1 for y in x if y is not None)
                        for x in graph.grouper(int(num_paths / num_workers)+1, range(1, num_paths+1))]

  # zip() stops at its shortest input, so only as many files as there are
  # entries in paths_per_worker are considered.
  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for size, file_, ppw in zip(executor.map(count_lines, files_list), files_list, paths_per_worker):
      if always_rebuild or size != (ppw*expected_size):
        args_list.append((ppw, path_length, alpha, random.Random(rand.randint(0, 2**31)), file_))
      else:
        files.append(file_)

  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for file_ in executor.map(_write_walks_to_disk, args_list):
      files.append(file_)

  return files
Example 52
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: graph.py    MIT License 5 votes vote down vote up
def random_walk(self, path_length, alpha=0, rand=random.Random(), start=None):
    """ Returns a truncated random walk.

        path_length: Length of the random walk.
        alpha: probability of restarts.
        rand: random number generator. The default instance is created once,
              when the function is defined, and shared by every call that
              does not pass its own.
        start: the start node of the random walk; None picks a node
               uniformly at random from the graph's keys.

        The walk stops early when it reaches a node without neighbours.
    """
    G = self
    # Compare against None explicitly: a falsy node id such as 0 is a
    # legitimate start node and must not trigger a random start.
    if start is not None:
        path = [start]
    else:
        # Sampling is uniform w.r.t V, and not w.r.t E.
        # list() is required because Python 3 key views are not indexable.
        path = [rand.choice(list(G.keys()))]

    while len(path) < path_length:
        cur = path[-1]
        if len(G[cur]) == 0:
            break
        if rand.random() >= alpha:
            path.append(rand.choice(G[cur]))
        else:
            # Restart: jump back to the first node of the walk.
            path.append(path[0])
    return path

# TODO add build_walks in here 
Example 53
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: graph.py    MIT License 5 votes vote down vote up
def build_deepwalk_corpus(G, num_paths, path_length, alpha=0,
                      rand=random.Random(0)):
  """Return a list of random walks: ``num_paths`` passes over all nodes.

  On every pass the node list is shuffled in place with ``rand`` and one
  walk is started from each node via ``G.random_walk``. The default ``rand``
  is a single generator created at definition time, so its state carries
  over between calls that rely on the default.
  """
  node_order = list(G.nodes())
  corpus = []

  for _ in range(num_paths):
    rand.shuffle(node_order)
    corpus.extend(
        G.random_walk(path_length, rand=rand, alpha=alpha, start=origin)
        for origin in node_order)

  return corpus
Example 54
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: graph.py    MIT License 5 votes vote down vote up
def build_deepwalk_corpus_iter(G, num_paths, path_length, alpha=0,
                      rand=random.Random(0)):
  """Lazily yield random walks: ``num_paths`` passes over all nodes.

  This is a generator; nothing runs until it is iterated. On every pass the
  node list is shuffled in place with ``rand`` and one walk per node is
  yielded from ``G.random_walk``. The default ``rand`` is a single generator
  created at definition time and shared between calls.
  """
  node_order = list(G.nodes())

  for _ in range(num_paths):
    rand.shuffle(node_order)
    for origin in node_order:
      yield G.random_walk(path_length, rand=rand, alpha=alpha, start=origin)
Example 55
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: walks.py    MIT License 5 votes vote down vote up
def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0, rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
  """Write random-walk files for graph ``G`` using worker processes.

  One candidate file name ``<filebase>.<i>`` is produced for every ``i`` in
  ``range(num_paths)`` and each is paired with a per-file path count. A file
  is (re)generated when ``always_rebuild`` is true or when its line count,
  as reported by ``count_lines``, differs from ``path count * len(G)``;
  otherwise the existing file is reused.

  Each generation job receives its own ``random.Random`` seeded from
  ``rand``. The defaults for ``rand`` and ``num_workers`` are evaluated
  once, when the function is defined.

  Returns the list of file names: reused files first, followed by the names
  returned by ``_write_walks_to_disk``.
  """
  # NOTE(review): these module globals are presumably read by the worker
  # function _write_walks_to_disk -- confirm.
  global __current_graph
  global __vertex2str
  __current_graph = G
  __vertex2str = {v:str(v) for v in G.nodes()}
  files_list = ["{}.{}".format(filebase, str(x)) for x in range(num_paths)]
  expected_size = len(G)
  args_list = []
  files = []

  if num_paths <= num_workers:
    paths_per_worker = [1 for x in range(num_paths)]
  else:
    # Count the real entries of every group; None entries are excluded
    # (presumably the padding grouper adds to the last group).
    # sum() over a generator works on both Python 2 and 3, unlike
    # len(filter(...)).
    paths_per_worker = [sum(1 for y in x if y is not None)
                        for x in graph.grouper(int(num_paths / num_workers)+1, range(1, num_paths+1))]

  # zip() stops at its shortest input, so only as many files as there are
  # entries in paths_per_worker are considered.
  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for size, file_, ppw in zip(executor.map(count_lines, files_list), files_list, paths_per_worker):
      if always_rebuild or size != (ppw*expected_size):
        args_list.append((ppw, path_length, alpha, random.Random(rand.randint(0, 2**31)), file_))
      else:
        files.append(file_)

  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    for file_ in executor.map(_write_walks_to_disk, args_list):
      files.append(file_)

  return files
Example 56
Project: OpenBottle   Author: xiaozhuchacha   File: hmm.py    MIT License 5 votes vote down vote up
def random_sample(self, rng, length):
        """
        Randomly sample the HMM to generate a sequence of a given length.

        The first state is drawn from the prior distribution and every later
        state from the transition distribution of the state before it. Each
        time a state is chosen, a symbol is drawn from that state's output
        distribution. Two numbers are taken from ``rng`` per token: first
        for the state, then for the symbol.

        :return:        the sampled sequence as a list of ``(symbol, state)``
                        tuples; empty when ``length`` is less than 1
        :rtype:         list
        :param rng:     random number generator
        :type rng:      Random (or any object with a random() method)
        :param length:  desired output length
        :type length:   int
        """
        tokens = []
        # Without this guard a request for zero tokens would still sample
        # (and return) the initial token.
        if length < 1:
            return tokens

        # sample the starting state and symbol prob dists
        state = self._sample_probdist(self._priors, rng.random(), self._states)
        symbol = self._sample_probdist(self._outputs[state],
                                       rng.random(), self._symbols)
        tokens.append((symbol, state))

        for _ in range(1, length):
            # sample the state transition and symbol prob dists
            state = self._sample_probdist(self._transitions[state],
                                          rng.random(), self._states)
            symbol = self._sample_probdist(self._outputs[state],
                                           rng.random(), self._symbols)
            tokens.append((symbol, state))

        return tokens
Example 57
Project: OpenBottle   Author: xiaozhuchacha   File: kmeans.py    MIT License 5 votes vote down vote up
def __init__(self, num_means, distance, repeats=1,
                       conv_test=1e-6, initial_means=None,
                       normalise=False, svd_dimensions=None,
                       rng=None, avoid_empty_clusters=False):

        """
        Store the clustering configuration on the instance.

        :param  num_means:  the number of means to use (may use fewer)
        :type   num_means:  int
        :param  distance:   measure of distance between two vectors
        :type   distance:   function taking two vectors and returning a float
        :param  repeats:    number of randomised clustering trials to use;
                            must be at least 1, and exactly 1 when
                            ``initial_means`` is given
        :type   repeats:    int
        :param  conv_test:  maximum variation in mean differences before
                            deemed convergent
        :type   conv_test:  number
        :param  initial_means: set of k initial means; when given, its
                            length must equal ``num_means``
        :type   initial_means: sequence of vectors
        :param  normalise:  should vectors be normalised to length 1
        :type   normalise:  boolean
        :param svd_dimensions: number of dimensions to use in reducing vector
                               dimensionality with SVD
        :type svd_dimensions: int
        :param  rng:        random number generator (or None, in which case
                            a new ``random.Random()`` is created)
        :type   rng:        Random
        :param avoid_empty_clusters: include current centroid in computation
                                     of next one; avoids undefined behavior
                                     when clusters become empty
        :type avoid_empty_clusters: boolean
        """
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)
        self._num_means = num_means
        self._distance = distance
        self._max_difference = conv_test

        # Explicit starting means must supply one vector per requested mean.
        assert not (initial_means and len(initial_means) != num_means)
        self._means = initial_means

        assert repeats >= 1
        # Repeated trials are rejected when the starting means are fixed.
        assert not initial_means or not repeats > 1
        self._repeats = repeats

        self._rng = rng or random.Random()
        self._avoid_empty_clusters = avoid_empty_clusters
Example 58
Project: OpenBottle   Author: xiaozhuchacha   File: hmm.py    MIT License 5 votes vote down vote up
def random_sample(self, rng, length):
        """
        Randomly sample the HMM to generate a sequence of a given length.

        The first state is drawn from the prior distribution and every later
        state from the transition distribution of the state before it. Each
        time a state is chosen, a symbol is drawn from that state's output
        distribution. Two numbers are taken from ``rng`` per token: first
        for the state, then for the symbol.

        :return:        the sampled sequence as a list of ``(symbol, state)``
                        tuples; empty when ``length`` is less than 1
        :rtype:         list
        :param rng:     random number generator
        :type rng:      Random (or any object with a random() method)
        :param length:  desired output length
        :type length:   int
        """
        tokens = []
        # Without this guard a request for zero tokens would still sample
        # (and return) the initial token.
        if length < 1:
            return tokens

        # sample the starting state and symbol prob dists
        state = self._sample_probdist(self._priors, rng.random(), self._states)
        symbol = self._sample_probdist(self._outputs[state],
                                       rng.random(), self._symbols)
        tokens.append((symbol, state))

        for _ in range(1, length):
            # sample the state transition and symbol prob dists
            state = self._sample_probdist(self._transitions[state],
                                          rng.random(), self._states)
            symbol = self._sample_probdist(self._outputs[state],
                                           rng.random(), self._symbols)
            tokens.append((symbol, state))

        return tokens
Example 59
Project: MIALab   Author: ubern-mia   File: prepare_data.py    Apache License 2.0 5 votes vote down vote up
def split_dataset(train_split, subject_files):
    """Split ``subject_files`` into a training and a test dictionary.

    The keys are shuffled with a generator seeded with the constant 20, so
    the split is reproducible for the same input. The first
    ``int(len(subject_files) * train_split)`` shuffled keys go to the
    training dictionary and the rest to the test dictionary.

    Returns ``(train_subject, test_subject)``.
    """
    shuffled_ids = list(subject_files.keys())
    random.Random(20).shuffle(shuffled_ids)

    cut = int(len(shuffled_ids) * train_split)

    train_subject = {sid: subject_files[sid] for sid in shuffled_ids[:cut]}
    test_subject = {sid: subject_files[sid] for sid in shuffled_ids[cut:]}
    return train_subject, test_subject
Example 60
Project: NiujiaoDebugger   Author: MrSrc   File: tempfile.py    GNU General Public License v3.0 5 votes vote down vote up
def rng(self):
        """Return the generator cached for the current process.

        The generator is cached together with the PID it was created under.
        When the current PID differs from the recorded one (or nothing has
        been recorded yet) a new generator replaces the cached one.
        """
        pid_now = _os.getpid()
        if getattr(self, '_rng_pid', None) == pid_now:
            return self._rng

        self._rng = _Random()
        self._rng_pid = pid_now
        return self._rng
Example 61
Project: NiujiaoDebugger   Author: MrSrc   File: test_zlib.py    GNU General Public License v3.0 5 votes vote down vote up
def test_odd_flush(self):
        # Test for odd flushing bugs noted in 2.0, and hopefully fixed in 2.1:
        # compress 17K of "random" data, sync-flush, decompress, and check
        # that the round trip reproduces the input.
        import random

        compressor = zlib.compressobj(zlib.Z_BEST_COMPRESSION)
        decompressor = zlib.decompressobj()

        # Pick the data generator. In 2.3 and later, WichmannHill is the RNG
        # of the bug report; 2.2 called it Random; others might simply have
        # a single RNG, in which case the module itself is used.
        gen = random
        for rng_class_name in ('WichmannHill', 'Random'):
            try:
                gen = getattr(random, rng_class_name)()
            except AttributeError:
                continue
            break
        gen.seed(1)
        data = genblock(1, 17 * 1024, generator=gen)

        # compress, sync-flush, and decompress
        compressed = compressor.compress(data)
        flushed = compressor.flush(zlib.Z_SYNC_FLUSH)
        expanded = decompressor.decompress(compressed + flushed)

        # if decompressed data is different from the input data, choke.
        self.assertEqual(expanded, data, "17K random source doesn't match")
Example 62
Project: NiujiaoDebugger   Author: MrSrc   File: test_queue.py    GNU General Public License v3.0 5 votes vote down vote up
def run_threads(self, n_feeders, n_consumers, q, inputs,
                    feed_func, consume_func):
        # Run n_feeders feeder threads and n_consumers consumer threads
        # against queue q, then check that no thread raised and that the
        # queue ended up empty. Returns the list the consumers were given
        # to collect results in.
        results = []
        sentinel = None
        # One sentinel per consumer follows the inputs; the whole work list
        # is then reversed.
        seq = inputs + [sentinel] * n_consumers
        seq.reverse()
        rnd = random.Random(42)

        errors = []

        def capture_errors(func):
            # Record anything the thread target raises instead of letting
            # it escape the thread.
            def guarded(*args, **kwargs):
                try:
                    func(*args, **kwargs)
                except BaseException as exc:
                    errors.append(exc)
            return guarded

        feeders = []
        for _ in range(n_feeders):
            feeders.append(threading.Thread(target=capture_errors(feed_func),
                                            args=(q, seq, rnd)))
        consumers = []
        for _ in range(n_consumers):
            consumers.append(threading.Thread(target=capture_errors(consume_func),
                                              args=(q, results, sentinel)))

        with support.start_threads(feeders + consumers):
            pass

        self.assertFalse(errors)
        self.assertTrue(q.empty())
        self.assertEqual(q.qsize(), 0)

        return results
Example 63
Project: NiujiaoDebugger   Author: MrSrc   File: test_random.py    GNU General Public License v3.0 5 votes vote down vote up
def test_seed_when_randomness_source_not_found(self, urandom_mock):
        # Random.seed() uses time.time() when an operating system specific
        # randomness source is not found. To test this on machines where it
        # exists, run the above test, test_seedargs(), again after mocking
        # os.urandom() so that it raises the exception expected when the
        # randomness source is not available.
        #
        # NOTE(review): urandom_mock is presumably injected by a mock.patch
        # decorator on os.urandom that is not visible in this excerpt.
        urandom_mock.side_effect = NotImplementedError
        self.test_seedargs() 
Example 64
Project: NiujiaoDebugger   Author: MrSrc   File: test_random.py    GNU General Public License v3.0 5 votes vote down vote up
def test_randbelow_overridden_random(self, random_mock):
        # Random._randbelow() can only use random() when the built-in one
        # has been overridden but no new getrandbits() method was supplied.
        #
        # NOTE(review): random_mock is presumably injected by a mock.patch
        # decorator that replaces the generator's random() and is not
        # visible in this excerpt.
        random_mock.side_effect = random.SystemRandom().random
        maxsize = 1<<random.BPF
        # The UserWarning is silenced only around the oversized call below.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            # Population range too large (n >= maxsize)
            self.gen._randbelow(maxsize+1, maxsize = maxsize)
        self.gen._randbelow(5640, maxsize = maxsize)
        # issue 33203: test that _randbelow raises ValueError on
        # n == 0 also in its getrandbits-independent branch.
        with self.assertRaises(ValueError):
            self.gen._randbelow(0, maxsize=maxsize)
        # This might be going too far to test a single line, but because of our
        # noble aim of achieving 100% test coverage we need to write a case in
        # which the following line in Random._randbelow() gets executed:
        #
        # rem = maxsize % n
        # limit = (maxsize - rem) / maxsize
        # r = random()
        # while r >= limit:
        #     r = random() # <== *This line* <==<
        #
        # Therefore, to guarantee that the while loop is executed at least
        # once, we need to mock random() so that it returns a number greater
        # than 'limit' the first time it gets called.

        n = 42
        epsilon = 0.01
        limit = (maxsize - (maxsize % n)) / maxsize
        # First mocked value lies above 'limit' (forcing one retry), the
        # second lies below it (ending the loop).
        random_mock.side_effect = [limit + epsilon, limit - epsilon]
        self.gen._randbelow(n, maxsize = maxsize) 
Example 65
Project: NiujiaoDebugger   Author: MrSrc   File: test_random.py    GNU General Public License v3.0 5 votes vote down vote up
def test_avg_std(self):
        # Use integration to test distribution average and standard deviation.
        # Only works for distributions which do not consume variates in pairs
        g = random.Random()
        N = 5000
        # Evenly spaced values 1/N .. (N-1)/N that stand in for random draws.
        x = [i/float(N) for i in range(1,N)]
        # Each entry: (variate method, its arguments, expected mean,
        # expected variance).
        for variate, args, mu, sigmasqrd in [
                (g.uniform, (1.0,10.0), (10.0+1.0)/2, (10.0-1.0)**2/12),
                (g.triangular, (0.0, 1.0, 1.0/3.0), 4.0/9.0, 7.0/9.0/18.0),
                (g.expovariate, (1.5,), 1/1.5, 1/1.5**2),
                (g.vonmisesvariate, (1.23, 0), pi, pi**2/3),
                (g.paretovariate, (5.0,), 5.0/(5.0-1),
                                  5.0/((5.0-1)**2*(5.0-2))),
                (g.weibullvariate, (1.0, 3.0), gamma(1+1/3.0),
                                  gamma(1+2/3.0)-gamma(1+1/3.0)**2) ]:
            # Replace g.random with the pop method of a fresh copy of x, so
            # every call hands out the next value from the end of that copy.
            g.random = x[:].pop
            y = []
            for i in range(len(x)):
                try:
                    y.append(variate(*args))
                except IndexError:
                    # The copied list ran out of values; nothing is added
                    # for this iteration.
                    pass
            # s1 accumulates the sum of the variates, s2 the sum of squared
            # deviations from the expected mean.
            s1 = s2 = 0
            for e in y:
                s1 += e
                s2 += (e - mu) ** 2
            # From here on N is the number of variates actually collected.
            N = len(y)
            self.assertAlmostEqual(s1/N, mu, places=2,
                                   msg='%s%r' % (variate.__name__, args))
            self.assertAlmostEqual(s2/(N-1), sigmasqrd, places=2,
                                   msg='%s%r' % (variate.__name__, args)) 
Example 66
Project: NiujiaoDebugger   Author: MrSrc   File: test_random.py    GNU General Public License v3.0 5 votes vote down vote up
def test_von_mises_range(self):
        # Issue 17149: von mises variates were not consistently in the
        # range [0, 2*PI]. Draw 100 samples for every (mu, kappa) pair and
        # check each one against that range.
        g = random.Random()
        samples_per_case = 100
        failure_template = ("vonmisesvariate({}, {}) produced a result {} out"
                            " of range [0, 2*pi]")
        for mu in 0.0, 0.1, 3.1, 6.2:
            for kappa in 0.0, 2.3, 500.0:
                for _ in range(samples_per_case):
                    sample = g.vonmisesvariate(mu, kappa)
                    in_range = 0 <= sample <= random.TWOPI
                    self.assertTrue(
                        in_range,
                        msg=failure_template.format(mu, kappa, sample))
Example 67
Project: 0xbtc-discord-price-bot   Author: 0x1d00ffff   File: command_handlers.py    MIT License 5 votes vote down vote up
def cmd_help(command_str, discord_message, apis):
    """Return the multi-line help text listing the bot's commands.

    The "price commands" line shows five entries sampled from
    ``config.EXPENSIVE_STUFF`` by a generator seeded with today's
    day-of-year string, so the selection stays the same throughout a day.
    None of the three parameters is used.
    """
    day_of_year = datetime.date.today().strftime("%j")
    todays_picks = random.Random(day_of_year).sample(config.EXPENSIVE_STUFF, 5)
    price_commands = "  ".join("`{}`".format(entry[1][0]) for entry in todays_picks)

    sections = [
        "trading commands: `price`  `price <exchange>`  `volume`  `ratio`  `rank`  `btc`  `eth`  `marketcap`\n",
        "price commands: {}\n".format(price_commands),
        # "bot commands: `uptime ping` " (disabled)
        "token info: `supply`  `difficulty`  `hashrate`  `blocktime`  `holders`  `halvening`  `burned` `balance`\n",
        "quick link commands: `whitepaper`  `website`  `ann`  `contract`  `stats`  `miners`  `merch`\n",
        "tools: `convert`  `income`  `mine`",
    ]
    return "".join(sections)
Example 68
Project: 0xbtc-discord-price-bot   Author: 0x1d00ffff   File: all_self_tests.py    MIT License 5 votes vote down vote up
def get_fuzzing_iterator(seed=None):
    """Yield an endless stream of pseudo-random command strings.

    Every yielded string joins 2 to 5 chunks (cycling with the iteration
    count) with single spaces. Chunks are drawn from the list returned by
    ``generate_command_list()`` plus number strings, printable strings and
    arbitrary-character strings that are regenerated every 128 iterations.
    A given ``seed`` makes the stream reproducible.
    """
    import random
    import string

    rng = random.Random(seed)
    commands = generate_command_list()
    iteration = 0

    while True:
        # Refresh the numbers/bytes mixed in with the commands every
        # 128 iterations.
        if iteration % 128 == 0:
            # a few random floats: five huge ones, five small ones
            wide = [rng.uniform(-1e30, 1e30) for _ in range(5)]
            narrow = [rng.uniform(-100, 100) for _ in range(5)]
            floats = wide + narrow
            # add the integer versions also
            numbers = floats + [int(value) for value in floats]
            printable = [''.join(rng.choices(string.printable, k=8))
                         for _ in range(20)]
            garbage = [''.join(chr(rng.randint(0,255)) for _ in range(8))
                       for _ in range(20)]
            chunk_pool = (commands
                          + [str(value) for value in numbers]
                          + printable
                          + garbage)

        # number of pieces to combine to create a command (2-5)
        piece_count = (iteration % 4) + 2
        yield ' '.join(rng.choices(chunk_pool, k=piece_count))
        iteration += 1
Example 69
Project: 0xbtc-discord-price-bot   Author: 0x1d00ffff   File: backoff.py    MIT License 5 votes vote down vote up
def __init__(self, base=1, *, integral=False):
        """Initialise the backoff state.

        :param base: base value stored on the instance; the reset time is
                     derived from it as ``base * 2 ** 11``.
        :param integral: keyword-only; when true the stored random function
                         is the integer-valued ``randrange``, otherwise the
                         float-valued ``uniform``.
        """
        self._base = base

        self._exp = 0
        self._max = 10
        self._reset_time = base * 2 ** 11
        self._last_invocation = time.monotonic()

        # Use our own random instance to avoid messing with global one
        rand = random.Random()
        rand.seed()

        # random.Random has no ``rand_range`` attribute; the integer
        # counterpart of ``uniform`` is ``randrange``. The old spelling raised
        # AttributeError whenever integral=True was requested.
        self._randfunc = rand.randrange if integral else rand.uniform
Example 70
Project: Health-Checker   Author: KriAga   File: hmm.py    MIT License 5 votes vote down vote up
def random_sample(self, rng, length):
        """
        Draw a random (symbol, state) sequence from the model.

        The first state is sampled from the priors and its symbol from that
        state's output distribution. Every further step samples the next
        state from the current state's transition distribution and then a
        symbol from the new state's output distribution. One pair is always
        produced, even when ``length`` is below 1.

        :param rng:     random number generator
        :type rng:      Random (or any object with a random() method)
        :param length:  desired output length
        :type length:   int
        :return:        list of ``(symbol, state)`` tuples
        :rtype:         list
        """
        draw = self._sample_probdist

        # first step: the state comes from the priors
        state = draw(self._priors, rng.random(), self._states)
        symbol = draw(self._outputs[state], rng.random(), self._symbols)
        sequence = [(symbol, state)]

        # remaining steps: follow the transitions out of the current state
        for _ in range(1, length):
            state = draw(self._transitions[state], rng.random(), self._states)
            symbol = draw(self._outputs[state], rng.random(), self._symbols)
            sequence.append((symbol, state))

        return sequence
Example 71
Project: Health-Checker   Author: KriAga   File: kmeans.py    MIT License 5 votes vote down vote up
def __init__(self, num_means, distance, repeats=1,
                       conv_test=1e-6, initial_means=None,
                       normalise=False, svd_dimensions=None,
                       rng=None, avoid_empty_clusters=False):

        """
        Store the clustering configuration on the instance.

        :param  num_means:  the number of means to use (may use fewer)
        :type   num_means:  int
        :param  distance:   measure of distance between two vectors
        :type   distance:   function taking two vectors and returning a float
        :param  repeats:    number of randomised clustering trials to use;
                            must be at least 1, and exactly 1 when
                            ``initial_means`` is supplied
        :type   repeats:    int
        :param  conv_test:  maximum variation in mean differences before
                            deemed convergent
        :type   conv_test:  number
        :param  initial_means: set of k initial means; when given, its length
                            must equal ``num_means``
        :type   initial_means: sequence of vectors
        :param  normalise:  should vectors be normalised to length 1
        :type   normalise:  boolean
        :param svd_dimensions: number of dimensions to use in reducing vector
                               dimensionality with SVD
        :type svd_dimensions: int
        :param  rng:        random number generator; a fresh
                            ``random.Random()`` is used when falsy
        :type   rng:        Random
        :param avoid_empty_clusters: include current centroid in computation
                                     of next one; avoids undefined behavior
                                     when clusters become empty
        :type avoid_empty_clusters: boolean
        """
        # normalisation and SVD settings are handled by the base class
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

        self._num_means = num_means
        self._distance = distance
        self._max_difference = conv_test

        # explicit starting means must come in the requested quantity
        assert not initial_means or len(initial_means) == num_means
        self._means = initial_means

        assert repeats >= 1
        # repeated trials make no sense with fixed starting means
        assert not initial_means or not repeats > 1
        self._repeats = repeats

        self._rng = rng or random.Random()
        self._avoid_empty_clusters = avoid_empty_clusters
Example 72
Project: bert_lamb_pretrain   Author: goldenbili   File: modeling_test.py    Apache License 2.0 5 votes vote down vote up
def ids_tensor(cls, shape, vocab_size, rng=None, name=None):
    """Build an int32 constant tensor of `shape` filled with random ids.

    Each id is drawn with `rng.randint(0, vocab_size - 1)`, so both ends of
    that range are possible. A new `random.Random()` is created when `rng`
    is None.
    """
    generator = random.Random() if rng is None else rng

    # number of scalar entries = product of all dimensions
    element_count = 1
    for extent in shape:
      element_count *= extent

    flat_ids = [generator.randint(0, vocab_size - 1)
                for _ in range(element_count)]

    return tf.constant(value=flat_ids, dtype=tf.int32, shape=shape, name=name)
Example 73
Project: bert_lamb_pretrain   Author: goldenbili   File: create_pretraining_data.py    Apache License 2.0 5 votes vote down vote up
def main(_):
  """Create training instances from the input files and write them out.

  Reads the comma-separated glob patterns in FLAGS.input_file, builds the
  instances with a generator seeded from FLAGS.random_seed, and passes them
  to write_instance_to_example_files together with the comma-separated
  paths in FLAGS.output_file.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  # Expand every comma-separated glob pattern into concrete paths.
  input_files = [
      path
      for pattern in FLAGS.input_file.split(",")
      for path in tf.gfile.Glob(pattern)
  ]

  tf.logging.info("*** Reading from input files ***")
  for path in input_files:
    tf.logging.info("  %s", path)

  seeded_rng = random.Random(FLAGS.random_seed)
  instances = create_training_instances(
      input_files, tokenizer, FLAGS.max_seq_length, FLAGS.dupe_factor,
      FLAGS.short_seq_prob, FLAGS.masked_lm_prob, FLAGS.max_predictions_per_seq,
      seeded_rng)

  output_files = FLAGS.output_file.split(",")
  tf.logging.info("*** Writing to output files ***")
  for path in output_files:
    tf.logging.info("  %s", path)

  write_instance_to_example_files(instances, tokenizer, FLAGS.max_seq_length,
                                  FLAGS.max_predictions_per_seq, output_files)
Example 74
Project: datasette   Author: simonw   File: fixtures.py    Apache License 2.0 5 votes vote down vote up
def generate_sortable_rows(num):
    """Yield up to ``num`` deterministic fixture rows.

    Rows are keyed by successive two-letter lowercase pairs ("a","a"),
    ("a","b"), ... and carry values drawn from a generator seeded with 42,
    so repeated calls produce the same rows.
    """
    rng = random.Random(42)
    letter_pairs = itertools.product(string.ascii_lowercase, repeat=2)
    for first, second in itertools.islice(letter_pairs, num):
        # Draw in the same order as the keys appear in the row.
        sortable = rng.randint(-100, 100)
        nullable_a = rng.choice([None, rng.random(), rng.random()])
        nullable_b = rng.choice([None, rng.random(), rng.random()])
        text = rng.choice(["$null", "$blah"])
        yield {
            "pk1": first,
            "pk2": second,
            "content": "{}-{}".format(first, second),
            "sortable": sortable,
            "sortable_with_nulls": nullable_a,
            "sortable_with_nulls_2": nullable_b,
            "text": text,
        }
Example 75
Project: gpsr-command-understanding   Author: nickswalker   File: generate_rephrasing_dataset.py    MIT License 5 votes vote down vote up
def main():
    """Generate grounded command examples and write them to a CSV of HITs.

    Uses ``seed``, ``groundings_per_parse`` and ``rehprasings_per_hit``,
    which are resolved outside this function. Examples are generated
    ``groundings_per_parse`` times, shuffled within each round, grouped into
    chunks of ``rehprasings_per_hit`` and written one chunk per CSV row. A
    short final chunk is topped up with examples sampled from earlier chunks.
    The file is then read back once as a sanity check.
    """
    random_source = random.Random(seed)
    script_dir = os.path.dirname(__file__)
    grammar_dir = os.path.abspath(script_dir + "/../../resources/generator2018")
    out_file_path = os.path.abspath(
        script_dir + "/../../data/rephrasings_data_{}_{}.csv".format(seed, groundings_per_parse))
    cmd_gen = Generator(grammar_format_version=2018)
    generator = load_all_2018_by_cat(cmd_gen, grammar_dir)

    all_examples = []
    for _ in range(groundings_per_parse):
        grounded_examples = get_grounding_per_each_parse(generator, random_source)
        random_source.shuffle(grounded_examples)
        all_examples += grounded_examples

    # The csv module expects files opened with newline='' so that it controls
    # line endings itself (avoids doubled "\r" on platforms that translate
    # newlines, and keeps newlines embedded in quoted fields intact).
    with open(out_file_path, 'w', newline='') as csvfile:
        output = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL)
        # Header: command1, parse1, parse_ground1, command2, ...
        command_columns = [
            column
            for x in range(1, rehprasings_per_hit + 1)
            for column in ("command" + str(x), "parse" + str(x), "parse_ground" + str(x))
        ]
        output.writerow(command_columns)

        chunks = list(chunker(all_examples, rehprasings_per_hit))
        print("Writing {} HITS".format(len(chunks)))
        for i, chunk in enumerate(chunks):
            if len(chunk) < rehprasings_per_hit:
                needed = rehprasings_per_hit - len(chunk)
                # Sample from previous hits to fill out this last one
                earlier_examples = [pair for earlier in chunks[:i] for pair in earlier]
                chunk += random_source.sample(earlier_examples, k=needed)
            line = []
            for utterance, parse_anon, parse_ground in chunk:
                line += [tree_printer(utterance), tree_printer(parse_anon), tree_printer(parse_ground)]
            output.writerow(line)

    # Let's verify that we can load the output back in...
    with open(out_file_path, 'r', newline='') as csvfile:
        for _row in csv.DictReader(csvfile):
            pass
Example 76
Project: gpsr-command-understanding   Author: nickswalker   File: test_parser.py    MIT License 5 votes vote down vote up
def test_parse_all_2019_anonymized(self):
        """Every one of the first 1000 generated 2019 sentences must parse.

        Sentences are generated from the grounded rules with a generator
        seeded with 1, printed to text, and fed to an anonymizing parser
        built on top of a grammar-based parser over the anonymized rules.
        Failing sentences are printed for inspection before the final count
        is asserted.
        """
        generator = Generator(grammar_format_version=2019)

        grammar_dir = os.path.abspath(os.path.dirname(__file__) + "/../resources/generator2019")
        rules, rules_anon, rules_ground, semantics, entities = load_all_2019(generator, grammar_dir)

        sentences = generate_sentence_parse_pairs(ROOT_SYMBOL, rules_ground, {}, yield_requires_semantics=False,
                                                  random_generator=random.Random(1))
        grammar_parser = GrammarBasedParser(rules_anon)

        # Why the grammar parser alone is not enough:
        #
        #   Bring me the apple from the fridge to the kitchen
        #   ---straight anon to clusters--->
        #   Bring me the {ob}  from the {loc} to the {loc}
        #   ---Grammar based parser--->
        #   (Failure; grammar has numbers on locs)
        #
        #   Bring me the apple from the fridge to the kitchen
        #   ---id naive number anon--->
        #   Bring me the {ob}  from the {loc 1} to the {loc 2}
        #   ---Grammar based parser--->
        #   (Failure; wrong numbers, or maybe)

        anonymizer = Anonymizer(*entities)
        parser = AnonymizingParser(grammar_parser, anonymizer)

        sample_size = 1000
        passed = 0
        for sentence, _parse in itertools.islice(sentences, sample_size):
            text = tree_printer(sentence)
            if parser(text):
                passed += 1
                continue
            # Dump diagnostics for the sentence that failed to parse.
            print(text)
            print(anonymizer(text))
            print()
            print(parser(anonymizer(text)))

        self.assertEqual(passed, sample_size)
Example 77
Project: robot-navigation   Author: ronaldahmed   File: __init__.py    MIT License 4 votes vote down vote up
def random_sample(self, rng, length):
        """
        Draw a random state/observation sequence from the HMM.

        The first state is sampled from the priors and its symbol from that
        state's output distribution; each later step samples the next state
        from the current state's transitions and then a symbol from the new
        state's outputs. One sub-token is always produced, even when
        C{length} is below 1.

        @return:        a Token whose SUBTOKENS hold one Token per step,
                        each built with TEXT (the symbol) and TAG (the state)
        @rtype:         Token
        @param rng:     random number generator
        @type rng:      Random (or any object with a random() method)
        @param length:  desired output length
        @type length:   int
        """
        assert chktype(2, length, types.IntType)

        # Look up the configured property names.
        # NOTE(review): these values are not passed on below -- the Token
        # constructors use the literal keywords TEXT/TAG/SUBTOKENS. Confirm
        # whether the configured names were meant to be used.
        properties = self._properties
        SUBTOKENS = properties.get('SUBTOKENS', 'SUBTOKENS')
        TEXT = properties.get('TEXT', 'TEXT')
        TAG = properties.get('TAG', 'TAG')

        draw = self._sample_probdist

        # first step: the state comes from the priors
        state = draw(self._priors, rng.random(), self._states)
        symbol = draw(self._outputs[state], rng.random(), self._symbols)
        sampled = [Token(TEXT=symbol, TAG=state)]

        # remaining steps: follow the transitions out of the current state
        for _ in range(1, length):
            state = draw(self._transitions[state], rng.random(), self._states)
            symbol = draw(self._outputs[state], rng.random(), self._symbols)
            sampled.append(Token(TEXT=symbol, TAG=state))

        return Token(SUBTOKENS=sampled)
Example 78
Project: robot-navigation   Author: ronaldahmed   File: __init__.py    MIT License 4 votes vote down vote up
def demo_bw():
    """Demonstrate Baum-Welch training on sequences sampled from a known HMM.

    Builds a three-state market model (example taken from page 381, Huang
    et al), samples ten sequences of length five from it, and runs
    unsupervised training on them starting from the generating model.
    """
    symbols = ['up', 'down', 'unchanged']
    states = ['bull', 'bear', 'static']

    def pd(values, samples):
        # probability distribution pairing each sample with its value
        return DictionaryProbDist(dict(zip(samples, values)))

    def cpd(matrix, conditions, samples):
        # one probability distribution per condition, one matrix row each
        table = {}
        for condition, row in zip(conditions, matrix):
            table[condition] = pd(row, samples)
        return DictionaryConditionalProbDist(table)

    transition_matrix = array([[0.6, 0.2, 0.2], [0.5, 0.3, 0.2], [0.4, 0.1, 0.5]], Float64)
    transitions = cpd(transition_matrix, states, states)
    output_matrix = array([[0.7, 0.1, 0.2], [0.1, 0.6, 0.3], [0.3, 0.3, 0.4]], Float64)
    outputs = cpd(output_matrix, states, symbols)
    prior_vector = array([0.5, 0.2, 0.3], Float64)
    priors = pd(prior_vector, states)

    model = HiddenMarkovModel(symbols=symbols, states=states,
                              transitions=transitions, outputs=outputs,
                              priors=priors)

    # sample the training sequences from the model itself
    import random
    rng = random.Random()
    sequences = [model.random_sample(rng, 5) for _ in range(10)]
    training = Token(SUBTOKENS=sequences)

    # train on those examples, starting with the model that generated them
    trainer = HiddenMarkovModelTrainer(states, symbols)
    hmm = trainer.train_unsupervised(training, model=model, max_iterations=1000)
Example 79
Project: robot-navigation   Author: ronaldahmed   File: __init__.py    MIT License 4 votes vote down vote up
def __init__(self, num_means, distance, repeats=1,
                       conv_test=1e-6, initial_means=None,
                       normalise=False, svd_dimensions=None,
                       rng=None):
        """
        Type-check the arguments and store the clustering configuration.

        @param  num_means:  the number of means to use (may use fewer)
        @type   num_means:  int
        @param  distance:   measure of distance between two vectors
        @type   distance:   function taking two vectors and returning a float
        @param  repeats:    number of randomised clustering trials to use;
                            must be at least 1, and exactly 1 when
                            C{initial_means} is supplied
        @type   repeats:    int
        @param  conv_test:  maximum variation in mean differences before
                            deemed convergent
        @type   conv_test:  number
        @param  initial_means: set of k initial means; when given, its length
                            must equal C{num_means}
        @type   initial_means: sequence of vectors
        @param  normalise:  should vectors be normalised to length 1
        @type   normalise:  boolean
        @param svd_dimensions: number of dimensions to use in reducing vector
                               dimensionality with SVD
        @type svd_dimensions: int
        @param  rng:        random number generator; a fresh
                            C{random.Random()} is used when falsy
        @type   rng:        Random
        """
        # argument type checks (positions 2 and 5 are intentionally disabled)
        assert chktype(1, num_means, int)
        #assert chktype(2, distance, ...)
        assert chktype(3, repeats, int)
        assert chktype(4, conv_test, int, float)
        #assert chktype(5, initial_means, [Numeric.array([])], [SparseArray])
        assert chktype(6, normalise, bool)
        assert chktype(7, svd_dimensions, int, types.NoneType)

        # normalisation and SVD settings are handled by the base class
        VectorSpaceClusterer.__init__(self, normalise, svd_dimensions)

        self._num_means = num_means
        self._distance = distance
        self._max_difference = conv_test

        # explicit starting means must come in the requested quantity
        assert not initial_means or len(initial_means) == num_means
        self._means = initial_means

        assert repeats >= 1
        # repeated trials make no sense with fixed starting means
        assert not initial_means or not repeats > 1
        self._repeats = repeats

        self._rng = rng or random.Random()
Example 80
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: __main__.py    MIT License 4 votes vote down vote up
def process(args):
  """Load a graph, generate random walks over it, train a model and save it.

  The graph is loaded according to ``args.format`` ("adjlist", "edgelist" or
  "mat"); any other value raises ``Exception``. The estimated corpus size
  (number of nodes * ``args.number_walks`` * ``args.walk_length``) decides
  between two paths:

  * below ``args.max_memory_data_size``: the walks are built through
    ``graph.build_deepwalk_corpus`` and passed to ``Word2Vec``;
  * otherwise: the walks are written to files named after ``args.output``
    and a ``Skipgram`` model is trained from those files.

  Both paths seed their walk generator with ``random.Random(args.seed)``.
  The resulting model is saved with ``save_word2vec_format(args.output)``.
  """

  # Pick the loader that matches the declared input format.
  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)

  print("Number of nodes: {}".format(len(G.nodes())))

  # args.number_walks walks are counted for every node.
  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  # Total number of walk steps; used to choose between the two paths below.
  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  if data_size < args.max_memory_data_size:
    print("Walking...")
    walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
                                        path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))
    print("Training...")
    model = Word2Vec(walks, size=args.representation_size, window=args.window_size, min_count=0, workers=args.workers)
  else:
    print("Data size {} is larger than limit (max-memory-data-size: {}).  Dumping walks to disk.".format(data_size, args.max_memory_data_size))
    print("Walking...")

    # Walk files are named after the output path with a ".walks" suffix.
    walks_filebase = args.output + ".walks"
    walk_files = serialized_walks.write_walks_to_disk(G, walks_filebase, num_paths=args.number_walks,
                                         path_length=args.walk_length, alpha=0, rand=random.Random(args.seed),
                                         num_workers=args.workers)

    print("Counting vertex frequency...")
    if not args.vertex_freq_degree:
      vertex_counts = serialized_walks.count_textfiles(walk_files, args.workers)
    else:
      # use degree distribution for frequency in tree
      # NOTE(review): iterkeys() is the Python 2 dict API -- presumably G is
      # dict-like; confirm before running this branch on Python 3.
      vertex_counts = G.degree(nodes=G.iterkeys())

    print("Training...")
    model = Skipgram(sentences=serialized_walks.combine_files_iter(walk_files), vocabulary_counts=vertex_counts,
                     size=args.representation_size,
                     window=args.window_size, min_count=0, workers=args.workers)

  model.save_word2vec_format(args.output)