Python data.load() Examples

The following are 23 code examples of data.load(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module data, or try the search function.
Example #1
Source File: client.py    From SplunkForPCAP with MIT License    6 votes
def restart_required(self):
        """Indicates whether splunkd is in a state that requires a restart.

        :return: A ``boolean`` that indicates whether a restart is required.

        """
        response = self.get("messages").body.read()
        messages = data.load(response)['feed']
        if 'entry' not in messages:
            result = False
        else:
            if isinstance(messages['entry'], dict):
                titles = [messages['entry']['title']]
            else:
                titles = [x['title'] for x in messages['entry']]
            result = 'restart_required' in titles
        return result 
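The data.load() call in this example comes from splunklib.data, which parses the Atom XML body returned by the messages endpoint into nested dict-like records. A minimal sketch of that behaviour, assuming splunklib is installed and using a hand-written XML string in place of a live splunkd response:

from splunklib import data

# Hand-written stand-in for what self.get("messages").body.read() would return.
xml_body = '<feed><entry><title>restart_required</title></entry></feed>'

feed = data.load(xml_body)['feed']
# A feed with a single <entry> comes back as a dict rather than a list, which is
# why restart_required() branches on isinstance(messages['entry'], dict).
print(feed['entry']['title'])  # -> restart_required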
Example #2
Source File: load.py    From kaggle-ndsb with MIT License    6 votes
def load_train(self):
        labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)
        split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)

        indices_train = split['indices_train']
        indices_valid = split['indices_valid']

        image_shapes = np.asarray([img.shape for img in data.load('train')]).astype(np.float32)
        self.image_shapes_train = image_shapes[indices_train]
        self.image_shapes_valid = image_shapes[indices_valid]

        self.y_train = np.load(self.train_pred_file).astype(np.float32)
        self.y_valid = np.load(self.valid_pred_file).astype(np.float32)
        self.labels_train = labels[indices_train]
        self.labels_valid = labels[indices_valid] 
Example #3
Source File: load.py    From kaggle-ndsb with MIT License    6 votes
def load_train(self):
        labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)
        split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)

        indices_train = split['indices_train']
        indices_valid = split['indices_valid']

        image_shapes = np.asarray([img.shape for img in data.load('train')]).astype(np.float32)
        moments = np.load("data/image_moment_stats_v1_train.pkl")

        centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
        info = np.concatenate((centroid_distance, image_shapes, moments["angles"][:, None], moments["minor_axes"][:, None], moments["major_axes"][:, None]), 1).astype(np.float32)

        self.info_train = info[indices_train]
        self.info_valid = info[indices_valid]

        self.y_train = np.load(self.train_pred_file).astype(np.float32)
        self.y_valid = np.load(self.valid_pred_file).astype(np.float32)
        self.labels_train = labels[indices_train]
        self.labels_valid = labels[indices_valid] 
Example #4
Source File: load.py    From kaggle-ndsb with MIT License    6 votes
def load_test(self):
        self.y_test = np.load(self.test_pred_file).astype(np.float32)
        self.images_test = data.load('test')
        features = np.load("data/features_test.pkl").item()

        if "aaronmoments" in self.features:
            print "aaronmoments"
            def normalize(x):
                return x
                # return (x - x.mean(axis=0,keepdims=True))/x.std(axis=0,keepdims=True)
            image_shapes = np.asarray([img.shape for img in self.images_test]).astype(np.float32)
            moments = np.load("data/image_moment_stats_v1_test.pkl")
            centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
            angles = moments["angles"][:, None]
            minor_axes = moments["minor_axes"][:, None]
            major_axes = moments["major_axes"][:, None]
            centroid_distance = normalize(centroid_distance)
            angles = normalize(angles)
            minor_axes = normalize(minor_axes)
            major_axes = normalize(major_axes)
            features["aaronmoments"] = np.concatenate([centroid_distance,angles,minor_axes,major_axes], 1).astype(np.float32)

        self.info_test = np.concatenate([features[feat] for feat in self.features], 1).astype(np.float32) 
Example #5
Source File: client.py    From splunk-elasticsearch with Apache License 2.0    6 votes
def restart_required(self):
        """Indicates whether splunkd is in a state that requires a restart.

        :return: A ``boolean`` that indicates whether a restart is required.

        """
        response = self.get("messages").body.read()
        messages = data.load(response)['feed']
        if 'entry' not in messages:
            result = False
        else:
            if isinstance(messages['entry'], dict):
                titles = [messages['entry']['title']]
            else:
                titles = [x['title'] for x in messages['entry']]
            result = 'restart_required' in titles
        return result 
Example #6
Source File: client.py    From splunk-ref-pas-code with Apache License 2.0    6 votes
def restart_required(self):
        """Indicates whether splunkd is in a state that requires a restart.

        :return: A ``boolean`` that indicates whether a restart is required.

        """
        response = self.get("messages").body.read()
        messages = data.load(response)['feed']
        if 'entry' not in messages:
            result = False
        else:
            if isinstance(messages['entry'], dict):
                titles = [messages['entry']['title']]
            else:
                titles = [x['title'] for x in messages['entry']]
            result = 'restart_required' in titles
        return result 
Example #7
Source File: client.py    From splunk-ref-pas-code with Apache License 2.0    5 votes
def refresh(self, state=None):
        """Refreshes the state of this entity.

        If *state* is provided, load it as the new state for this
        entity. Otherwise, make a roundtrip to the server (by calling
        the :meth:`read` method of ``self``) to fetch an updated state,
        plus at most two additional round trips if
        the ``autologin`` field of :func:`connect` is set to ``True``.

        :param state: Entity-specific arguments (optional).
        :type state: ``dict``
        :raises EntityDeletedException: Raised if the entity no longer exists on
            the server.

        **Example**::

            import splunklib.client as client
            s = client.connect(...)
            search = s.apps['search']
            search.refresh()
        """
        if state is not None:
            self._state = state
        else:
            self._state = self.read(self.get())
        return self 
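The docstring describes a simple accept-state-or-fetch pattern that is easy to reuse outside splunklib. A self-contained sketch of the same idea (not splunklib's implementation; the fetch callable stands in for self.read(self.get())):

class Refreshable:
    """Minimal sketch of the refresh(state=None) pattern shown above."""

    def __init__(self, fetch):
        self._fetch = fetch   # callable returning the current server-side state
        self._state = None

    def refresh(self, state=None):
        # Reuse a caller-supplied state, otherwise make a round trip.
        self._state = state if state is not None else self._fetch()
        return self

entity = Refreshable(fetch=lambda: {"title": "search", "disabled": False})
entity.refresh()                          # round trip via fetch()
entity.refresh(state={"disabled": True})  # no round trip; caller supplied the state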
Example #8
Source File: load.py    From kaggle-ndsb with MIT License    5 votes
def load_test(self):
        self.images_test = data.load('test') 
Example #9
Source File: load.py    From kaggle-ndsb with MIT License    5 votes
def load_train(self):
        images = data.load('train')
        labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)

        split = np.load(self.validation_split_path)
        indices_train = split['indices_train']
        indices_valid = split['indices_valid']

        self.images_train = images[indices_train]
        self.labels_train = labels[indices_train]
        self.images_valid = images[indices_valid]
        self.labels_valid = labels[indices_valid] 
Example #10
Source File: client.py    From splunk-elasticsearch with Apache License 2.0    5 votes
def _load_atom(response, match=None):
    return data.load(response.body.read(), match)


# Load an array of atom entries from the body of the given response 
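_load_atom simply hands the raw response body to data.load(); the optional match argument can be used to select a sub-element. A small sketch with a stubbed response object (the stub is illustrative, not splunklib's HTTP layer):

import io
from splunklib import data

class FakeResponse:
    """Stand-in for the response object _load_atom expects."""
    def __init__(self, text):
        self.body = io.StringIO(text)

response = FakeResponse('<feed><entry><title>demo</title></entry></feed>')
print(data.load(response.body.read())['feed']['entry']['title'])  # -> demo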
Example #11
Source File: client.py    From splunk-ref-pas-code with Apache License 2.0    5 votes
def _load_atom(response, match=None):
    return data.load(response.body.read(), match)

# Load an array of atom entries from the body of the given response 
Example #12
Source File: client.py    From splunk-elasticsearch with Apache License 2.0    5 votes
def refresh(self, state=None):
        """Refreshes the state of this entity.

        If *state* is provided, load it as the new state for this
        entity. Otherwise, make a roundtrip to the server (by calling
        the :meth:`read` method of ``self``) to fetch an updated state,
        plus at most two additional round trips if
        the ``autologin`` field of :func:`connect` is set to ``True``.

        :param state: Entity-specific arguments (optional).
        :type state: ``dict``
        :raises EntityDeletedException: Raised if the entity no longer exists on
            the server.

        **Example**::

            import splunklib.client as client
            s = client.connect(...)
            search = s.apps['search']
            search.refresh()
        """
        if state is not None:
            self._state = state
        else:
            self._state = self.read(self.get())
        return self 
Example #13
Source File: client.py    From SplunkForPCAP with MIT License    5 votes
def _load_atom(response, match=None):
    return data.load(response.body.read(), match)


# Load an array of atom entries from the body of the given response 
Example #14
Source File: load.py    From kaggle-ndsb with MIT License    5 votes
def load_test(self):
        self.y_test = np.load(self.test_pred_file).astype(np.float32)
        self.images_test = data.load('test')
        image_shapes_test = np.asarray([img.shape for img in self.images_test]).astype(np.float32)
        moments_test = np.load("data/image_moment_stats_v1_test.pkl")
        centroid_distance = np.abs(moments_test["centroids"][:, [1, 0]] - image_shapes_test / 2)
        self.info_test = np.concatenate((centroid_distance, image_shapes_test, moments_test["angles"][:, None], moments_test["minor_axes"][:, None], moments_test["major_axes"][:, None]), 1).astype(np.float32)
        # self.info_test = np.concatenate((image_shapes_test, moments_test["centroids"], moments_test["minor_axes"][:, None], moments_test["major_axes"][:, None]), 1).astype(np.float32) 
Example #15
Source File: load.py    From kaggle-ndsb with MIT License    5 votes
def load_train(self):
        labels = utils.one_hot(data.labels_train, m=121).astype(np.float32)
        split = np.load(DEFAULT_VALIDATION_SPLIT_PATH)

        indices_train = split['indices_train']
        indices_valid = split['indices_valid']
        features = np.load("data/features_train.pkl").item()

        if "aaronmoments" in self.features:
            print "aaronmoments"
            def normalize(x):
                return x
                # return (x - x.mean(axis=0,keepdims=True))/x.std(axis=0,keepdims=True)
            image_shapes = np.asarray([img.shape for img in data.load('train')]).astype(np.float32)
            moments = np.load("data/image_moment_stats_v1_train.pkl")
            centroid_distance = np.abs(moments["centroids"][:, [1, 0]] - image_shapes / 2)
            angles = moments["angles"][:, None]
            minor_axes = moments["minor_axes"][:, None]
            major_axes = moments["major_axes"][:, None]
            centroid_distance = normalize(centroid_distance)
            angles = normalize(angles)
            minor_axes = normalize(minor_axes)
            major_axes = normalize(major_axes)
            features["aaronmoments"] = np.concatenate([centroid_distance,angles,minor_axes,major_axes], 1).astype(np.float32)

        info = np.concatenate([features[feat] for feat in self.features], 1).astype(np.float32)

        print(info.shape)

        self.info_train = info[indices_train]
        self.info_valid = info[indices_valid]

        self.y_train = np.load(self.train_pred_file).astype(np.float32)
        self.y_valid = np.load(self.valid_pred_file).astype(np.float32)
        self.labels_train = labels[indices_train]
        self.labels_valid = labels[indices_valid] 
Example #16
Source File: client.py    From SplunkForPCAP with MIT License    5 votes
def refresh(self, state=None):
        """Refreshes the state of this entity.

        If *state* is provided, load it as the new state for this
        entity. Otherwise, make a roundtrip to the server (by calling
        the :meth:`read` method of ``self``) to fetch an updated state,
        plus at most two additional round trips if
        the ``autologin`` field of :func:`connect` is set to ``True``.

        :param state: Entity-specific arguments (optional).
        :type state: ``dict``
        :raises EntityDeletedException: Raised if the entity no longer exists on
            the server.

        **Example**::

            import splunklib.client as client
            s = client.connect(...)
            search = s.apps['search']
            search.refresh()
        """
        if state is not None:
            self._state = state
        else:
            self._state = self.read(self.get())
        return self 
Example #17
Source File: load.py    From kaggle-ndsb with MIT License    4 votes
def load_train(self):
        train_images = data.load('train')
        train_labels = utils.one_hot(data.labels_train).astype(np.float32)

        if ("valid_pred_file" in self.__dict__):
            valid_pseudo_labels = np.load(self.valid_pred_file).astype(np.float32)
        else:
            print "No valid_pred_file set. Only using test-set for pseudolabeling!!"

        shuffle = np.load("test_shuffle_seed0.npy")
        if not ("shard" in self.__dict__):
            raise ValueError("Missing argument: shard: (should be value in {0, 1, 2})")
        if not self.shard in [0, 1, 2]:
            raise ValueError("Wrong argument: shard: (should be value in {0, 1, 2})")
        N = len(shuffle)
        if self.shard == 0:
            train_shard = shuffle[N // 3:]
        if self.shard == 1:
            train_shard = np.concatenate((shuffle[:N // 3], shuffle[2 * N // 3:]))
        if self.shard == 2:
            train_shard = shuffle[:2 * N // 3]

        test_images = data.load('test')[train_shard]
        test_pseudo_labels = np.load(self.test_pred_file)[train_shard].astype(np.float32)
        print(test_pseudo_labels.shape)

        if not hasattr(self, 'validation_split_path'):
            self.validation_split_path = DEFAULT_VALIDATION_SPLIT_PATH
        split = np.load(self.validation_split_path)
        indices_train = split['indices_train']
        indices_valid = split['indices_valid']

        self.images_train = train_images[indices_train]
        self.labels_train = train_labels[indices_train]
        if ("valid_pred_file" in self.__dict__):
            self.images_pseudo = np.concatenate((train_images[indices_valid], test_images), 0)
            self.labels_pseudo = np.concatenate((valid_pseudo_labels, test_pseudo_labels), 0)
        else:
            self.images_pseudo = test_images
            self.labels_pseudo = test_pseudo_labels

        self.images_valid = train_images[indices_valid]
        self.labels_valid = train_labels[indices_valid] 
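The shard logic above carves a shuffled index array into thirds so that each of three runs trains on a different two-thirds of the test set. A self-contained sketch of that three-way slicing on a small stand-in array:

import numpy as np

shuffle = np.random.permutation(30)   # stand-in for test_shuffle_seed0.npy
N = len(shuffle)

shards = {
    0: shuffle[N // 3:],                                          # drop the first third
    1: np.concatenate((shuffle[:N // 3], shuffle[2 * N // 3:])),  # drop the middle third
    2: shuffle[:2 * N // 3],                                      # drop the last third
}
for shard_id, train_shard in shards.items():
    print(shard_id, len(train_shard))   # each shard keeps roughly two thirds of the indices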
Example #18
Source File: client.py    From SplunkForPCAP with MIT License    4 votes
def iter(self, offset=0, count=None, pagesize=None, **kwargs):
        """Iterates over the collection.

        This method is equivalent to the :meth:`list` method, but
        it returns an iterator and can load a certain number of entities at a
        time from the server.

        :param offset: The index of the first entity to return (optional).
        :type offset: ``integer``
        :param count: The maximum number of entities to return (optional).
        :type count: ``integer``
        :param pagesize: The number of entities to load (optional).
        :type pagesize: ``integer``
        :param kwargs: Additional arguments (optional):

            - "search" (``string``): The search query to filter responses.

            - "sort_dir" (``string``): The direction to sort returned items:
              "asc" or "desc".

            - "sort_key" (``string``): The field to use for sorting (optional).

            - "sort_mode" (``string``): The collating sequence for sorting
              returned items: "auto", "alpha", "alpha_case", or "num".

        :type kwargs: ``dict``

        **Example**::

            import splunklib.client as client
            s = client.connect(...)
            for saved_search in s.saved_searches.iter(pagesize=10):
                # Loads 10 saved searches at a time from the
                # server.
                ...
        """
        assert pagesize is None or pagesize > 0
        if count is None:
            count = self.null_count
        fetched = 0
        while count == self.null_count or fetched < count:
            response = self.get(count=pagesize or count, offset=offset, **kwargs)
            items = self._load_list(response)
            N = len(items)
            fetched += N
            for item in items:
                yield item
            if pagesize is None or N < pagesize:
                break
            offset += N
            logging.debug("pagesize=%d, fetched=%d, offset=%d, N=%d, kwargs=%s", pagesize, fetched, offset, N, kwargs)

    # kwargs: count, offset, search, sort_dir, sort_key, sort_mode 
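The loop at the bottom of iter() is a generic lazy-pagination pattern: request at most pagesize items per call, yield them, and stop as soon as a short page comes back. The same pattern in a standalone, hedged sketch with a stubbed fetch function (nothing here is splunklib API):

def paged(fetch_page, pagesize=10, count=None):
    """Lazily yield items; fetch_page(offset, limit) returns one page as a list."""
    offset = 0
    fetched = 0
    while count is None or fetched < count:
        items = fetch_page(offset, pagesize)
        for item in items:
            yield item
        fetched += len(items)
        offset += len(items)
        if len(items) < pagesize:   # a short page means the collection is exhausted
            break

def fetch(offset, limit):
    dataset = list(range(23))
    return dataset[offset:offset + limit]

assert list(paged(fetch)) == list(range(23))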
Example #19
Source File: models_iic.py    From IIC with MIT License    4 votes
def __classification_accuracy(self, sess, iter_init, idx, y_ph=None):
        """
        :param sess: TensorFlow session
        :param iter_init: TensorFlow data iterator initializer associated
        :param idx: insertion index (i.e. epoch - 1)
        :param y_ph: TensorFlow placeholder for unseen labels
        :return: None
        """
        if self.perf is None or y_ph is None:
            return

        # initialize results
        y = np.zeros([0, 1])
        y_hats = [np.zeros([0, 1])] * self.num_B_sub_heads

        # initialize unsupervised data iterator
        sess.run(iter_init)

        # loop over the batches within the unsupervised data iterator
        print('Evaluating classification accuracy... ')
        while True:
            try:
                # grab the results
                results = sess.run([self.y_hats, y_ph], feed_dict={self.is_training: False})

                # load metrics
                for i in range(self.num_B_sub_heads):
                    y_hats[i] = np.concatenate((y_hats[i], np.expand_dims(results[0][i], axis=1)))
                if y_ph is not None:
                    y = np.concatenate((y, np.expand_dims(results[1], axis=1)))

                # _, ax = plt.subplots(2, 10)
                # i_rand = np.random.choice(results[3].shape[0], 10)
                # for i in range(10):
                #     ax[0, i].imshow(results[3][i_rand[i]][:, :, 0], origin='upper', vmin=0, vmax=1)
                #     ax[0, i].set_xticks([])
                #     ax[0, i].set_yticks([])
                #     ax[1, i].imshow(results[4][i_rand[i]][:, :, 0], origin='upper', vmin=0, vmax=1)
                #     ax[1, i].set_xticks([])
                #     ax[1, i].set_yticks([])
                # plt.show()

            # iterator will throw this error when its out of data
            except tf.errors.OutOfRangeError:
                break

        # compute classification accuracy
        if y_ph is not None:
            class_errors = [unsupervised_labels(y, y_hats[i], self.k_B, self.k_B)
                            for i in range(self.num_B_sub_heads)]
            self.perf['class_err_min'][idx] = np.min(class_errors)
            self.perf['class_err_avg'][idx] = np.mean(class_errors)
            self.perf['class_err_max'][idx] = np.max(class_errors)

        # metrics are done
        print('Done') 
Example #20
Source File: client.py    From splunk-elasticsearch with Apache License 2.0    4 votes
def iter(self, offset=0, count=None, pagesize=None, **kwargs):
        """Iterates over the collection.

        This method is equivalent to the :meth:`list` method, but
        it returns an iterator and can load a certain number of entities at a
        time from the server.

        :param offset: The index of the first entity to return (optional).
        :type offset: ``integer``
        :param count: The maximum number of entities to return (optional).
        :type count: ``integer``
        :param pagesize: The number of entities to load (optional).
        :type pagesize: ``integer``
        :param kwargs: Additional arguments (optional):

            - "search" (``string``): The search query to filter responses.

            - "sort_dir" (``string``): The direction to sort returned items:
              "asc" or "desc".

            - "sort_key" (``string``): The field to use for sorting (optional).

            - "sort_mode" (``string``): The collating sequence for sorting
              returned items: "auto", "alpha", "alpha_case", or "num".

        :type kwargs: ``dict``

        **Example**::

            import splunklib.client as client
            s = client.connect(...)
            for saved_search in s.saved_searches.iter(pagesize=10):
                # Loads 10 saved searches at a time from the
                # server.
                ...
        """
        assert pagesize is None or pagesize > 0
        if count is None:
            count = self.null_count
        fetched = 0
        while count == self.null_count or fetched < count:
            response = self.get(count=pagesize or count, offset=offset, **kwargs)
            items = self._load_list(response)
            N = len(items)
            fetched += N
            for item in items:
                yield item
            if pagesize is None or N < pagesize:
                break
            offset += N
            logging.debug("pagesize=%d, fetched=%d, offset=%d, N=%d, kwargs=%s", pagesize, fetched, offset, N, kwargs)

    # kwargs: count, offset, search, sort_dir, sort_key, sort_mode 
Example #21
Source File: main.py    From punctuator2 with MIT License    4 votes
def get_minibatch(file_name, batch_size, shuffle, with_pauses=False):

    dataset = data.load(file_name)

    if shuffle:
        np.random.shuffle(dataset)

    X_batch = []
    Y_batch = []
    if with_pauses:
        P_batch = []

    if len(dataset) < batch_size:
        print("WARNING: Not enough samples in '%s'. Reduce mini-batch size to %d or use a dataset with at least %d words." % (
            file_name,
            len(dataset),
            MINIBATCH_SIZE * data.MAX_SEQUENCE_LEN))

    for subsequence in dataset:

        X_batch.append(subsequence[0])
        Y_batch.append(subsequence[1])
        if with_pauses:
            P_batch.append(subsequence[2])
        
        if len(X_batch) == batch_size:

            # Transpose, because the model assumes the first axis is time
            X = np.array(X_batch, dtype=np.int32).T
            Y = np.array(Y_batch, dtype=np.int32).T
            if with_pauses:
                P = np.array(P_batch, dtype=theano.config.floatX).T
            
            if with_pauses:
                yield X, Y, P
            else:
                yield X, Y

            X_batch = []
            Y_batch = []
            if with_pauses:
                P_batch = [] 
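A short usage sketch for the generator above; the file name and the train_step call are hypothetical and assume a dataset previously prepared by punctuator2's data module:

for X, Y in get_minibatch("data/train_corpus", batch_size=128, shuffle=True):
    # X and Y are time-major int32 arrays of shape (sequence_length, batch_size).
    train_step(X, Y)   # hypothetical training call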
Example #22
Source File: data_update.py    From Deep-Reinforcement-Learning-in-Large-Discrete-Action-Spaces with MIT License    4 votes
def update_pickle_file(file_name, eps=0, k=0, v=0):
    d_old = data_old.Data(file_name)
    d_old.load()
    print(file_name, 'loaded')
    # d_old.print_fields()

    d_new = data.Data()
    d_new.set_agent('Wolp',
                    int(d_old.get_data('max_actions')[0]),
                    k,
                    v)
    d_new.set_experiment(d_old.get_data('experiment')[0],
                         [-3],
                         [3],
                         eps)

    space = action_space.Space([-3], [3], int(d_old.get_data('max_actions')[0]))
    # print(space.get_space())
    # d_new.print_data()

    done = d_old.get_data('done')
    actors_result = d_old.get_data('actors_result')
    actions = d_old.get_data('actions')
    state_0 = d_old.get_data('state_0').tolist()
    state_1 = d_old.get_data('state_1').tolist()
    state_2 = d_old.get_data('state_2').tolist()
    state_3 = d_old.get_data('state_3').tolist()
    rewards = d_old.get_data('rewards').tolist()
    ep = 0
    temp = 0
    l = len(done)
    for i in range(l):
        d_new.set_action(space.import_point(actions[i]).tolist())
        d_new.set_actors_action(space.import_point(actors_result[i]).tolist())
        d_new.set_ndn_action(space.import_point(
            space.search_point(actors_result[i], 1)[0]).tolist())
        state = [state_0[i], state_1[i], state_2[i], state_3[i]]
        d_new.set_state(state)
        d_new.set_reward(1)
        if done[i] > 0:
            # print(ep, i - temp, 'progress', i / l)
            temp = i

            ep += 1
            # if ep % 200 == 199:
            #     d_new.finish_and_store_episode()
            # else:
            d_new.end_of_episode()

    d_new.save() 
Example #23
Source File: client.py    From splunk-ref-pas-code with Apache License 2.0    4 votes
def iter(self, offset=0, count=None, pagesize=None, **kwargs):
        """Iterates over the collection.

        This method is equivalent to the :meth:`list` method, but
        it returns an iterator and can load a certain number of entities at a
        time from the server.

        :param offset: The index of the first entity to return (optional).
        :type offset: ``integer``
        :param count: The maximum number of entities to return (optional).
        :type count: ``integer``
        :param pagesize: The number of entities to load (optional).
        :type pagesize: ``integer``
        :param kwargs: Additional arguments (optional):

            - "search" (``string``): The search query to filter responses.

            - "sort_dir" (``string``): The direction to sort returned items:
              "asc" or "desc".

            - "sort_key" (``string``): The field to use for sorting (optional).

            - "sort_mode" (``string``): The collating sequence for sorting
              returned items: "auto", "alpha", "alpha_case", or "num".

        :type kwargs: ``dict``

        **Example**::

            import splunklib.client as client
            s = client.connect(...)
            for saved_search in s.saved_searches.iter(pagesize=10):
                # Loads 10 saved searches at a time from the
                # server.
                ...
        """
        assert pagesize is None or pagesize > 0
        if count is None:
            count = self.null_count
        fetched = 0
        while count == self.null_count or fetched < count:
            response = self.get(count=pagesize or count, offset=offset, **kwargs)
            items = self._load_list(response)
            N = len(items)
            fetched += N
            for item in items:
                yield item
            if pagesize is None or N < pagesize:
                break
            offset += N
            logging.debug("pagesize=%d, fetched=%d, offset=%d, N=%d, kwargs=%s", pagesize, fetched, offset, N, kwargs)

    # kwargs: count, offset, search, sort_dir, sort_key, sort_mode