Python pickle.load() Examples

The following code examples show how to use pickle.load(). They are drawn from open source Python projects.
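
Before the project examples, here is a minimal self-contained sketch of the round trip that all of them build on (the file name record.pkl is purely illustrative). Note that pickle.load() should only be called on data you trust, since unpickling can execute arbitrary code.

import pickle

record = {'user': 'alice', 'scores': [97, 84]}

# Serialize the object to disk ...
with open('record.pkl', 'wb') as f:
    pickle.dump(record, f, pickle.HIGHEST_PROTOCOL)

# ... and read it back with pickle.load().
with open('record.pkl', 'rb') as f:
    restored = pickle.load(f)

assert restored == record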

Example 1
Project: model-api-sequence   Author: evandowning   File: evaluation.py    GNU General Public License v3.0
def sequence_generator(fn,n):
    xSet = np.array([])
    ySet = np.array([])

    x = np.array([])
    y = np.array([])

    num = 0

    # Read in sample's sequences
    with open(fn, 'rb') as fr:
        for e in enumerate(range(n)):
            t = pkl.load(fr)
            x = t[0]
            y = t[1]

            if len(xSet) == 0:
                xSet = x
                ySet = y
            else:
                xSet = np.vstack([xSet,x])
                ySet = np.append(ySet,y)

    return xSet,ySet 
Example 2
Project: model-api-sequence   Author: evandowning   File: color.py    GNU General Public License v3.0
def extract(folder,fn,num,width):
    label = None
    seq = list()

    # Read in sample's sequence
    path = os.path.join(folder,fn+'.pkl')
    with open(path,'rb') as fr:
        for i in range(num):
            t = pkl.load(fr)
            label = t[1]

            # Replace API call integers with pixel values
            seq.extend([api_md5(str(api)) for api in t[0]])

    # Pad array if it's not divisible by width (3 channels for RGB)
    r = len(seq) % (width*3)
    if r != 0:
        seq.extend([api_md5('0')]*(width*3-r))

    # Reshape numpy array (3 channels)
    data = np.reshape(np.array(seq), (-1,width*3))
    data = data.astype(np.int8)

    return fn,data,label 
Example 3
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License
def register(self, name, serializer):
        """Register ``serializer`` object under ``name``.

        Raises :class:`AttributeError` if ``serializer`` is invalid.

        .. note::

            ``name`` will be used as the file extension of the saved files.

        :param name: Name to register ``serializer`` under
        :type name: ``unicode`` or ``str``
        :param serializer: object with ``load()`` and ``dump()``
            methods

        """
        # Basic validation
        getattr(serializer, 'load')
        getattr(serializer, 'dump')

        self._serializers[name] = serializer 
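
The register() method above only checks that the object exposes load() and dump() attributes. A minimal hedged sketch of a serializer that satisfies that contract (the class name and the commented registration call are hypothetical; Example 22 below shows the manager side):

import pickle

class PickleSerializer(object):
    """Hypothetical serializer exposing the load()/dump() methods the docstring requires."""

    @classmethod
    def load(cls, file_obj):
        return pickle.load(file_obj)

    @classmethod
    def dump(cls, obj, file_obj):
        pickle.dump(obj, file_obj, protocol=pickle.HIGHEST_PROTOCOL)

# manager.register('mypickle', PickleSerializer)  # hypothetical manager instance
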
Example 4
Project: pyblish-win   Author: pyblish   File: trace.py    GNU Lesser General Public License v3.0
def __init__(self, counts=None, calledfuncs=None, infile=None,
                 callers=None, outfile=None):
        self.counts = counts
        if self.counts is None:
            self.counts = {}
        self.counter = self.counts.copy() # map (filename, lineno) to count
        self.calledfuncs = calledfuncs
        if self.calledfuncs is None:
            self.calledfuncs = {}
        self.calledfuncs = self.calledfuncs.copy()
        self.callers = callers
        if self.callers is None:
            self.callers = {}
        self.callers = self.callers.copy()
        self.infile = infile
        self.outfile = outfile
        if self.infile:
            # Try to merge existing counts file.
            try:
                counts, calledfuncs, callers = \
                        pickle.load(open(self.infile, 'rb'))
                self.update(self.__class__(counts, calledfuncs, callers))
            except (IOError, EOFError, ValueError), err:
                print >> sys.stderr, ("Skipping counts file %r: %s"
                                      % (self.infile, err)) 
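
The snippet above is Python 2 only (the except (...), err clause and print >> sys.stderr). A minimal standalone sketch of the same load-or-skip logic in Python 3 syntax (the helper name load_counts is made up for illustration):

import pickle
import sys

def load_counts(path):
    """Hypothetical helper: load the pickled (counts, calledfuncs, callers) tuple, or fall back to empty dicts."""
    try:
        with open(path, 'rb') as f:
            counts, calledfuncs, callers = pickle.load(f)
        return counts, calledfuncs, callers
    except (IOError, EOFError, ValueError) as err:
        print("Skipping counts file %r: %s" % (path, err), file=sys.stderr)
        return {}, {}, {}
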
Example 5
Project: pyblish-win   Author: pyblish   File: test_signal.py    GNU Lesser General Public License v3.0
def test_itimer_virtual(self):
        self.itimer = signal.ITIMER_VIRTUAL
        signal.signal(signal.SIGVTALRM, self.sig_vtalrm)
        signal.setitimer(self.itimer, 0.3, 0.2)

        start_time = time.time()
        while time.time() - start_time < 60.0:
            # use up some virtual time by doing real work
            _ = pow(12345, 67890, 10000019)
            if signal.getitimer(self.itimer) == (0.0, 0.0):
                break # sig_vtalrm handler stopped this itimer
        else: # Issue 8424
            self.skipTest("timeout: likely cause: machine too slow or load too "
                          "high")

        # virtual itimer should be (0.0, 0.0) now
        self.assertEqual(signal.getitimer(self.itimer), (0.0, 0.0))
        # and the handler should have been called
        self.assertEqual(self.hndl_called, True)

    # Issue 3864. Unknown if this affects earlier versions of freebsd also. 
Example 6
Project: pyblish-win   Author: pyblish   File: forking.py    GNU Lesser General Public License v3.0
def main():
        '''
        Run code specified by data received over pipe
        '''
        assert is_forking(sys.argv)

        handle = int(sys.argv[-1])
        fd = msvcrt.open_osfhandle(handle, os.O_RDONLY)
        from_parent = os.fdopen(fd, 'rb')

        process.current_process()._inheriting = True
        preparation_data = load(from_parent)
        prepare(preparation_data)
        self = load(from_parent)
        process.current_process()._inheriting = False

        from_parent.close()

        exitcode = self._bootstrap()
        exit(exitcode) 
Example 7
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License
def register(self, name, serializer):
        """Register ``serializer`` object under ``name``.

        Raises :class:`AttributeError` if ``serializer`` is invalid.

        .. note::

            ``name`` will be used as the file extension of the saved files.

        :param name: Name to register ``serializer`` under
        :type name: ``unicode`` or ``str``
        :param serializer: object with ``load()`` and ``dump()``
            methods

        """
        # Basic validation
        getattr(serializer, 'load')
        getattr(serializer, 'dump')

        self._serializers[name] = serializer 
Example 8
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: train_val.py    MIT License
def from_snapshot(self, sfile, nfile):
    print('Restoring model snapshots from {:s}'.format(sfile))
    self.net.load_state_dict(torch.load(str(sfile)))
    print('Restored.')
    # Needs to restore the other hyper-parameters/states for training, (TODO xinlei) I have
    # tried my best to find the random states so that it can be recovered exactly
    # However the Tensorflow state is currently not available
    with open(nfile, 'rb') as fid:
      st0 = pickle.load(fid)
      cur = pickle.load(fid)
      perm = pickle.load(fid)
      cur_val = pickle.load(fid)
      perm_val = pickle.load(fid)
      last_snapshot_iter = pickle.load(fid)

      np.random.set_state(st0)
      self.data_layer._cur = cur
      self.data_layer._perm = perm
      self.data_layer_val._cur = cur_val
      self.data_layer_val._perm = perm_val

    return last_snapshot_iter 
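
Example 8 works because objects pickled one after another into a single file can be read back with the same number of sequential pickle.load() calls. A minimal standalone sketch of that round trip (the file name is hypothetical):

import pickle

with open('snapshot_state.pkl', 'wb') as fid:   # hypothetical file name
    for obj in (42, [1, 2, 3], {'cur': 0}):
        pickle.dump(obj, fid, pickle.HIGHEST_PROTOCOL)

with open('snapshot_state.pkl', 'rb') as fid:
    first = pickle.load(fid)    # 42
    second = pickle.load(fid)   # [1, 2, 3]
    third = pickle.load(fid)    # {'cur': 0}
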
Example 9
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: pascal_voc.py    MIT License
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
      with open(cache_file, 'rb') as fid:
        try:
          roidb = pickle.load(fid)
        except:
          roidb = pickle.load(fid, encoding='bytes')
      print('{} gt roidb loaded from {}'.format(self.name, cache_file))
      return roidb

    gt_roidb = [self._load_pascal_labels(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
      pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))

    return gt_roidb 
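
The try/except fallback in Example 9 exists because caches pickled under Python 2 often fail to load on Python 3 with the default string decoding; passing encoding='bytes' works around that. A minimal standalone sketch of the pattern, assuming the common UnicodeDecodeError failure mode (the helper name load_compat is made up):

import pickle

def load_compat(path):
    """Hypothetical helper: load a pickle, retrying with encoding='bytes' for Python 2-era files."""
    with open(path, 'rb') as fid:
        try:
            return pickle.load(fid)
        except UnicodeDecodeError:
            fid.seek(0)  # rewind before retrying
            return pickle.load(fid, encoding='bytes')
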
Example 10
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: coco.py    MIT License
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.
    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.exists(cache_file):
      with open(cache_file, 'rb') as fid:
        roidb = pickle.load(fid)
      print('{} gt roidb loaded from {}'.format(self.name, cache_file))
      return roidb

    gt_roidb = [self._load_coco_annotation(index)
                for index in self._image_index]

    with open(cache_file, 'wb') as fid:
      pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))
    return gt_roidb 
Example 11
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: sessions.py    Apache License 2.0
def get(self, sid):
        if not self.is_valid_key(sid):
            return self.new()
        try:
            f = open(self.get_session_filename(sid), 'rb')
        except IOError:
            if self.renew_missing:
                return self.new()
            data = {}
        else:
            try:
                try:
                    data = load(f)
                except Exception:
                    data = {}
            finally:
                f.close()
        return self.session_class(data, sid, False) 
Example 12
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: sessions.py    Apache License 2.0
def get(self, sid):
        if not self.is_valid_key(sid):
            return self.new()
        try:
            f = open(self.get_session_filename(sid), 'rb')
        except IOError:
            if self.renew_missing:
                return self.new()
            data = {}
        else:
            try:
                try:
                    data = load(f)
                except Exception:
                    data = {}
            finally:
                f.close()
        return self.session_class(data, sid, False) 
Example 13
Project: wikilinks   Author: trovdimi   File: decorators.py    MIT License
def __call__(self, *args, **kwargs):
        fkey = args[0].__class__.__name__ + '_' + self.func.__name__
        self.filepath = os.path.join(CACHE_FOLDER, fkey + '.obj')

        if self.cache is None:
            if os.path.exists(self.filepath):
                print('loading', self.filepath, '...')
                with open(self.filepath, 'rb') as infile:
                    self.cache = pickle.load(infile)
            else:
                self.cache = {}

        pickled = pickle.dumps(args[1:], -1) + pickle.dumps(kwargs, -1)
        key = hashlib.sha1(pickled).hexdigest()
        try:
            return self.cache[key]
        except KeyError:
            # print(fkey, 'key not found...')
            self.modified = True
            value = self.func(*args, **kwargs)
            self.cache[key] = value
            return value 
Example 14
Project: wikilinks   Author: trovdimi   File: redirectscandidatespostioninserter.py    MIT License
def manageWork(self):
        #file = open("/home/ddimitrov/20160305_en_wikilinks/tmp/missing_article_ids.p",'r')
        file = open(SSD_HOME+"pickle/redirects_ids.obj",'r')
        object_file = pickle.load(file)
        #print object_file
        #print type(object_file)
        for root, dirs, files in os.walk(STATIC_HTML_DUMP_ARTICLES_DIR+self.path):
            for i, file_name in enumerate(files):
                if file_name.endswith(".zip"):
                    parts = file_name.split('_')
                    if long(parts[1]) in object_file:

                        try:
                            self.parse_article(file_name,root)
                        except  Exception as e:
                            print("FILENAME_FAIL:"+file_name)
                            print(type(e))    # the exception instance
                            print(e)
                            print (e.message) 
Example 15
Project: esjspy   Author: Dothion   File: base.py    GNU General Public License v3.0
def _scan(identifier: str, cache_dir: Path = Path(CACHE_DIR)) -> Any:
    md5 = _gen_md5(identifier)
    cache_dir = cache_dir / md5[0:2] / md5[2:4]
    cache_path = cache_dir / md5
    # Debug message (simplified vs. traditional Chinese): "Read the contents of %s from the cache."
    logger.debug('从缓存中读取了 %s 的内容。' % identifier if LANGUAGE in _simplified else
                 '從 cache 中讀取了 %s 的內容。' % identifier)
    with cache_path.open('rb') as f:
        return pickle.load(f) 
Example 16
Project: fs_image   Author: facebookincubator   File: demo_sendstreams.py    MIT License
def gold_demo_sendstreams():
    with open(
        os.path.join(
            # We are part of the library interface, but __file__ works because:
            #   (a) we never use par_style = "fastzip", and
            #   (b) the gold data is baked into the PAR for reading.
            os.path.dirname(os.path.abspath(__file__)),
            'gold_demo_sendstreams.pickle',
        ),
        'rb',
    ) as f:
        return pickle.load(f) 
Example 17
Project: fs_image   Author: facebookincubator   File: print_gold_demo_sendstreams.py    MIT License
def main(argv):
    if len(argv) != 2:
        print(__doc__, file=sys.stderr)
        return 1

    with open(os.path.join(
        os.path.dirname(__file__), 'gold_demo_sendstreams.pickle',
    ), "rb") as infile:
        sys.stdout.buffer.write(pickle.load(infile)[argv[1]]["sendstream"])
    return 0 
Example 18
Project: f5go   Author: f5devcentral   File: go.py    MIT License
def load(db=cfg_fnDatabase):
        """Attempt to load the database defined at cfg_fnDatabase. Create a
        new one if the database doesn't already exist.
        """
        try:
            print("Loading DB from %s" % db)
            return pickle.load(open(db, 'rb'))
        except IOError:
            print(sys.exc_info()[1])
            print("Creating new database...")
            return LinkDatabase() 
Example 19
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License
def load(cls, file_obj):
        """Load serialized object from open JSON file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from JSON file
        :rtype: object

        """
        return json.load(file_obj) 
Example 20
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License
def load(cls, file_obj):
        """Load serialized object from open pickle file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from pickle file
        :rtype: object

        """
        return cPickle.load(file_obj) 
Example 21
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License
def _load(self):
        """Load cached settings from JSON file `self._filepath`."""
        data = {}
        with LockFile(self._filepath, 0.5):
            with open(self._filepath, 'rb') as fp:
                data.update(json.load(fp))

        self._original = deepcopy(data)

        self._nosave = True
        self.update(data)
        self._nosave = False 
Example 22
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License
def cached_data(self, name, data_func=None, max_age=60):
        """Return cached data if younger than ``max_age`` seconds.

        Retrieve data from cache or re-generate and re-cache data if
        stale/non-existent. If ``max_age`` is 0, return cached data no
        matter how old.

        :param name: name of datastore
        :param data_func: function to (re-)generate data.
        :type data_func: ``callable``
        :param max_age: maximum age of cached data in seconds
        :type max_age: ``int``
        :returns: cached data, return value of ``data_func`` or ``None``
            if ``data_func`` is not set

        """
        serializer = manager.serializer(self.cache_serializer)

        cache_path = self.cachefile('%s.%s' % (name, self.cache_serializer))
        age = self.cached_data_age(name)

        if (age < max_age or max_age == 0) and os.path.exists(cache_path):

            with open(cache_path, 'rb') as file_obj:
                self.logger.debug('loading cached data: %s', cache_path)
                return serializer.load(file_obj)

        if not data_func:
            return None

        data = data_func()
        self.cache_data(name, data)

        return data 
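
A hedged usage sketch of the method documented above; wf stands in for an already-constructed workflow object exposing cached_data(), and fetch_posts is a made-up callable:

def fetch_posts():
    # Stand-in for an expensive operation whose result is worth caching.
    return ['post-1', 'post-2']

# Re-use cached posts if they are younger than 10 minutes, otherwise regenerate and re-cache.
posts = wf.cached_data('posts', data_func=fetch_posts, max_age=600)
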
Example 23
Project: explirefit   Author: codogogo   File: io_helper.py    Apache License 2.0
def deserialize(path):
	return pickle.load(open(path, "rb" )) 
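
The one-liner above never closes the file handle it opens. A minimal sketch of the same helper using a context manager, behaviour otherwise unchanged:

import pickle

def deserialize(path):
    with open(path, "rb") as f:
        return pickle.load(f)
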
Example 24
Project: rhodonite   Author: nestauk   File: build_phylomemetic_graph.py    MIT License
def build(input, output, min_clique_size, parent_limit, 
        workers, chunksize):
    '''from_communities
    Creates and saves a phylomemetic graph from an input of temporal communities.

    Args:
        input (:obj:`str`): Path to input pickled dictionary of communities.
        output (:obj:`str`): Output directory for results (.gt format).
        min_clique_size (:obj:`int`): Minimum community size to consider.
        parent_limit (:obj:`int`): Maximum number of parents to consider.
        workers (:obj:`str` or :obj:`int`): Number of processes to use. Either 
            provide integer, or "auto". Default is "auto".
        chunksize (:obj:`str` or :obj:`int`): Number of communities for each 
            worker to process at a time. Either provide integer or "auto".
            Default is "auto".
    '''

    with open(input, 'rb') as f:
        communities = pickle.load(f)
    
    if chunksize.isnumeric():
        chunksize = int(chunksize)
    if workers.isnumeric():
        workers = int(workers)

    save_dir = os.path.join(*input.split(os.sep)[:-1])
    if os.path.isdir(save_dir):
        pg = phylomemetic_graph(
                community_sets=list(communities.values()),
                labels=list(communities.keys()),
                min_clique_size=min_clique_size, 
                parent_limit=parent_limit,
                workers=workers,
                chunksize=chunksize,
                )
        pg.save(output)
    else:
        click.echo('Output directory does not exist.') 
Example 25
Project: pyblish-win   Author: pyblish   File: webchecker.py    GNU Lesser General Public License v3.0
def load_pickle(dumpfile=DUMPFILE, verbose=VERBOSE):
    if verbose > 0:
        print "Loading checkpoint from %s ..." % dumpfile
    f = open(dumpfile, "rb")
    c = pickle.load(f)
    f.close()
    if verbose > 0:
        print "Done."
        print "Root:", "\n      ".join(c.roots)
    return c 
Example 26
Project: pyblish-win   Author: pyblish   File: grammar.py    GNU Lesser General Public License v3.0
def load(self, filename):
        """Load the grammar tables from a pickle file."""
        f = open(filename, "rb")
        d = pickle.load(f)
        f.close()
        self.__dict__.update(d) 
Example 27
Project: pyblish-win   Author: pyblish   File: test_signal.py    GNU Lesser General Public License v3.0
def test_main(self):
        # This function spawns a child process to insulate the main
        # test-running process from all the signals. It then
        # communicates with that child process over a pipe and
        # re-raises information about any exceptions the child
        # raises. The real work happens in self.run_test().
        os_done_r, os_done_w = os.pipe()
        with closing(os.fdopen(os_done_r)) as done_r, \
             closing(os.fdopen(os_done_w, 'w')) as done_w:
            child = os.fork()
            if child == 0:
                # In the child process; run the test and report results
                # through the pipe.
                try:
                    done_r.close()
                    # Have to close done_w again here because
                    # exit_subprocess() will skip the enclosing with block.
                    with closing(done_w):
                        try:
                            self.run_test()
                        except:
                            pickle.dump(traceback.format_exc(), done_w)
                        else:
                            pickle.dump(None, done_w)
                except:
                    print 'Uh oh, raised from pickle.'
                    traceback.print_exc()
                finally:
                    exit_subprocess()

            done_w.close()
            # Block for up to MAX_DURATION seconds for the test to finish.
            r, w, x = select.select([done_r], [], [], self.MAX_DURATION)
            if done_r in r:
                tb = pickle.load(done_r)
                if tb:
                    self.fail(tb)
            else:
                os.kill(child, signal.SIGKILL)
                self.fail('Test deadlocked after %d seconds.' %
                          self.MAX_DURATION) 
Example 28
Project: pyblish-win   Author: pyblish   File: test_random.py    GNU Lesser General Public License v3.0
def test_bug_1727780(self):
        # verify that version-2-pickles can be loaded
        # fine, whether they are created on 32-bit or 64-bit
        # platforms, and that version-3-pickles load fine.
        files = [("randv2_32.pck", 780),
                 ("randv2_64.pck", 866),
                 ("randv3.pck", 343)]
        for file, value in files:
            f = open(test_support.findfile(file),"rb")
            r = pickle.load(f)
            f.close()
            self.assertEqual(r.randrange(1000), value) 
Example 29
Project: LipNet-PyTorch   Author: sailordiary   File: dataloader.py    BSD 3-Clause "New" or "Revised" License
def __getitem__(self, index):
        # images: bs x chan x T x H x W
        x = torch.zeros(3, self.opt.max_timesteps, 50, 100)
        # load video using read_data() and shove into x
        d = self.dataset[index]
        # targets: bs-length tensor of targets (each one is the length of the target seq)
        frames, y, sub = read_data(d, self.opt, self.vocab_mapping)
        x[:, : frames.size(1), :, :] = frames
        # input lengths: bs-length tensor of integers, representing
        # the number of input timesteps/frames for the given batch element
        length = frames.size(1)

        return x, y, length, index 
Example 30
Project: natural-questions   Author: google-research-datasets   File: nq_eval.py    Apache License 2.0
def main(_):
  cache_path = os.path.join(os.path.dirname(FLAGS.gold_path), 'cache')
  if FLAGS.cache_gold_data and os.path.exists(cache_path):
    logging.info('Reading from cache: %s', format(cache_path))
    nq_gold_dict = pickle.load(open(cache_path, 'r'))
  else:
    nq_gold_dict = util.read_annotation(
        FLAGS.gold_path, n_threads=FLAGS.num_threads)
    if FLAGS.cache_gold_data:
      logging.info('Caching gold data for next time to: %s', format(cache_path))
      pickle.dump(nq_gold_dict, open(cache_path, 'w'))

  nq_pred_dict = util.read_prediction_json(FLAGS.predictions_path)

  long_answer_stats, short_answer_stats = score_answers(nq_gold_dict,
                                                        nq_pred_dict)

  if FLAGS.pretty_print:
    print('*' * 20)
    print('LONG ANSWER R@P TABLE:')
    print_r_at_p_table(long_answer_stats)
    print('*' * 20)
    print('SHORT ANSWER R@P TABLE:')
    print_r_at_p_table(short_answer_stats)

    scores = compute_final_f1(long_answer_stats, short_answer_stats)
    print('*' * 20)
    print('METRICS IGNORING SCORES (n={}):'.format(scores['long-answer-n']))
    print('              F1     /  P      /  R')
    print('Long answer  {: >7.2%} / {: >7.2%} / {: >7.2%}'.format(
        scores['long-answer-f1'], scores['long-answer-precision'],
        scores['long-answer-recall']))
    print('Short answer {: >7.2%} / {: >7.2%} / {: >7.2%}'.format(
        scores['short-answer-f1'], scores['short-answer-precision'],
        scores['short-answer-recall']))
  else:
    metrics = get_metrics_with_answer_stats(long_answer_stats,
                                            short_answer_stats)
    print(json.dumps(metrics)) 
Example 31
Project: BASS   Author: Cisco-Talos   File: binary_database.py    GNU General Public License v2.0
def load(clazz, path):
        with open(path, "rb") as f:
            return clazz(pickle.load(f)) 
Example 32
Project: BASS   Author: Cisco-Talos   File: server.py    GNU General Public License v2.0
def function_raw_hash_get():
    global Session
    session = Session()
    filename, file_ = request.files.items()[0]
    db = Database(pickle.load(file_))

    arch_name = db.architecture_name
    if arch_name == "metapc":
        arch_name = "x86"
    try:
        arch = session.query(Architecture).filter(Architecture.name == arch_name and \
                Architecture.bits == db.architecture_bits and \
                Architecture.little_endian == db.architecture_endianness == "little").one()
    except NoResultFound:
        return make_response(jsonify(message = "Architecture not found"), 404)
    
    try:
        func = next(db.functions)
    except StopIteration:
        return make_response(jsonify(message = "No function found in database"), 500)

    raw_hash = _function_calculate_raw_sha256(func)
    size = _function_get_size(func)

    try:
        function = session.query(Function).filter(Function.raw_sha256 == raw_hash and \
                Function.size == size and \
                Function.arch == arch.id).one()
        return make_response(jsonify(**json.loads(function.data)), 200)
    except NoResultFound:
        return make_response(jsonify(message = "Function not found"), 404) 
Example 33
Project: BASS   Author: Cisco-Talos   File: server.py    GNU General Public License v2.0
def function_mnem_hash_get():
    global Session
    session = Session()
    filename, file_ = request.files.items()[0]
    db = Database(pickle.load(file_))

    arch_name = db.architecture_name
    if arch_name == "metapc":
        arch_name = "x86"
    try:
        arch = session.query(Architecture).filter(Architecture.name == arch_name and \
                Architecture.bits == db.architecture_bits and \
                Architecture.little_endian == db.architecture_endianness == "little").one()
    except NoResultFound:
        return make_response(jsonify(message = "Architecture not found"), 404)
    
    try:
        func = next(db.functions)
    except StopIteration:
        return make_response(jsonify(message = "No function found in database"), 500)

    mnem_hash = _function_calculate_mnem_sha256(func)

    try:
        function = session.query(Function).filter(Function.mnem_sha256 == mnem_hash and \
                Function.arch == arch.id).one()
        return make_response(jsonify(**json.loads(function.data)), 200)
    except NoResultFound:
        return make_response(jsonify(message = "Function not found"), 404) 
Example 34
Project: BASS   Author: Cisco-Talos   File: binary_database.py    GNU General Public License v2.0
def load(clazz, path):
        with open(path, "rb") as f:
            return clazz(pickle.load(f)) 
Example 35
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License
def load(cls, file_obj):
        """Load serialized object from open JSON file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from JSON file
        :rtype: object

        """
        return json.load(file_obj) 
Example 36
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License
def load(cls, file_obj):
        """Load serialized object from open pickle file.

        .. versionadded:: 1.8

        :param file_obj: file handle
        :type file_obj: ``file`` object
        :returns: object loaded from pickle file
        :rtype: object

        """
        return cPickle.load(file_obj) 
Example 37
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License
def _load(self):
        """Load cached settings from JSON file `self._filepath`."""
        data = {}
        with LockFile(self._filepath, 0.5):
            with open(self._filepath, 'rb') as fp:
                data.update(json.load(fp))

        self._original = deepcopy(data)

        self._nosave = True
        self.update(data)
        self._nosave = False 
Example 38
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License
def cached_data(self, name, data_func=None, max_age=60):
        """Return cached data if younger than ``max_age`` seconds.

        Retrieve data from cache or re-generate and re-cache data if
        stale/non-existent. If ``max_age`` is 0, return cached data no
        matter how old.

        :param name: name of datastore
        :param data_func: function to (re-)generate data.
        :type data_func: ``callable``
        :param max_age: maximum age of cached data in seconds
        :type max_age: ``int``
        :returns: cached data, return value of ``data_func`` or ``None``
            if ``data_func`` is not set

        """
        serializer = manager.serializer(self.cache_serializer)

        cache_path = self.cachefile('%s.%s' % (name, self.cache_serializer))
        age = self.cached_data_age(name)

        if (age < max_age or max_age == 0) and os.path.exists(cache_path):

            with open(cache_path, 'rb') as file_obj:
                self.logger.debug('loading cached data: %s', cache_path)
                return serializer.load(file_obj)

        if not data_func:
            return None

        data = data_func()
        self.cache_data(name, data)

        return data 
Example 39
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: train_val.py    MIT License
def initialize(self):
    # Initial file lists are empty
    np_paths = []
    ss_paths = []
    # Fresh train directly from ImageNet weights
    print('Loading initial model weights from {:s}'.format(self.pretrained_model))
    self.net.load_pretrained_cnn(torch.load(self.pretrained_model))
    print('Loaded.')
    
#    pretrained_model = torch.load('/DATA3_DB7/data/jjwang/workspace/two_stage/output/vgg16/voc_2007_trainval/default/vgg16_faster_rcnn_iter_50001.pth')    
    if self.wsddn_premodel is not None: # Load the pretrained WSDDN model
      wsddn_pre = torch.load(self.wsddn_premodel)
      model_dict = self.net.state_dict()
      model_dict.update(wsddn_pre)
      self.net.load_state_dict(model_dict)
      print('Loading pretrained WSDDN model weights from {:s}'.format(self.wsddn_premodel))
      print('Loaded.')
    
    
    # Need to fix the variables before loading, so that the RGB weights are changed to BGR
    # For VGG16 it also changes the convolutional weights fc6 and fc7 to
    # fully connected weights
    last_snapshot_iter = 0
    lr = cfg.TRAIN.LEARNING_RATE
    stepsizes = list(cfg.TRAIN.STEPSIZE)

    return lr, last_snapshot_iter, stepsizes, np_paths, ss_paths 
Example 40
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: pascal_voc.py    MIT License
def _load_rpn_roidb(self, gt_roidb):
    filename = self.config['rpn_file']
    print('loading {}'.format(filename))
    assert os.path.exists(filename), \
      'rpn data not found at: {}'.format(filename)
    with open(filename, 'rb') as f:
      box_list = pickle.load(f)
    return self.create_roidb_from_box_list(box_list, gt_roidb) 
Example 41
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: pascal_voc.py    MIT License
def selective_search_roidb(self):
    """
    Return the database of selective search regions of interest.
    Ground-truth ROIs are also included.

    This function loads/saves from/to a cache file to speed up future calls.
    """
    cache_file = os.path.join(self.cache_path,
                              self.name + '_selective_search_roidb.pkl')

    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            roidb = pickle.load(fid)
        print('{} ss roidb loaded from {}'.format(self.name, cache_file))
        return roidb

    if int(self._year) == 2007 or self._image_set != 'test':
        gt_roidb = self.gt_roidb()
        # ss_roidb = self._load_selective_search_roidb(gt_roidb)
        # roidb = datasets.imdb.merge_roidbs(gt_roidb, ss_roidb)
        roidb = self._load_selective_search_roidb(gt_roidb)
    else:
        roidb = self._load_selective_search_roidb(None)
    with open(cache_file, 'wb') as fid:
        pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote ss roidb to {}'.format(cache_file))

    return roidb 
Example 42
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: reval.py    MIT License
def from_dets(imdb_name, output_dir, args):
  imdb = get_imdb(imdb_name)
  imdb.competition_mode(args.comp_mode)
  imdb.config['matlab_eval'] = args.matlab_eval
  with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
    dets = pickle.load(f)

  if args.apply_nms:
    print('Applying NMS to all detections')
    nms_dets = apply_nms(dets, cfg.TEST.NMS)
  else:
    nms_dets = dets

  print('Evaluating detections')
  imdb.evaluate_detections(nms_dets, output_dir) 
Example 43
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: reval_discovery.py    MIT License
def from_dets(imdb_name, output_dir, args):
  imdb = get_imdb(imdb_name)
  imdb.competition_mode(args.comp_mode)
  with open(os.path.join(output_dir, 'discovery.pkl'), 'rb') as f:
    dets = pickle.load(f)


  print('Evaluating detections')
  imdb.evaluate_discovery(dets, output_dir) 
Example 44
Project: Collaborative-Learning-for-Weakly-Supervised-Object-Detection   Author: Sunarker   File: show_boxes_results.py    MIT License
def parse_args():
  """
  Parse input arguments
  """
  parser = argparse.ArgumentParser(description='show the imgs and the resulted boxes')
  parser.add_argument('--box', default='/DATA3_DB7/data/jjwang/workspace/wsFaster-rcnn/output/vgg16/voc_2007_test/WSDDN_PRE_50000/vgg16_faster_rcnn_iter_90000/wsddn/detections.pkl', help='boxes pkl file to load')
  parser.add_argument('--thr', default=0.1, type=float, help='idx of test img')

  if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)

  args = parser.parse_args()
  return args 
Example 45
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: cache.py    Apache License 2.0
def _prune(self):
        entries = self._list_dir()
        if len(entries) > self._threshold:
            now = time()
            try:
                for idx, fname in enumerate(entries):
                    remove = False
                    with open(fname, 'rb') as f:
                        expires = pickle.load(f)
                    remove = expires <= now or idx % 3 == 0

                    if remove:
                        os.remove(fname)
            except (IOError, OSError):
                pass 
Example 46
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: cache.py    Apache License 2.0
def get(self, key):
        filename = self._get_filename(key)
        try:
            with open(filename, 'rb') as f:
                if pickle.load(f) >= time():
                    return pickle.load(f)
                else:
                    os.remove(filename)
                    return None
        except (IOError, OSError, pickle.PickleError):
            return None 
Example 47
Project: Flask-Python-GAE-Login-Registration   Author: orymeyer   File: cache.py    Apache License 2.0
def _prune(self):
        entries = self._list_dir()
        if len(entries) > self._threshold:
            now = time()
            try:
                for idx, fname in enumerate(entries):
                    remove = False
                    with open(fname, 'rb') as f:
                        expires = pickle.load(f)
                    remove = expires <= now or idx % 3 == 0

                    if remove:
                        os.remove(fname)
            except (IOError, OSError):
                pass 
Example 48
Project: comet-commonsense   Author: atcbosselut   File: demo_bilinear.py    Apache License 2.0
def run(gens_file, theshold=None, flip_r_e1=False):
    model = pickle.load(open("ckbc-demo/Bilinear_cetrainSize300frac1.0dSize200relSize150acti0.001.1e-05.800.RAND.tanh.txt19.pickle",  "r"))

    Rel = model['rel']
    We = model['embeddings']
    Weight = model['weight']
    Offset = model['bias']
    words = model['words_name']
    rel = model['rel_name']

    results = []

    if type(gens_file) == list:
        gens = []
        for file_name in gens_file:
            gens += open(file_name, "r").read().split("\n")
    else:
        gens = open(gens_file, "r").read().split("\n")

    formatted_gens = [tuple(i.split("\t")[:4]) for i in gens if i]

    for i, gen in enumerate(formatted_gens):
        if gen == ('s', 'r', 'o', 'minED'):
            continue
        if flip_r_e1:
            relation = "_".join(gen[1].split(" "))
            subject_ = "_".join(gen[0].split(" "))
        else:
            relation = "_".join(gen[0].split(" "))
            subject_ = "_".join(gen[1].split(" "))
        object_ = "_".join(gen[2].split(" "))
        result = score(subject_, object_, words, We, rel, Rel, Weight, Offset, relation)

        results.append((gen, result))

    return results 
Example 49
Project: wikilinks   Author: trovdimi   File: redirectscandidatespostioninserter.py    MIT License
def modify_html(self, html, source_article_id):
        # we need this in order to plot the heatmap
        soup = Soup(html, 'html.parser')
        head = soup.find('base')
        print soup.find("title")
        if head is not None:
            head.decompose()


        css = soup.find("link", {"rel": "stylesheet"})
        if css is not None:
            css['href'] = 'https:' + css['href']
            headers = {'user-agent': EMAIL}
            r = requests.get(css['href'], headers=headers, stream=True)
            css['href'] = ""
            if r.status_code == 200:
                style = soup.new_tag('style')
                style.string = r.text
                css.insert_after(style)
            else:
                print('FAIL: Cannot load css  for id: "%s" ' % source_article_id)

            css.decompose()

        last_element_on_page_meta = soup.new_tag('meta')
        last_element_on_page_meta['http-equiv'] = "content-type"
        last_element_on_page_meta['content'] = "text/html; charset=utf-8"

        body = soup.find('body')
        #if body is not None:
        last_element_on_page = soup.new_tag('div')
        last_element_on_page['class'] = "pyqt_is_shit"
        body.append(last_element_on_page)
        return soup.prettify(encoding='utf-8') 
Example 50
Project: mietmap-scraper   Author: CodeforKarlsruhe   File: scrape.py    MIT License
def memoize_persistently(filename):
    """
    Persistently memoize a function's return values.

    This decorator memoizes a function's return values persistently
    over multiple runs of the program. The return values are stored
    in the given file using ``pickle``. If the decorated function is
    called again with arguments that it has already been called with
    then the return value is retrieved from the cache and returned
    without calling the function. If the function is called with
    previously unseen arguments then its return value is added to the
    cache and the cache file is updated.

    Both return values and arguments of the function must support the
    pickle protocol. The arguments must also be usable as dictionary
    keys.
    """
    try:
        with open(filename, 'rb') as cache_file:
            cache = pickle.load(cache_file)
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        cache = {}

    def decorator(f):

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            key = args + tuple(sorted(kwargs.items()))
            try:
                return cache[key]
            except KeyError:
                value = cache[key] = f(*args, **kwargs)
                with open(filename, 'wb') as cache_file:
                    pickle.dump(cache, cache_file)
                return value

        return wrapper
    return decorator 
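
A minimal hedged usage sketch of the decorator defined above (the cache file name and the decorated function slow_lookup are hypothetical):

@memoize_persistently('lookup_cache.pickle')
def slow_lookup(query):
    # Stand-in for expensive work whose result is worth caching across runs.
    return query.upper()

print(slow_lookup('rent'))   # first call computes and stores the result in the pickle file
print(slow_lookup('rent'))   # later calls, even in a new process, read it back from the cache
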
Example 51
Project: OpenAPS   Author: medicinexlab   File: savedata.py    MIT License
def load_array(name):
    """
    This function loads the data from the pickle file.
    """
    with open('./savedata/' + name + '.pickle', 'rb') as handle:
        data_item = pickle.load(handle)

    return data_item 
Example 52
Project: ieml   Author: IEMLdev   File: commons.py    GNU General Public License v3.0
def get(self) -> object:
        """
        Unpickle and return the object stored in the cache file.
        :return: the stored object
        """
        with open(self.cache_file, 'rb') as fp:
            return pickle.load(fp) 
Example 53
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License
def get_label_sub_matrix(vocabulary_word2index_label,kb_path,name_scope=''):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_sub.pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            result=pickle.load(data_f)
            return result
    else:
        # load label embedding
        m = len(vocabulary_word2index_label)
        result=np.zeros((m,m))
        with open(kb_path, 'r') as label_pairs:
            lps = label_pairs.readlines() # lps: label pairs
        lps = [x.strip() for x in lps]
        for lp in lps:
            labels = lp.split(',')
            if len(labels) == 3 and labels[-1] == 'true' or len(labels) == 2:
                index_j = vocabulary_word2index_label.get(labels[0].lower(),-1)
                index_k = vocabulary_word2index_label.get(labels[1].lower(),-1)
                if index_j != -1 and index_k != -1 and index_j != index_k: # if both of the two labels are in the training data, and they are different from each other (diagonal as 0).
                    result[index_j,index_k] = 1.
                    print('matched:', labels[0], str(index_j), labels[1], str(index_k))
        #save to file system if vocabulary of words is not exists.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump(result, data_f)
    return result  
    
# a weighted 
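
Example 53 and the later data_util.py examples all follow the same compute-once, cache-to-pickle pattern. A minimal standalone sketch of that pattern (function and path names are hypothetical):

import os
import pickle

def cached_result(cache_path, build_fn):
    """Hypothetical helper: return a pickled result if present, otherwise build it and cache it."""
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    result = build_fn()
    with open(cache_path, 'wb') as f:
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    return result
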
Example 54
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License
def get_label_sim_matrix(vocabulary_index2word_label,word2vec_model_label_path='../tag-all.bin-300',name_scope='',threshold=0):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_sim_" + str(threshold) + ".pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            result=pickle.load(data_f)
            return result
    else:
        model=word2vec.load(word2vec_model_label_path,kind='bin')
        #m = model.vectors.shape[0]-1 #length # the first one is </s>, to be eliminated
        m = len(vocabulary_index2word_label)
        result=np.zeros((m,m))
        count_less_th = 0.0 # count the sim less than the threshold
        for i in range(0,m):
            for j in range(0,m):
                vector_i=model.get_vector(vocabulary_index2word_label[i])
                vector_j=model.get_vector(vocabulary_index2word_label[j])
                #result[i][j] = np.dot(vector_i,vector_j.T) # can be negative here, result in [-1,1]
                result[i][j] = (1+np.dot(vector_i,vector_j.T))/2 # result in [0,1]
                if result[i][j] < threshold:
                    count_less_th = count_less_th + 1
                    result[i][j] = 0
        print("result",result)
        print("result",result.shape)
        print("retained similarities percentage:", str(1-count_less_th/float(m)/float(m)))
        #save to file system if vocabulary of words is not exists.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump(result, data_f)
    return result

# used for other embedding 
Example 55
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License
def create_voabulary(word2vec_model_path,name_scope=''):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_word_voabulary.pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            vocabulary_word2index, vocabulary_index2word=pickle.load(data_f)
            return vocabulary_word2index, vocabulary_index2word
    else:
        vocabulary_word2index={}
        vocabulary_index2word={}
        print("create vocabulary. word2vec_model_path:",word2vec_model_path)
        model=word2vec.load(word2vec_model_path,kind='bin')
        vocabulary_word2index['PAD_ID']=0
        vocabulary_index2word[0]='PAD_ID'
        special_index=0
        if 'biLstmTextRelation' in name_scope:
            vocabulary_word2index['EOS']=1 # a special token for biLstTextRelation model. which is used between two sentences.
            vocabulary_index2word[1]='EOS'
            special_index=1
        for i,vocab in enumerate(model.vocab):
            vocabulary_word2index[vocab]=i+1+special_index
            vocabulary_index2word[i+1+special_index]=vocab

        #save to file system if vocabulary of words is not exists.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump((vocabulary_word2index,vocabulary_index2word), data_f)
    return vocabulary_word2index,vocabulary_index2word

# create vocabulary of labels. labels are sorted: 1 is high frequency, 2 is low frequency. 
Example 56
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License
def get_label_sub_matrix(vocabulary_word2index_label,kb_path,name_scope=''):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_sub.pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            result=pickle.load(data_f)
            return result
    else:
        # load label embedding
        m = len(vocabulary_word2index_label)
        result=np.zeros((m,m))
        with open(kb_path, 'r') as label_pairs:
            lps = label_pairs.readlines() # lps: label pairs
        lps = [x.strip() for x in lps]
        for lp in lps:
            labels = lp.split(',')
            if len(labels) == 3 and labels[-1] == 'true' or len(labels) == 2:
                index_j = vocabulary_word2index_label.get(labels[0].lower(),-1)
                index_k = vocabulary_word2index_label.get(labels[1].lower(),-1)
                if index_j != -1 and index_k != -1 and index_j != index_k: # if both of the two labels are in the training data, and they are different from each other (diagonal as 0).
                    result[index_j,index_k] = 1.
                    print('matched:', labels[0], str(index_j), labels[1], str(index_k))
        #save to file system if vocabulary of words is not exists.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump(result, data_f)
    return result  
    
# a weighted 
Example 57
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License
def create_voabulary(word2vec_model_path,name_scope=''):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_word_voabulary.pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            vocabulary_word2index, vocabulary_index2word=pickle.load(data_f)
            return vocabulary_word2index, vocabulary_index2word
    else:
        vocabulary_word2index={}
        vocabulary_index2word={}
        print("create vocabulary. word2vec_model_path:",word2vec_model_path)
        model=word2vec.load(word2vec_model_path,kind='bin')
        vocabulary_word2index['PAD_ID']=0
        vocabulary_index2word[0]='PAD_ID'
        special_index=0
        if 'biLstmTextRelation' in name_scope:
            vocabulary_word2index['EOS']=1 # a special token for biLstTextRelation model. which is used between two sentences.
            vocabulary_index2word[1]='EOS'
            special_index=1
        for i,vocab in enumerate(model.vocab):
            vocabulary_word2index[vocab]=i+1+special_index
            vocabulary_index2word[i+1+special_index]=vocab

        #save to file system if vocabulary of words is not exists.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump((vocabulary_word2index,vocabulary_index2word), data_f)
    return vocabulary_word2index,vocabulary_index2word

# create vocabulary of labels. labels are sorted: 1 is high frequency, 2 is low frequency. 
Example 58
Project: Automated-Social-Annotation   Author: acadTags   File: BiGRU_train.py    MIT License
def assign_pretrained_word_embedding(sess,vocabulary_index2word,vocab_size,model,num_run,word2vec_model_path=None):
    if num_run==0:
        print("using pre-trained word emebedding.started.word2vec_model_path:",word2vec_model_path)
    # transform embedding input into a dictionary
    # word2vecc=word2vec.load('word_embedding.txt') #load vocab-vector fiel.word2vecc['w91874']
    word2vec_model = word2vec.load(word2vec_model_path, kind='bin')
    word2vec_dict = {}
    for word, vector in zip(word2vec_model.vocab, word2vec_model.vectors):
        word2vec_dict[word] = vector
    word_embedding_2dlist = [[]] * vocab_size  # create an empty word_embedding list: which is a list of list, i.e. a list of word, where each word is a list of values as an embedding vector.
    word_embedding_2dlist[0] = np.zeros(FLAGS.embed_size)  # assign empty for first word:'PAD'
    bound = np.sqrt(6.0) / np.sqrt(vocab_size)  # bound for random variables.
    count_exist = 0;
    count_not_exist = 0
    for i in range(1, vocab_size):  # loop each word
        word = vocabulary_index2word[i]  # get a word
        embedding = None
        try:
            embedding = word2vec_dict[word]  # try to get vector:it is an array.
        except Exception:
            embedding = None
        if embedding is not None:  # the 'word' exist a embedding
            word_embedding_2dlist[i] = embedding;
            count_exist = count_exist + 1  # assign array to this word.
        else:  # no embedding for this word
            word_embedding_2dlist[i] = np.random.uniform(-bound, bound, FLAGS.embed_size);
            count_not_exist = count_not_exist + 1  # init a random value for the word.
    word_embedding_final = np.array(word_embedding_2dlist)  # covert to 2d array.
    #print(word_embedding_final[0]) # print the original embedding for the first word
    word_embedding = tf.constant(word_embedding_final, dtype=tf.float32)  # convert to tensor
    t_assign_embedding = tf.assign(model.Embedding,word_embedding)  # assign this value to our embedding variables of our model.
    sess.run(t_assign_embedding);
    if num_run==0:
        print("word. exists embedding:", count_exist, " ;word not exist embedding:", count_not_exist)
        print("using pre-trained word emebedding.ended...")

# based on a threshold, validate on the validation set and report loss and precision (multi-label) 
Example 59
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License
def get_label_sub_matrix(vocabulary_word2index_label,kb_path,name_scope=''):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_sub.pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            result=pickle.load(data_f)
            return result
    else:
        # load label embedding
        m = len(vocabulary_word2index_label)
        result=np.zeros((m,m))
        with open(kb_path, 'r') as label_pairs:
            lps = label_pairs.readlines() # lps: label pairs
        lps = [x.strip() for x in lps]
        for lp in lps:
            labels = lp.split(',')
            if len(labels) == 3 and labels[-1] == 'true' or len(labels) == 2:
                index_j = vocabulary_word2index_label.get(labels[0].lower(),-1)
                index_k = vocabulary_word2index_label.get(labels[1].lower(),-1)
                if index_j != -1 and index_k != -1 and index_j != index_k: # if both of the two labels are in the training data, and they are different from each other (diagonal as 0).
                    result[index_j,index_k] = 1.
                    print('matched:', labels[0], str(index_j), labels[1], str(index_k))
        #save to file system if vocabulary of words is not exists.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump(result, data_f)
    return result  
    
# a weighted 
Example 60
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License
def get_label_sim_matrix(vocabulary_index2word_label,word2vec_model_label_path='../tag-all.bin-300',name_scope='',threshold=0):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_sim_" + str(threshold) + ".pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            result=pickle.load(data_f)
            return result
    else:
        model=word2vec.load(word2vec_model_label_path,kind='bin')
        #m = model.vectors.shape[0]-1 #length # the first one is </s>, to be eliminated
        m = len(vocabulary_index2word_label)
        result=np.zeros((m,m))
        count_less_th = 0.0 # count the sim less than the threshold
        for i in range(0,m):
            for j in range(0,m):
                vector_i=model.get_vector(vocabulary_index2word_label[i])
                vector_j=model.get_vector(vocabulary_index2word_label[j])
                #result[i][j] = np.dot(vector_i,vector_j.T) # can be negative here, result in [-1,1]
                result[i][j] = (1+np.dot(vector_i,vector_j.T))/2 # result in [0,1]
                if result[i][j] < threshold:
                    count_less_th = count_less_th + 1
                    result[i][j] = 0
        print("result",result)
        print("result",result.shape)
        print("retained similarities percentage:", str(1-count_less_th/float(m)/float(m)))
        #save to file system if vocabulary of words is not exists.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump(result, data_f)
    return result 
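
The core of get_label_sim_matrix above is the rescaling (1 + dot_product) / 2, which maps the dot product of (assumed unit-length) label vectors from [-1, 1] into [0, 1] before thresholding. A small self-contained sketch of just that step, with made-up vectors:

import numpy as np

# Two made-up, unit-length label vectors
vector_i = np.array([1.0, 0.0])
vector_j = np.array([-1.0, 0.0])

sim = (1 + np.dot(vector_i, vector_j.T)) / 2   # opposite directions -> 0.0
threshold = 0.3
if sim < threshold:
    sim = 0   # similarities below the threshold are zeroed, as above
print(sim)    # 0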
Example 61
Project: Automated-Social-Annotation   Author: acadTags   File: HAN_train.py    MIT License 5 votes vote down vote up
def assign_pretrained_word_embedding(sess,vocabulary_index2word,vocab_size,model,num_run,word2vec_model_path=None):
    if num_run==0:
        print("using pre-trained word emebedding.started.word2vec_model_path:",word2vec_model_path)
    # transform embedding input into a dictionary
    # word2vecc=word2vec.load('word_embedding.txt') #load vocab-vector fiel.word2vecc['w91874']
    word2vec_model = word2vec.load(word2vec_model_path, kind='bin')
    word2vec_dict = {}
    for word, vector in zip(word2vec_model.vocab, word2vec_model.vectors):
        word2vec_dict[word] = vector
    word_embedding_2dlist = [[]] * vocab_size  # create an empty word_embedding list: a list of lists, where each word maps to a list of values forming its embedding vector.
    word_embedding_2dlist[0] = np.zeros(FLAGS.embed_size)  # assign empty for first word:'PAD'
    bound = np.sqrt(6.0) / np.sqrt(vocab_size)  # bound for random variables.
    count_exist = 0;
    count_not_exist = 0
    for i in range(1, vocab_size):  # loop each word
        word = vocabulary_index2word[i]  # get a word
        embedding = None
        try:
            embedding = word2vec_dict[word]  # try to get vector:it is an array.
        except Exception:
            embedding = None
        if embedding is not None:  # the word has an embedding
            word_embedding_2dlist[i] = embedding;
            count_exist = count_exist + 1  # assign array to this word.
        else:  # no embedding for this word
            word_embedding_2dlist[i] = np.random.uniform(-bound, bound, FLAGS.embed_size);
            count_not_exist = count_not_exist + 1  # init a random value for the word.
    word_embedding_final = np.array(word_embedding_2dlist)  # convert to 2d array.
    word_embedding = tf.constant(word_embedding_final, dtype=tf.float32)  # convert to tensor
    t_assign_embedding = tf.assign(model.Embedding,word_embedding)  # assign this value to our embedding variables of our model.
    sess.run(t_assign_embedding);
    if num_run==0:
        print("word. exists embedding:", count_exist, " ;word not exist embedding:", count_not_exist)
        print("using pre-trained word emebedding.ended...")

# based on a threshold, 在验证集上做验证,报告损失、精确度-multilabel 
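
In assign_pretrained_word_embedding above, words missing from the word2vec model are initialized uniformly at random within a bound derived from the vocabulary size. A tiny stand-alone illustration of that step, with assumed vocab_size and embed_size values:

import numpy as np

vocab_size, embed_size = 10000, 300          # assumed sizes
bound = np.sqrt(6.0) / np.sqrt(vocab_size)   # same bound as in the function above

# Random vector for a word with no pre-trained embedding
oov_vector = np.random.uniform(-bound, bound, embed_size)
print(oov_vector.shape)                      # (300,)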
Example 62
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License 5 votes vote down vote up
def get_label_sub_matrix(vocabulary_word2index_label,kb_path,name_scope=''):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_sub.pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            result=pickle.load(data_f)
            return result
    else:
        # load label embedding
        m = len(vocabulary_word2index_label)
        result=np.zeros((m,m))
        with open(kb_path, 'r') as label_pairs:
            lps = label_pairs.readlines() # lps: label pairs
        lps = [x.strip() for x in lps]
        for lp in lps:
            labels = lp.split(',')
            if (len(labels) == 3 and labels[-1] == 'true') or len(labels) == 2:
                index_j = vocabulary_word2index_label.get(labels[0].lower(),-1)
                index_k = vocabulary_word2index_label.get(labels[1].lower(),-1)
                if index_j != -1 and index_k != -1 and index_j != index_k: # if both of the two labels are in the training data, and they are different from each other (diagonal as 0).
                    result[index_j,index_k] = 1.
                    print('matched:', labels[0], str(index_j), labels[1], str(index_k))
        # save the matrix to the file system if the cache file does not exist.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump(result, data_f)
    return result  
    
# a weighted 
Example 63
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License 5 votes vote down vote up
def get_label_sim_matrix(vocabulary_index2word_label,word2vec_model_label_path='../tag-all.bin-300',name_scope='',threshold=0):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_sim_" + str(threshold) + ".pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            result=pickle.load(data_f)
            return result
    else:
        model=word2vec.load(word2vec_model_label_path,kind='bin')
        #m = model.vectors.shape[0]-1 #length # the first one is </s>, to be eliminated
        m = len(vocabulary_index2word_label)
        result=np.zeros((m,m))
        count_less_th = 0.0 # count the sim less than the threshold
        for i in range(0,m):
            for j in range(0,m):
                vector_i=model.get_vector(vocabulary_index2word_label[i])
                vector_j=model.get_vector(vocabulary_index2word_label[j])
                #result[i][j] = np.dot(vector_i,vector_j.T) # can be negative here, result in [-1,1]
                result[i][j] = (1+np.dot(vector_i,vector_j.T))/2 # result in [0,1]
                if result[i][j] < threshold:
                    count_less_th = count_less_th + 1
                    result[i][j] = 0
        print("result",result)
        print("result",result.shape)
        print("retained similarities percentage:", str(1-count_less_th/float(m)/float(m)))
        # save the matrix to the file system if the cache file does not exist.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump(result, data_f)
    return result

# used for other embedding 
Example 64
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License 5 votes vote down vote up
def create_voabulary(word2vec_model_path,name_scope=''):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_word_voabulary.pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            vocabulary_word2index, vocabulary_index2word=pickle.load(data_f)
            return vocabulary_word2index, vocabulary_index2word
    else:
        vocabulary_word2index={}
        vocabulary_index2word={}
        print("create vocabulary. word2vec_model_path:",word2vec_model_path)
        model=word2vec.load(word2vec_model_path,kind='bin')
        vocabulary_word2index['PAD_ID']=0
        vocabulary_index2word[0]='PAD_ID'
        special_index=0
        if 'biLstmTextRelation' in name_scope:
            vocabulary_word2index['EOS']=1 # a special token for biLstTextRelation model. which is used between two sentences.
            vocabulary_index2word[1]='EOS'
            special_index=1
        for i,vocab in enumerate(model.vocab):
            vocabulary_word2index[vocab]=i+1+special_index
            vocabulary_index2word[i+1+special_index]=vocab

        # save the vocabulary to the file system if the cache file does not exist.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump((vocabulary_word2index,vocabulary_index2word), data_f)
    return vocabulary_word2index,vocabulary_index2word

# create vocabulary of labels. labels are sorted by frequency: 1 is high frequency, 2 is low frequency. 
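
create_voabulary above reserves index 0 for 'PAD_ID' (and index 1 for 'EOS' when the name_scope contains 'biLstmTextRelation') before mapping the word2vec vocabulary. A small illustration of that index layout, using a made-up word list in place of a real .bin model:

vocab_from_model = ['the', 'cat', 'sat']   # stand-in for model.vocab

vocabulary_word2index = {'PAD_ID': 0}
vocabulary_index2word = {0: 'PAD_ID'}
special_index = 0                          # would be 1 if 'EOS' were inserted

for i, vocab in enumerate(vocab_from_model):
    vocabulary_word2index[vocab] = i + 1 + special_index
    vocabulary_index2word[i + 1 + special_index] = vocab

print(vocabulary_word2index)   # {'PAD_ID': 0, 'the': 1, 'cat': 2, 'sat': 3}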
Example 65
Project: Automated-Social-Annotation   Author: acadTags   File: JMAN_train.py    MIT License 5 votes vote down vote up
def assign_pretrained_word_embedding(sess,vocabulary_index2word,vocab_size,model,num_run,word2vec_model_path=None):
    if num_run==0:
        print("using pre-trained word emebedding.started.word2vec_model_path:",word2vec_model_path)
    # transform embedding input into a dictionary
    # word2vecc=word2vec.load('word_embedding.txt') #load vocab-vector fiel.word2vecc['w91874']
    word2vec_model = word2vec.load(word2vec_model_path, kind='bin')
    word2vec_dict = {}
    for word, vector in zip(word2vec_model.vocab, word2vec_model.vectors):
        word2vec_dict[word] = vector
    word_embedding_2dlist = [[]] * vocab_size  # create an empty word_embedding list: a list of lists, where each word maps to a list of values forming its embedding vector.
    word_embedding_2dlist[0] = np.zeros(FLAGS.embed_size)  # assign empty for first word:'PAD'
    bound = np.sqrt(6.0) / np.sqrt(vocab_size)  # bound for random variables.
    count_exist = 0;
    count_not_exist = 0
    for i in range(1, vocab_size):  # loop each word
        word = vocabulary_index2word[i]  # get a word
        embedding = None
        try:
            embedding = word2vec_dict[word]  # try to get vector:it is an array.
        except Exception:
            embedding = None
        if embedding is not None:  # the word has an embedding
            word_embedding_2dlist[i] = embedding;
            count_exist = count_exist + 1  # assign array to this word.
        else:  # no embedding for this word
            word_embedding_2dlist[i] = np.random.uniform(-bound, bound, FLAGS.embed_size);
            count_not_exist = count_not_exist + 1  # init a random value for the word.
    word_embedding_final = np.array(word_embedding_2dlist)  # convert to 2d array.
    word_embedding = tf.constant(word_embedding_final, dtype=tf.float32)  # convert to tensor
    t_assign_embedding = tf.assign(model.Embedding,word_embedding)  # assign this value to our embedding variables of our model.
    sess.run(t_assign_embedding);
    if num_run==0:
        print("word. exists embedding:", count_exist, " ;word not exist embedding:", count_not_exist)
        print("using pre-trained word emebedding.ended...") 
Example 66
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License 5 votes vote down vote up
def get_label_sub_matrix(vocabulary_word2index_label,kb_path,name_scope=''):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_sub.pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            result=pickle.load(data_f)
            return result
    else:
        # load label embedding
        m = len(vocabulary_word2index_label)
        result=np.zeros((m,m))
        with open(kb_path, 'r') as label_pairs:
            lps = label_pairs.readlines() # lps: label pairs
        lps = [x.strip() for x in lps]
        for lp in lps:
            labels = lp.split(',')
            if (len(labels) == 3 and labels[-1] == 'true') or len(labels) == 2:
                index_j = vocabulary_word2index_label.get(labels[0].lower(),-1)
                index_k = vocabulary_word2index_label.get(labels[1].lower(),-1)
                if index_j != -1 and index_k != -1 and index_j != index_k: # if both of the two labels are in the training data, and they are different from each other (diagonal as 0).
                    result[index_j,index_k] = 1.
                    print('matched:', labels[0], str(index_j), labels[1], str(index_k))
        # save the matrix to the file system if the cache file does not exist.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump(result, data_f)
    return result  
    
# a weighted 
Example 67
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License 5 votes vote down vote up
def get_label_sim_matrix(vocabulary_index2word_label,word2vec_model_label_path='../tag-all.bin-300',name_scope='',threshold=0):
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_sim_" + str(threshold) + ".pik"
    print("cache_path:",cache_path,"file_exists:",os.path.exists(cache_path))
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            result=pickle.load(data_f)
            return result
    else:
        model=word2vec.load(word2vec_model_label_path,kind='bin')
        #m = model.vectors.shape[0]-1 #length # the first one is </s>, to be eliminated
        m = len(vocabulary_index2word_label)
        result=np.zeros((m,m))
        count_less_th = 0.0 # count the sim less than the threshold
        for i in range(0,m):
            for j in range(0,m):
                vector_i=model.get_vector(vocabulary_index2word_label[i])
                vector_j=model.get_vector(vocabulary_index2word_label[j])
                #result[i][j] = np.dot(vector_i,vector_j.T) # can be negative here, result in [-1,1]
                result[i][j] = (1+np.dot(vector_i,vector_j.T))/2 # result in [0,1]
                if result[i][j] < threshold:
                    count_less_th = count_less_th + 1
                    result[i][j] = 0
        print("result",result)
        print("result",result.shape)
        print("retained similarities percentage:", str(1-count_less_th/float(m)/float(m)))
        # save the matrix to the file system if the cache file does not exist.
        if not os.path.exists(cache_path):
            with open(cache_path, 'ab') as data_f:
                pickle.dump(result, data_f)
    return result 
Example 68
Project: model-api-sequence   Author: evandowning   File: rnn.py    GNU General Public License v3.0 4 votes vote down vote up
def sequence_generator(folder, sample, foldIDs, batchSize, task, convert):
    # We want to loop infinitely because we're training our data on multiple epochs in build_LSTM_model()
    while 1:
        xSet = np.array([])
        ySet = np.array([])

        num = 0;
        for i in foldIDs:
            x = np.array([])
            y = np.array([])

            # Extract sample's name and number of sequences
            fn = sample[i][0]
            numSeq = sample[i][1]

            # Read in sample's sequences
            path = os.path.join(folder,fn+'.pkl')
            with open(path, 'rb') as fr:
                for e in enumerate(range(numSeq)):
                    t = pkl.load(fr)
                    x = t[0]
                    y = t[1]

                    # If this should be binary classification, convert labels > 0 to 1
                    if task == 'binary_classification':
                        if y > 0:
                            y = 1
                    elif task == 'multi_classification':
                        y = convert.index(y)

                    if len(xSet) == 0:
                        xSet = x
                        ySet = [y]
                    else:
                        xSet = np.vstack([xSet,x])
                        ySet = np.vstack([ySet,[y]])

                    # Increase count of number of sample features extracted
                    num += 1

                    # Batch size reached, yield data
                    if num % batchSize == 0:
                        # Here we convert our lists into Numpy arrays because
                        # Keras requires it as input for its fit_generator()
                        rv_x = xSet
                        rv_y = ySet

                        xSet = np.array([])
                        ySet = np.array([])

                        num = 0

                        yield (rv_x, rv_y)

        # Yield remaining set
        if len(xSet) > 0:
            yield (xSet, ySet)

# Builds LSTM model 
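
The generator above relies on several objects having been pickled back-to-back into a single file, which is why pkl.load(fr) is called once per sequence on the same open handle. A small self-contained sketch of writing and reading such a file (the feature shape and label are made up):

import pickle as pkl
import numpy as np

path = 'sample_a.pkl'
num_seq = 3

# Write several (features, label) tuples into one file, one after another.
with open(path, 'wb') as fw:
    for _ in range(num_seq):
        x = np.zeros(5)   # made-up feature vector
        y = 1             # made-up label
        pkl.dump((x, y), fw)

# Read them back the same way the generator does.
with open(path, 'rb') as fr:
    for _ in range(num_seq):
        x, y = pkl.load(fr)
        print(x.shape, y)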
Example 69
Project: model-api-sequence   Author: evandowning   File: lstm_cudnn.py    GNU General Public License v3.0 4 votes vote down vote up
def sequence_generator(folder, sample, foldIDs, batchSize, task, convert):
    # We want to loop infinitely because we're training our data on multiple epochs in build_LSTM_model()
    while 1:
        xSet = np.array([])
        ySet = np.array([])

        num = 0;
        for i in foldIDs:
            x = np.array([])
            y = np.array([])

            # Extract sample's name and number of sequences
            fn = sample[i][0]
            numSeq = sample[i][1]

            # Read in sample's sequences
            path = os.path.join(folder,fn+'.pkl')
            with open(path, 'rb') as fr:
                for e in enumerate(range(numSeq)):
                    t = pkl.load(fr)
                    x = t[0]
                    y = t[1]

                    # If this should be binary classification, convert labels > 0 to 1
                    if task == 'binary_classification':
                        if y > 0:
                            y = 1
                    elif task == 'multi_classification':
                        y = convert.index(y)

                    if len(xSet) == 0:
                        xSet = x
                        ySet = [y]
                    else:
                        xSet = np.vstack([xSet,x])
                        ySet = np.vstack([ySet,[y]])

                    # Increase count of number of sample features extracted
                    num += 1

                    # Batch size reached, yield data
                    if num % batchSize == 0:
                        # Here we convert our lists into Numpy arrays because
                        # Keras requires it as input for its fit_generator()
                        rv_x = xSet
                        rv_y = ySet

                        xSet = np.array([])
                        ySet = np.array([])

                        num = 0

                        yield (rv_x, rv_y)

        # Yield remaining set
        if len(xSet) > 0:
            yield (xSet, ySet)

# Builds LSTM model 
Example 70
Project: model-api-sequence   Author: evandowning   File: lstm.py    GNU General Public License v3.0 4 votes vote down vote up
def sequence_generator(folder, sample, foldIDs, batchSize, task, convert):
    # We want to loop infinitely because we're training our data on multiple epochs in build_LSTM_model()
    while 1:
        xSet = np.array([])
        ySet = np.array([])

        num = 0;
        for i in foldIDs:
            x = np.array([])
            y = np.array([])

            # Extract sample's name and number of sequences
            fn = sample[i][0]
            numSeq = sample[i][1]

            # Read in sample's sequences
            path = os.path.join(folder,fn+'.pkl')
            with open(path, 'rb') as fr:
                for e in enumerate(range(numSeq)):
                    t = pkl.load(fr)
                    x = t[0]
                    y = t[1]

                    # If this should be binary classification, convert labels > 0 to 1
                    if task == 'binary_classification':
                        if y > 0:
                            y = 1
                    elif task == 'multi_classification':
                        y = convert.index(y)

                    if len(xSet) == 0:
                        xSet = x
                        ySet = [y]
                    else:
                        xSet = np.vstack([xSet,x])
                        ySet = np.vstack([ySet,[y]])

                    # Increase count of number of sample features extracted
                    num += 1

                    # Batch size reached, yield data
                    if num % batchSize == 0:
                        # Here we convert our lists into Numpy arrays because
                        # Keras requires it as input for its fit_generator()
                        rv_x = xSet
                        rv_y = ySet

                        xSet = np.array([])
                        ySet = np.array([])

                        num = 0

                        yield (rv_x, rv_y)

        # Yield remaining set
        if len(xSet) > 0:
            yield (xSet, ySet)

# Builds LSTM model 
Example 71
Project: alfred-yubikey-otp   Author: robertoriv   File: background.py    MIT License 4 votes vote down vote up
def run_in_background(name, args, **kwargs):
    r"""Cache arguments then call this script again via :func:`subprocess.call`.

    :param name: name of job
    :type name: unicode
    :param args: arguments passed as first argument to :func:`subprocess.call`
    :param \**kwargs: keyword arguments to :func:`subprocess.call`
    :returns: exit code of sub-process
    :rtype: int

    When you call this function, it caches its arguments and then calls
    ``background.py`` in a subprocess. The Python subprocess will load the
    cached arguments, fork into the background, and then run the command you
    specified.

    This function will return as soon as the ``background.py`` subprocess has
    forked, returning the exit code of *that* process (i.e. not of the command
    you're trying to run).

    If that process fails, an error will be written to the log file.

    If a process is already running under the same name, this function will
    return immediately and will not run the specified command.

    """
    if is_running(name):
        _log().info('[%s] job already running', name)
        return

    argcache = _arg_cache(name)

    # Cache arguments
    with open(argcache, 'wb') as fp:
        pickle.dump({'args': args, 'kwargs': kwargs}, fp)
        _log().debug('[%s] command cached: %s', name, argcache)

    # Call this script
    cmd = ['/usr/bin/python', __file__, name]
    _log().debug('[%s] passing job to background runner: %r', name, cmd)
    retcode = subprocess.call(cmd)

    if retcode:  # pragma: no cover
        _log().error('[%s] background runner failed with %d', name, retcode)
    else:
        _log().debug('[%s] background job started', name)

    return retcode 
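
A hedged sketch of how run_in_background is typically called from a workflow script, paired with is_running from the same module so a job is not started twice; the job name, command and extra keyword argument here are made up:

from workflow.background import is_running, run_in_background

# Start a (made-up) long-running update command under the name 'update'.
if not is_running('update'):
    run_in_background('update',
                      ['/usr/bin/python', 'update.py'],
                      env={'LANG': 'en_US.UTF-8'})  # extra kwargs are forwarded to subprocess.call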
Example 72
Project: alfred-yubikey-otp   Author: robertoriv   File: background.py    MIT License 4 votes vote down vote up
def main(wf):  # pragma: no cover
    """Run command in a background process.

    Load cached arguments, fork into background, then call
    :meth:`subprocess.call` with cached arguments.

    """
    log = wf.logger
    name = wf.args[0]
    argcache = _arg_cache(name)
    if not os.path.exists(argcache):
        msg = '[{0}] command cache not found: {1}'.format(name, argcache)
        log.critical(msg)
        raise IOError(msg)

    # Fork to background and run command
    pidfile = _pid_file(name)
    _background(pidfile)

    # Load cached arguments
    with open(argcache, 'rb') as fp:
        data = pickle.load(fp)

    # Cached arguments
    args = data['args']
    kwargs = data['kwargs']

    # Delete argument cache file
    os.unlink(argcache)

    try:
        # Run the command
        log.debug('[%s] running command: %r', name, args)

        retcode = subprocess.call(args, **kwargs)

        if retcode:
            log.error('[%s] command failed with status %d', name, retcode)
    finally:
        os.unlink(pidfile)

    log.debug('[%s] job complete', name) 
Example 73
Project: alfred-yubikey-otp   Author: robertoriv   File: workflow.py    MIT License 4 votes vote down vote up
def stored_data(self, name):
        """Retrieve data from data directory.

        Returns ``None`` if there are no data stored under ``name``.

        .. versionadded:: 1.8

        :param name: name of datastore

        """
        metadata_path = self.datafile('.{0}.alfred-workflow'.format(name))

        if not os.path.exists(metadata_path):
            self.logger.debug('no data stored for `%s`', name)
            return None

        with open(metadata_path, 'rb') as file_obj:
            serializer_name = file_obj.read().strip()

        serializer = manager.serializer(serializer_name)

        if serializer is None:
            raise ValueError(
                'Unknown serializer `{0}`. Register a corresponding '
                'serializer with `manager.register()` '
                'to load this data.'.format(serializer_name))

        self.logger.debug('data `%s` stored as `%s`', name, serializer_name)

        filename = '{0}.{1}'.format(name, serializer_name)
        data_path = self.datafile(filename)

        if not os.path.exists(data_path):
            self.logger.debug('no data stored: %s', name)
            if os.path.exists(metadata_path):
                os.unlink(metadata_path)

            return None

        with open(data_path, 'rb') as file_obj:
            data = serializer.load(file_obj)

        self.logger.debug('stored data loaded: %s', data_path)

        return data 
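
stored_data above looks up a serializer by the name recorded next to the data, and that serializer only needs load() and dump() methods. A hedged sketch of registering a minimal pickle-based serializer and reading a datastore back; the serializer name, datastore name and the manager/wf objects are assumptions:

import pickle

class MyPickleSerializer(object):
    """Minimal object exposing the load()/dump() interface the manager expects."""
    @classmethod
    def load(cls, file_obj):
        return pickle.load(file_obj)

    @classmethod
    def dump(cls, obj, file_obj):
        pickle.dump(obj, file_obj, protocol=2)

# 'manager' is assumed to be the workflow's serializer manager and
# 'wf' an instance of the Workflow class whose stored_data method is shown above.
manager.register('mypickle', MyPickleSerializer)

data = wf.stored_data('user_prefs')   # returns None if nothing is stored
if data is None:
    print('no cached data for user_prefs')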
Example 74
Project: BASS   Author: Cisco-Talos   File: server.py    GNU General Public License v2.0 4 votes vote down vote up
def function_add():
    global Session

    session = Session()
    filename, file_ = request.files.items()[0]
    db = Database(pickle.load(file_))
    arch_name = db.architecture_name
    if arch_name == "metapc":
        arch_name = "x86"
    # Get the architecture, if it already exists
    try:
        arch = session.query(Architecture).filter(Architecture.name == arch_name,
                Architecture.bits == db.architecture_bits,
                Architecture.little_endian == (db.architecture_endianness == "little")).one()
    except NoResultFound:
        arch = Architecture(name = arch_name, 
                            bits = db.architecture_bits, 
                            little_endian = db.architecture_endianness == "little")
        session.add(arch)

    for func in db.functions:
        raw_hash = _function_calculate_raw_sha256(func)
        size = _function_get_size(func)

        try:
            function = session.query(Function).filter(Function.raw_sha256 == raw_hash,
                    Function.size == size,
                    Function.arch == arch.id).one()
        except NoResultFound:
            mnem_hash = _function_calculate_mnem_sha256(func)
            instrs = _function_count_instructions(func)
            bbs = _function_count_basic_blocks(func)
            loops = _function_count_loops(func)
            trans = _function_count_transitions(func)
            func_json = _function_get_json(func)

            function = Function(
                    raw_sha256 = raw_hash,
                    size = size,
                    mnem_sha256 = mnem_hash,
                    executable = db.sha256,
                    entry_point = func.entry_point,
                    data = json.dumps(func_json),
                    architecture = arch.id,
                    basic_blocks = bbs,
                    transitions = trans,
                    loops = loops)
            session.add(function)

    session.commit()

    return make_response("", 200) 
Example 75
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: background.py    MIT License 4 votes vote down vote up
def run_in_background(name, args, **kwargs):
    r"""Cache arguments then call this script again via :func:`subprocess.call`.

    :param name: name of job
    :type name: unicode
    :param args: arguments passed as first argument to :func:`subprocess.call`
    :param \**kwargs: keyword arguments to :func:`subprocess.call`
    :returns: exit code of sub-process
    :rtype: int

    When you call this function, it caches its arguments and then calls
    ``background.py`` in a subprocess. The Python subprocess will load the
    cached arguments, fork into the background, and then run the command you
    specified.

    This function will return as soon as the ``background.py`` subprocess has
    forked, returning the exit code of *that* process (i.e. not of the command
    you're trying to run).

    If that process fails, an error will be written to the log file.

    If a process is already running under the same name, this function will
    return immediately and will not run the specified command.

    """
    if is_running(name):
        _log().info('[%s] job already running', name)
        return

    argcache = _arg_cache(name)

    # Cache arguments
    with open(argcache, 'wb') as fp:
        pickle.dump({'args': args, 'kwargs': kwargs}, fp)
        _log().debug('[%s] command cached: %s', name, argcache)

    # Call this script
    cmd = ['/usr/bin/python', __file__, name]
    _log().debug('[%s] passing job to background runner: %r', name, cmd)
    retcode = subprocess.call(cmd)

    if retcode:  # pragma: no cover
        _log().error('[%s] background runner failed with %d', name, retcode)
    else:
        _log().debug('[%s] background job started', name)

    return retcode 
Example 76
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: background.py    MIT License 4 votes vote down vote up
def main(wf):  # pragma: no cover
    """Run command in a background process.

    Load cached arguments, fork into background, then call
    :meth:`subprocess.call` with cached arguments.

    """
    log = wf.logger
    name = wf.args[0]
    argcache = _arg_cache(name)
    if not os.path.exists(argcache):
        msg = '[{0}] command cache not found: {1}'.format(name, argcache)
        log.critical(msg)
        raise IOError(msg)

    # Fork to background and run command
    pidfile = _pid_file(name)
    _background(pidfile)

    # Load cached arguments
    with open(argcache, 'rb') as fp:
        data = pickle.load(fp)

    # Cached arguments
    args = data['args']
    kwargs = data['kwargs']

    # Delete argument cache file
    os.unlink(argcache)

    try:
        # Run the command
        log.debug('[%s] running command: %r', name, args)

        retcode = subprocess.call(args, **kwargs)

        if retcode:
            log.error('[%s] command failed with status %d', name, retcode)
    finally:
        os.unlink(pidfile)

    log.debug('[%s] job complete', name) 
Example 77
Project: wechat-alfred-workflow   Author: TKkk-iOSer   File: workflow.py    MIT License 4 votes vote down vote up
def stored_data(self, name):
        """Retrieve data from data directory.

        Returns ``None`` if there are no data stored under ``name``.

        .. versionadded:: 1.8

        :param name: name of datastore

        """
        metadata_path = self.datafile('.{0}.alfred-workflow'.format(name))

        if not os.path.exists(metadata_path):
            self.logger.debug('no data stored for `%s`', name)
            return None

        with open(metadata_path, 'rb') as file_obj:
            serializer_name = file_obj.read().strip()

        serializer = manager.serializer(serializer_name)

        if serializer is None:
            raise ValueError(
                'Unknown serializer `{0}`. Register a corresponding '
                'serializer with `manager.register()` '
                'to load this data.'.format(serializer_name))

        self.logger.debug('data `%s` stored as `%s`', name, serializer_name)

        filename = '{0}.{1}'.format(name, serializer_name)
        data_path = self.datafile(filename)

        if not os.path.exists(data_path):
            self.logger.debug('no data stored: %s', name)
            if os.path.exists(metadata_path):
                os.unlink(metadata_path)

            return None

        with open(data_path, 'rb') as file_obj:
            data = serializer.load(file_obj)

        self.logger.debug('stored data loaded: %s', data_path)

        return data 
Example 78
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License 4 votes vote down vote up
def create_voabulary_label(voabulary_label,name_scope='',use_seq2seq=False,label_freq_th=0):
    print("create_voabulary_label_sorted.started.traning_data_path:",voabulary_label)
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_voabulary.pik"
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            vocabulary_word2index_label, vocabulary_index2word_label=pickle.load(data_f)
            return vocabulary_word2index_label, vocabulary_index2word_label
    else:
        zhihu_f_train = codecs.open(voabulary_label, 'r', 'utf8')
        lines=zhihu_f_train.readlines()
        count=0
        vocabulary_word2index_label={}
        vocabulary_index2word_label={}
        vocabulary_label_count_dict={} #{label:count}
        for i,line in enumerate(lines):
            if '__label__' in line:  #'__label__-2051131023989903826
                label=line[line.index('__label__')+len('__label__'):].strip().replace("\n","")
                # add multi-label processing
                #print(label)
                labels=label.split(" ")
                for label in labels:
                    if vocabulary_label_count_dict.get(label,None) is not None:
                        vocabulary_label_count_dict[label]=vocabulary_label_count_dict[label]+1
                    else:
                        vocabulary_label_count_dict[label]=1
        list_label=sort_by_value(vocabulary_label_count_dict) # sort the labels by their frequency in the training dataset.

        print("length of list_label:",len(list_label));#print(";list_label:",list_label)
        countt=0

        ##########################################################################################
        if use_seq2seq: # if used for the seq2seq model, insert three special labels (tokens): _GO, _END and _PAD
            i_list=[0,1,2];label_special_list=[_GO,_END,_PAD]
            for i,label in zip(i_list,label_special_list):
                vocabulary_word2index_label[label] = i
                vocabulary_index2word_label[i] = label
        #########################################################################################
        for i,label in enumerate(list_label):
            if i<10:
                count_value=vocabulary_label_count_dict[label]
                print("label:",label,"count_value:",count_value)
                countt=countt+count_value
            if vocabulary_label_count_dict[label]>=label_freq_th:
                indexx = i + 3 if use_seq2seq else i
                vocabulary_word2index_label[label]=indexx
                vocabulary_index2word_label[indexx]=label
        print("count top10:",countt)

        # save the label vocabulary to the file system if the cache file does not exist.
        if not os.path.exists(cache_path): # if it does not exist, write to the cache file
            with open(cache_path, 'ab') as data_f:
                pickle.dump((vocabulary_word2index_label,vocabulary_index2word_label), data_f)
    print("create_voabulary_label_sorted.ended.len of vocabulary_label:",len(vocabulary_index2word_label))
    return vocabulary_word2index_label,vocabulary_index2word_label 
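
The parser in create_voabulary_label above expects each training line to carry its tags after a '__label__' marker, separated by spaces. A tiny self-contained illustration of that extraction step, with a made-up line:

line = 'some document text __label__java python machine-learning\n'

if '__label__' in line:
    label = line[line.index('__label__') + len('__label__'):].strip().replace("\n", "")
    labels = label.split(" ")
    print(labels)   # ['java', 'python', 'machine-learning']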
Example 79
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License 4 votes vote down vote up
def create_voabulary_label(voabulary_label,name_scope='',use_seq2seq=False,label_freq_th=0):
    print("create_voabulary_label_sorted.started.traning_data_path:",voabulary_label)
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_voabulary.pik"
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            vocabulary_word2index_label, vocabulary_index2word_label=pickle.load(data_f)
            return vocabulary_word2index_label, vocabulary_index2word_label
    else:
        zhihu_f_train = codecs.open(voabulary_label, 'r', 'utf8')
        lines=zhihu_f_train.readlines()
        count=0
        vocabulary_word2index_label={}
        vocabulary_index2word_label={}
        vocabulary_label_count_dict={} #{label:count}
        for i,line in enumerate(lines):
            if '__label__' in line:  #'__label__-2051131023989903826
                label=line[line.index('__label__')+len('__label__'):].strip().replace("\n","")
                # add multi-label processing
                #print(label)
                labels=label.split(" ")
                for label in labels:
                    if vocabulary_label_count_dict.get(label,None) is not None:
                        vocabulary_label_count_dict[label]=vocabulary_label_count_dict[label]+1
                    else:
                        vocabulary_label_count_dict[label]=1
        list_label=sort_by_value(vocabulary_label_count_dict) # sort the labels by their frequency in the training dataset.

        print("length of list_label:",len(list_label));#print(";list_label:",list_label)
        countt=0

        ##########################################################################################
        if use_seq2seq: # if used for the seq2seq model, insert three special labels (tokens): _GO, _END and _PAD
            i_list=[0,1,2];label_special_list=[_GO,_END,_PAD]
            for i,label in zip(i_list,label_special_list):
                vocabulary_word2index_label[label] = i
                vocabulary_index2word_label[i] = label
        #########################################################################################
        for i,label in enumerate(list_label):
            if i<10:
                count_value=vocabulary_label_count_dict[label]
                print("label:",label,"count_value:",count_value)
                countt=countt+count_value
            if vocabulary_label_count_dict[label]>=label_freq_th:
                indexx = i + 3 if use_seq2seq else i
                vocabulary_word2index_label[label]=indexx
                vocabulary_index2word_label[indexx]=label
        print("count top10:",countt)

        # save the label vocabulary to the file system if the cache file does not exist.
        if not os.path.exists(cache_path): # if it does not exist, write to the cache file
            with open(cache_path, 'ab') as data_f:
                pickle.dump((vocabulary_word2index_label,vocabulary_index2word_label), data_f)
    print("create_voabulary_label_sorted.ended.len of vocabulary_label:",len(vocabulary_index2word_label))
    return vocabulary_word2index_label,vocabulary_index2word_label 
Example 80
Project: Automated-Social-Annotation   Author: acadTags   File: data_util.py    MIT License 4 votes vote down vote up
def create_voabulary_label(voabulary_label,name_scope='',use_seq2seq=False,label_freq_th=0):
    print("create_voabulary_label_sorted.started.traning_data_path:",voabulary_label)
    cache_path ='../cache_vocabulary_label_pik/'+ name_scope + "_label_voabulary.pik"
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as data_f:
            vocabulary_word2index_label, vocabulary_index2word_label=pickle.load(data_f)
            return vocabulary_word2index_label, vocabulary_index2word_label
    else:
        zhihu_f_train = codecs.open(voabulary_label, 'r', 'utf8')
        lines=zhihu_f_train.readlines()
        count=0
        vocabulary_word2index_label={}
        vocabulary_index2word_label={}
        vocabulary_label_count_dict={} #{label:count}
        for i,line in enumerate(lines):
            if '__label__' in line:  #'__label__-2051131023989903826
                label=line[line.index('__label__')+len('__label__'):].strip().replace("\n","")
                # add multi-label processing
                #print(label)
                labels=label.split(" ")
                for label in labels:
                    if vocabulary_label_count_dict.get(label,None) is not None:
                        vocabulary_label_count_dict[label]=vocabulary_label_count_dict[label]+1
                    else:
                        vocabulary_label_count_dict[label]=1
        list_label=sort_by_value(vocabulary_label_count_dict) # sort the labels by their frequency in the training dataset.

        print("length of list_label:",len(list_label));#print(";list_label:",list_label)
        countt=0

        ##########################################################################################
        if use_seq2seq: # if used for the seq2seq model, insert three special labels (tokens): _GO, _END and _PAD
            i_list=[0,1,2];label_special_list=[_GO,_END,_PAD]
            for i,label in zip(i_list,label_special_list):
                vocabulary_word2index_label[label] = i
                vocabulary_index2word_label[i] = label
        #########################################################################################
        for i,label in enumerate(list_label):
            if i<10:
                count_value=vocabulary_label_count_dict[label]
                print("label:",label,"count_value:",count_value)
                countt=countt+count_value
            if vocabulary_label_count_dict[label]>=label_freq_th:
                indexx = i + 3 if use_seq2seq else i
                vocabulary_word2index_label[label]=indexx
                vocabulary_index2word_label[indexx]=label
        print("count top10:",countt)

        # save the label vocabulary to the file system if the cache file does not exist.
        if not os.path.exists(cache_path): # if it does not exist, write to the cache file
            with open(cache_path, 'ab') as data_f:
                pickle.dump((vocabulary_word2index_label,vocabulary_index2word_label), data_f)
    print("create_voabulary_label_sorted.ended.len of vocabulary_label:",len(vocabulary_index2word_label))
    return vocabulary_word2index_label,vocabulary_index2word_label