Python ujson.load() Examples

The following are code examples showing how to use ujson.load(), collected from open source Python projects. Note that many of these projects import ujson under the alias json (import ujson as json), so the call appears in the snippets as json.load().
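
As a quick orientation, here is a minimal sketch of the pattern most of the examples below follow: open a file and pass the file object to ujson.load(), which parses the JSON document into Python dicts and lists. The file name config.json is only a placeholder.

import ujson

# Parse a JSON document from an open file object into Python objects.
with open('config.json', 'r', encoding='utf-8') as f:
    data = ujson.load(f)

print(type(data))  # typically dict or list, depending on the top-level JSON value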

Example 1
Project: MnemonicReader   Author: HKUST-KnowComp   File: preprocess.py    BSD 3-Clause "New" or "Revised" License
def load_dataset(path):
    """Load json file and store fields separately."""
    with open(path) as f:
        data = json.load(f)['data']
    output = {'qids': [], 'questions': [], 'answers': [],
              'contexts': [], 'qid2cid': []}
    for article in data:
        for paragraph in article['paragraphs']:
            output['contexts'].append(paragraph['context'])
            for qa in paragraph['qas']:
                output['qids'].append(qa['id'])
                output['questions'].append(qa['question'])
                output['qid2cid'].append(len(output['contexts']) - 1)
                if 'answers' in qa:
                    output['answers'].append(qa['answers'])
    return output 
Example 2
Project: MnemonicReader   Author: HKUST-KnowComp   File: utils.py    BSD 3-Clause "New" or "Revised" License
def load_answers(filename):
    """Load the answers only of a SQuAD dataset. Store as qid -> [answers]."""
    # Load JSON file
    with open(filename) as f:
        examples = json.load(f)['data']

    ans = {}
    for article in examples:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                ans[qa['id']] = list(map(lambda x: x['text'], qa['answers']))
    return ans


# ------------------------------------------------------------------------------
# Dictionary building
# ------------------------------------------------------------------------------ 
Example 3
Project: monasca-events-api   Author: openstack   File: test_events_v1.py    Apache License 2.0
def test_should_fail_missing_timestamp_in_body(self, bulk_processor):
        events_resource = _init_resource(self)
        events_resource._processor = bulk_processor
        unit_test_patch = os.path.dirname(__file__)
        json_file_path = 'event_template_json/req_simple_event.json'
        patch_to_req_simple_event_file = os.path.join(unit_test_patch,
                                                      json_file_path)
        with open(patch_to_req_simple_event_file, 'r') as fi:
            events = json.load(fi)['events']
        body = {'events': [events]}
        response = self.simulate_request(
            path=ENDPOINT,
            method='POST',
            headers={
                'Content-Type': 'application/json',
                'X_ROLES': 'monasca-user'
            },
            body=json.dumps(body)
        )
        self.assertEqual(falcon.HTTP_422, response.status) 
Example 4
Project: DoubanMovieTool   Author: frostnotfall   File: utils.py    MIT License
def aiohttp_check_cookie():
    try:
        with open('cookies.json', 'r', encoding="UTF-8") as f:
            cookies = ujson.load(f)
        async with aiohttp.ClientSession(cookie_jar=aiohttp.CookieJar(),
                                         headers=head,
                                         cookies=cookies) as session:
            async with session.get('https://movie.douban.com'):
                pass
    except (ValueError, FileNotFoundError):
        async with aiohttp.ClientSession(cookie_jar=aiohttp.CookieJar(),
                                         headers=head) as session:
            async with session.get('https://movie.douban.com') as res:
                cookies = session.cookie_jar.filter_cookies('https://movie.douban.com')
                for key, cookie in res.cookies.items():
                    cookies[cookie.key] = cookie.value
                with open('cookies.json', 'w') as f:
                    ujson.dump(cookies, f) 
Example 5
Project: diplomacy   Author: diplomacy   File: server.py    GNU Affero General Public License v3.0
def load_json_from_disk(filename):
    """ Return a JSON dictionary loaded from given filename.
        If JSON parsing fails for the given filename, try to load the JSON dictionary from a backup file
        (if present) and rename the backup file to the given filename
        (the backup file becomes the current file version).

        :param filename: file path to open
        :return: JSON dictionary loaded from file
        :rtype: dict
    """
    try:
        with open(filename, 'rb') as file:
            json_dict = json.load(file)
    except ValueError as exception:
        backup_filename = get_backup_filename(filename)
        if not os.path.isfile(backup_filename):
            raise exception
        with open(backup_filename, 'rb') as backup_file:
            json_dict = json.load(backup_file)
        os.rename(backup_filename, filename)
    return json_dict 
Example 6
Project: tensorpack-mask-rcnn   Author: armandmcqueen   File: dataset.py    Apache License 2.0
def load(self, add_gt=True, add_mask=False):
        """
        Args:
            add_gt: whether to add ground truth bounding box annotations to the dicts
            add_mask: whether to also add ground truth mask

        Returns:
            a list of dict, each has keys including:
                'height', 'width', 'id', 'file_name',
                and (if add_gt is True) 'boxes', 'class', 'is_crowd', and optionally
                'segmentation'.
        """
        if add_mask:
            assert add_gt
        with timed_operation('Load Groundtruth Boxes for {}'.format(self.name)):
            img_ids = self.coco.getImgIds()
            img_ids.sort()
            # list of dict, each has keys: height,width,id,file_name
            imgs = self.coco.loadImgs(img_ids)

            for img in tqdm.tqdm(imgs):
                self._use_absolute_file_name(img)
                if add_gt:
                    self._add_detection_gt(img, add_mask)
            return imgs 
Example 7
Project: BERT   Author: yyht   File: es_indexing.py    Apache License 2.0
def __init__(self, config=None):

		if isinstance(config, dict) or isinstance(config, OrderedDict):
			self.config = config
		elif isinstance(config, str):
			try:
				self.config = json.load(open(config, "r"))
			except:
				self.config = {}

		self.username = self.config.get("username", "data_security_es_45")
		self.password = self.config.get("password", "Nb6121ca7ffe3")
		es_url = self.config.get("es_url", ['http://zsearch.alipay.com:9999'])

		if isinstance(es_url, list):
			self.es_url = es_url
		else:
			self.es_url = [es_url]

		self.es = Elasticsearch(self.es_url, http_auth=(self.username, self.password)) 
Example 8
Project: f6a_tw_crawler   Author: chhsiao1981   File: json_to_csv.py    MIT License
def json_to_csv(filename):
    with open(filename, 'r') as f:
        the_struct = json.load(f)

    the_struct = [_parse_struct(each_struct) for each_struct in the_struct]

    filename_meta = re.sub(ur'json$', 'meta.json', re.sub('^data', 'data/meta', filename))

    columns = []

    with open(filename_meta, 'r') as f:
        struct_meta = json.load(f)
        if struct_meta:
            columns = struct_meta[0].get('columns', [])
            columns = [each_column.encode('utf-8') for each_column in columns]

    df = pd.DataFrame(the_struct)

    if columns:
        df = df[columns]

    out_filename = re.sub(ur'.json$', '.csv', filename)

    cfg.logger.debug('to csv: out_filename: %s', out_filename)
    df.to_csv(out_filename, index=False) 
Example 9
Project: QANet-for-SQuAD-2.0   Author: Aadesh-Magare   File: util.py    MIT License
def load_model(model, checkpoint_path, gpu_ids, return_step=True):
    """Load model parameters from disk.

    Args:
        model (torch.nn.DataParallel): Load parameters into this model.
        checkpoint_path (str): Path to checkpoint to load.
        gpu_ids (list): GPU IDs for DataParallel.
        return_step (bool): Also return the step at which checkpoint was saved.

    Returns:
        model (torch.nn.DataParallel): Model loaded from checkpoint.
        step (int): Step at which checkpoint was saved. Only if `return_step`.
    """
    device = 'cuda:{}'.format(gpu_ids[0]) if gpu_ids else 'cpu'
    ckpt_dict = torch.load(checkpoint_path, map_location=device)

    # Build model, load parameters
    model.load_state_dict(ckpt_dict['model_state'])

    if return_step:
        step = ckpt_dict['step']
        return model, step

    return model 
Example 10
Project: QANet-for-SQuAD-2.0   Author: Aadesh-Magare   File: util.py    MIT License
def torch_from_json(path, dtype=torch.float32):
    """Load a PyTorch Tensor from a JSON file.

    Args:
        path (str): Path to the JSON file to load.
        dtype (torch.dtype): Data type of loaded array.

    Returns:
        tensor (torch.Tensor): Tensor loaded from JSON file.
    """
    with open(path, 'r') as fh:
        array = np.array(json.load(fh))

    tensor = torch.from_numpy(array).type(dtype)

    return tensor 
Example 11
Project: sanic-webpack   Author: qwexvf   File: __init__.py    GNU General Public License v3.0
def _set_asset_paths(self, app):
        """
        Read in the manifest JSON file for webpack assets.
        This allows us to get the asset path as well as hashed names.

        :param app: Sanic application
        :return: None
        """
        webpack_stats = app.config['WEBPACK_MANIFEST_PATH']

        try:
            with open(webpack_stats) as stats_json:
                stats = json.load(stats_json)

                if app.config['WEBPACK_ASSETS_URL']:
                    self.assets_url = app.config['WEBPACK_ASSETS_URL']
                else:
                    self.assets_url = stats['publicPath']

                self.assets = stats['assets']
        except IOError:
            raise RuntimeError(
                "Sanic-Webpack requires 'WEBPACK_MANIFEST_PATH' to be set and "
                "it must point to a valid json file.") 
Example 12
Project: zulip   Author: zulip   File: 0097_reactions_emoji_code.py    Apache License 2.0
def populate_new_fields(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
    # Open the JSON file which contains the data to be used for migration.
    MIGRATION_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "management", "data")
    path_to_unified_reactions = os.path.join(MIGRATION_DATA_PATH, "unified_reactions.json")
    unified_reactions = ujson.load(open(path_to_unified_reactions))

    Reaction = apps.get_model('zerver', 'Reaction')
    for reaction in Reaction.objects.all():
        reaction.emoji_code = unified_reactions.get(reaction.emoji_name)
        if reaction.emoji_code is None:
            # If it's not present in the unified_reactions map, it's a realm emoji.
            reaction.emoji_code = reaction.emoji_name
            if reaction.emoji_name == 'zulip':
                # `:zulip:` emoji is a zulip special custom emoji.
                reaction.reaction_type = 'zulip_extra_emoji'
            else:
                reaction.reaction_type = 'realm_emoji'
        reaction.save() 
Example 13
Project: zulip   Author: zulip   File: compilemessages.py    Apache License 2.0
def create_language_name_map(self) -> None:
        join = os.path.join
        deploy_root = settings.DEPLOY_ROOT
        path = join(deploy_root, 'locale', 'language_options.json')
        output_path = join(deploy_root, 'locale', 'language_name_map.json')

        with open(path, 'r') as reader:
            languages = ujson.load(reader)
            lang_list = []
            for lang_info in languages['languages']:
                lang_info['name'] = lang_info['name_local']
                del lang_info['name_local']
                lang_list.append(lang_info)

            lang_list.sort(key=lambda lang: lang['name'])

        with open(output_path, 'w') as output_file:
            ujson.dump({'name_map': lang_list}, output_file, indent=4, sort_keys=True)
            output_file.write('\n') 
Example 14
Project: swaggerit   Author: dutradda   File: utils.py    MIT License
def __call__(self, uri):
        schema_filename = os.path.join(self._schemas_path, uri.lstrip('/'))
        with open(schema_filename) as json_schema_file:
            return ujson.load(json_schema_file) 
Example 15
Project: swaggerit   Author: dutradda   File: utils.py    MIT License
def get_swagger_json(current_filename, swagger_json_name=None):
    if swagger_json_name is None:
        swagger_json_name = 'swagger.json'

    return ujson.load(open(os.path.join(get_dir_path(current_filename), swagger_json_name))) 
Example 16
Project: micropython-barebones   Author: reapzor   File: upip.py    MIT License
def get_pkg_metadata(name):
    f = url_open("https://pypi.org/pypi/%s/json" % name)
    try:
        return json.load(f)
    finally:
        f.close() 
Example 17
Project: trelliolibs   Author: quikmile   File: helpers.py    MIT License
def json_file_to_dict(_file: str) -> dict:
    """
    convert json file data to dict

    :param str _file: file location including name

    :rtype: dict
    :return: converted json to dict
    """
    config = None
    with open(_file) as config_file:
        config = json.load(config_file)

    return config 
Example 18
Project: ConvLab   Author: ConvLab   File: util.py    MIT License
def read_as_pickle(data_path, **kwargs):
    '''Submethod to read data as pickle'''
    with open(data_path, 'rb') as f:
        data = pickle.load(f)
    return data 
Example 19
Project: ConvLab   Author: ConvLab   File: util.py    MIT License
def read_as_plain(data_path, **kwargs):
    '''Submethod to read data as plain type'''
    open_file = open(data_path, 'r')
    ext = get_file_ext(data_path)
    if ext == '.json':
        data = ujson.load(open_file, **kwargs)
    elif ext == '.yml':
        data = yaml.load(open_file, **kwargs)
    else:
        data = open_file.read()
    open_file.close()
    return data 
Example 20
Project: orionx-api-client   Author: itolosa   File: fetch_method.py    MIT License
def __init__(self, filename='schema.json'):
    self.index = {}
    fp = open(filename)
    self.schema = ujson.load(fp)

    for type_data in self.schema['data']['__schema']['types']:
      kind = type_data['kind']
      name = type_data['name']

      if kind not in self.index:
        self.index[kind] = {}

      self.index[kind][name] = type_data

    fp.close() 
Example 21
Project: MnemonicReader   Author: HKUST-KnowComp   File: utils.py    BSD 3-Clause "New" or "Revised" License
def load_text(filename):
    """Load the paragraphs only of a SQuAD dataset. Store as qid -> text."""
    # Load JSON file
    with open(filename) as f:
        examples = json.load(f)['data']

    texts = {}
    for article in examples:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                texts[qa['id']] = paragraph['context']
    return texts 
Example 22
Project: decaNLP   Author: salesforce   File: predict.py    BSD 3-Clause "New" or "Revised" License
def get_best(args):
    with open(os.path.join(args.path, 'config.json')) as f:
        save_every = json.load(f)['save_every']
    
    with open(os.path.join(args.path, 'process_0.log')) as f:
        lines = f.readlines()

    best_score = 0
    best_it = 0
    deca_scores = {}
    for l in lines:
        if 'val' in l:
            try:
                task = l.split('val_')[1].split(':')[0]
            except Exception as e:
                print(e)
                continue
            it = int(l.split('iteration_')[1].split(':')[0])
            metric = args.task_to_metric[task]
            score = float(l.split(metric+'_')[1].split(':')[0])
            if it in deca_scores:
                deca_scores[it]['deca'] += score
                deca_scores[it][metric] = score
            else:
                deca_scores[it] = {'deca': score, metric: score}
            if deca_scores[it]['deca'] > best_score:
                best_score = deca_scores[it]['deca']
                best_it = it
    print(best_it)
    print(best_score)
    return os.path.join(args.path, f'iteration_{int(best_it)}.pth') 
Example 23
Project: DoubanMovieTool   Author: frostnotfall   File: utils.py    MIT License
def preload():
    def preload_img():
        *_, movie_id_list = data_funcs.load()
        img_dir = Path('img')
        for id_ in movie_id_list:
            if Path(img_dir, id_).exists() is False:
                print(f"{datetime.datetime.now().strftime('%Y.%m.%d-%H:%M:%S')}:预缓存,电影ID{id_}")
                data_funcs.save_img(id_)
                time.sleep(60)

    time.sleep(600)
    t = threading.Thread(target=preload_img)
    print(f"{datetime.datetime.now().strftime('%Y.%m.%d-%H:%M:%S')}:周期性任务 - 预缓存")
    t.setDaemon(True)
    t.start() 
Example 24
Project: diplomacy   Author: diplomacy   File: server.py    GNU Affero General Public License v3.0
def load_game(self, game_id):
        """ Return a game matching given game ID from server database.
            Raise an exception if such a game does not exist.

            If such game is already stored in server object, return it.

            Else, load it from disk but **do not store it in the server object**.

            To load and immediately store a game object in server object, please use method get_game().

            Method load_game() is convenient when you want to iterate over all games in server database
            without taking memory space.

            :param game_id: ID of game to load.
            :return: a ServerGame object
            :rtype: ServerGame
        """
        if game_id in self.games:
            return self.games[game_id]
        game_filename = os.path.join(ensure_path(self.games_path), '%s.json' % game_id)
        if not os.path.isfile(game_filename):
            raise exceptions.GameIdException()
        try:
            server_game = ServerGame.from_dict(load_json_from_disk(game_filename))  # type: ServerGame
            server_game.server = self
            server_game.filter_usernames(self.users.has_username)
            server_game.filter_tokens(self.users.has_token)
            return server_game
        except ValueError as exc:
            # Error occurred while parsing JSON file: bad JSON file.
            try:
                os.remove(game_filename)
            finally:
                # This should be an internal server error.
                raise exc 
Example 25
Project: diplomacy   Author: diplomacy   File: server.py    GNU Affero General Public License v3.0
def get_game(self, game_id):
        """ Return game saved on server matching given game ID.
            Raise an exception if game ID not found.
            Return game if already loaded on memory, else load it from disk, store it,
            perform any loading/addition processing and return it.

            :param game_id: ID of game to load.
            :return: a ServerGame object.
            :rtype: ServerGame
        """
        server_game = self.load_game(game_id)
        if game_id not in self.games:
            LOGGER.debug('Game loaded: %s', game_id)
            # Check dummy powers for this game as soon as it's loaded from disk.
            self.register_dummy_power_names(server_game)
            # Register game on memory.
            self.games[server_game.game_id] = server_game
            # Start DAIDE server for this game.
            self.start_new_daide_server(server_game.game_id)
            # We have just loaded game from disk. Start it if necessary.
            if not server_game.start_master and server_game.has_expected_controls_count():
                # We may have to start game.
                if server_game.does_not_wait():
                    # We must process game.
                    server_game.process()
                    self.save_game(server_game)
                # Game must be scheduled only if active.
                if server_game.is_game_active:
                    LOGGER.debug('Game loaded and scheduled: %s', server_game.game_id)
                    self.schedule_game(server_game)
        return server_game 
Example 26
Project: diplomacy   Author: diplomacy   File: test_real_game.py    GNU Affero General Public License v3.0
def __init__(self, case_file_name, hostname=DEFAULT_HOSTNAME, port=DEFAULT_PORT):
        """ Initialize game test.

            :param case_file_name: File name of JSON file containing expected game data.
                JSON file must be located in folder FILE_FOLDER_NAME.
            :param hostname: hostname to use to load server.
            :param port: port to use to load server.
        """
        full_file_path = os.path.join(self.FILE_FOLDER_NAME, case_file_name)
        with open(full_file_path, 'rb') as file:
            data = json.load(file)
        self.case_name = case_file_name
        self.map_name = data['map']
        self.phases = [ExpectedPhase(json_phase) for json_phase in data['phases']]
        self.rules = set(data['rules'])
        self.rules.add('POWER_CHOICE')
        self.rules.add('REAL_TIME')

        self.test_server = None
        self.io_loop = None  # type: IOLoop
        self.connection = None
        self.admin_channel = None
        self.admin_game = None
        self.user_games = {}
        self.future_games_ended = {}  # type: Dict[str, Future]

        self.hostname = hostname
        self.port = port 
Example 27
Project: Erasmus   Author: bryanforbes   File: json.py    BSD 3-Clause "New" or "Revised" License
def load(fp: IO[str], *args: Any, **kwargs: Any) -> Any:
    return ujson.load(fp, *args, **kwargs) 
Example 28
Project: ProsperAPI   Author: EVEprosper   File: split_utils.py    MIT License
def read_split_info(
        split_info_file=path.join(HERE, 'split_info.json'),
        logger=logging.getLogger('publicAPI'),
):
    """initialize SPLIT_INFO for project

    Notes:
        Does not update global SPLIT_INFO (use `main` scope)

    Args:
        split_info_file (str, optional): path to split_info.json
        logger (:obj:`logging.logger`, optional): logging handle

    Returns:
        (:obj:`dict`) dict of type_id:SplitInfo

    """
    logger.info('Reading split file: {0}'.format(split_info_file))
    with open(split_info_file, 'r') as split_fh:
        split_list = json.load(split_fh)

    logger.info('loading split info into objects')
    split_collection = {}
    for split_info in split_list:
        split_obj = SplitInfo(split_info)
        logger.debug(split_obj)
        split_collection[split_obj.type_id] = split_obj

    return split_collection 
Example 29
Project: R-Net   Author: HKUST-KnowComp   File: inference.py    MIT License
def __init__(self):
        with open(word_emb_file, "r") as fh:
            self.word_mat = np.array(json.load(fh), dtype=np.float32)
        with open(char_emb_file, "r") as fh:
            self.char_mat = np.array(json.load(fh), dtype=np.float32)
        with open(word2idx_file, "r") as fh:
            self.word2idx_dict = json.load(fh)
        with open(char2idx_file, "r") as fh:
            self.char2idx_dict = json.load(fh)
        self.model = InfModel(self.word_mat, self.char_mat)
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=sess_config)
        saver = tf.train.Saver()
        saver.restore(self.sess, tf.train.latest_checkpoint(save_dir)) 
Example 30
Project: tensorpack-mask-rcnn   Author: armandmcqueen   File: dataset.py    Apache License 2.0
def print_coco_metrics(self, json_file):
        """
        Args:
            json_file (str): path to the results json file in coco format
        Returns:
            dict: the evaluation metrics
        """
        from pycocotools.cocoeval import COCOeval
        ret = {}
        fields = ['IoU=0.5:0.95', 'IoU=0.5', 'IoU=0.75', 'small', 'medium', 'large']
        json_obj = json.load(open(json_file))

        # Prevent crash in self.coco.loadRes if the json is empty
        if len(json_obj) == 0:
            for k in range(6):
                ret['mAP(bbox)/' + fields[k]] = 0.0

            if cfg.MODE_MASK:
                for k in range(6):
                    ret['mAP(segm)/' + fields[k]] = 0.0
            return ret

        cocoDt = self.coco.loadRes(json_file)
        cocoEval = COCOeval(self.coco, cocoDt, 'bbox')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        for k in range(6):
            ret['mAP(bbox)/' + fields[k]] = cocoEval.stats[k]

        if 'segmentation' in json_obj[0]:
            cocoEval = COCOeval(self.coco, cocoDt, 'segm')
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            for k in range(6):
                ret['mAP(segm)/' + fields[k]] = cocoEval.stats[k]
        return ret 
Example 31
Project: tensorpack-mask-rcnn   Author: armandmcqueen   File: dataset.py    Apache License 2.0
def load_many(basedir, names, add_gt=True, add_mask=False):
        """
        Load and merge several instance files together.

        Returns the same format as :meth:`COCODetection.load`.
        """
        if not isinstance(names, (list, tuple)):
            names = [names]
        ret = []
        for n in names:
            coco = COCODetection(basedir, n)
            ret.extend(coco.load(add_gt, add_mask=add_mask))
        return ret 
Example 32
Project: trace-parser   Author: WPO-Foundation   File: trace_parser.py    Apache License 2.0
def ProcessTimeline(self, timeline):
        self.__init__()
        self.cpu['main_thread'] = '0'
        self.threads['0'] = {}
        events = None
        f = None
        try:
            file_name, ext = os.path.splitext(timeline)
            if ext.lower() == '.gz':
                f = gzip.open(timeline, 'rb')
            else:
                f = open(timeline, 'r')
            events = json.load(f)
            if events:
                # convert the old format timeline events into our internal
                # representation
                for event in events:
                    if 'method' in event and 'params' in event:
                        if self.start_time is None:
                            if event['method'] == 'Network.requestWillBeSent' and \
                                    'timestamp' in event['params']:
                                self.start_time = event['params']['timestamp'] * 1000000.0
                                self.end_time = event['params']['timestamp'] * 1000000.0
                        else:
                            if 'timestamp' in event['params']:
                                t = event['params']['timestamp'] * 1000000.0
                                if t > self.end_time:
                                    self.end_time = t
                            if event['method'] == 'Timeline.eventRecorded' and \
                                    'record' in event['params']:
                                e = self.ProcessOldTimelineEvent(
                                    event['params']['record'], None)
                                if e is not None:
                                    self.timeline_events.append(e)
                self.ProcessTimelineEvents()
        except BaseException:
            logging.critical("Error processing timeline " + timeline)
        if f is not None:
            f.close() 
Example 33
Project: picogeojson   Author: fortyninemaps   File: deserializer.py    MIT License
def fromfile(self, f):
        if hasattr(f, 'read'):
            return self.deserialize(json.load(f))
        elif hasattr(f, 'open'):
            with f.open() as f:
                return self.deserialize(json.load(f))
        with open(f) as f:
            return self.deserialize(json.load(f)) 
Example 34
Project: Question_Answering_Models   Author: l11x0m7   File: main.py    MIT License
def demo(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    model = QANet(config, None, word_mat, char_mat, trainable=False, demo = True)
    demo = Demo(model, config) 
Example 35
Project: f6a_tw_crawler   Author: chhsiao1981   File: meta_json_to_csv.py    MIT License
def _get_meta(idx):
    meta = {}
    filename = 'data/meta/' + str(idx) + '.meta.json'
    try:
        with open(filename, 'r') as f:
            meta = json.load(f)
        meta = meta[0]
    except Exception as e:
        cfg.logger.error('unable to load meta: idx: %s filename: %s e: %s', idx, filename, e)
        meta = {}
    meta['desc'] = meta['desc'].strip()

    return meta 
Example 36
Project: trelliopg   Author: quikmile   File: sql.py    MIT License
def get_db_settings(config_file=None):
    if not config_file:
        config_file = os.environ.get('CONFIG_FILE')

    if not config_file:
        config_file = './config.json'

    with open(config_file) as f:
        settings = json.load(f)

        if 'DATABASE_SETTINGS' not in settings.keys():
            raise KeyError('"DATABASE_SETTINGS" key not found in config file')

    return settings['DATABASE_SETTINGS'] 
Example 37
Project: Batch.io   Author: rosenio   File: storages.py    MIT License
def read(self):
        # Get the file size
        self._handle.seek(0, os.SEEK_END)
        size = self._handle.tell()

        if not size:
            # File is empty
            return None
        else:
            self._handle.seek(0)
            return json.load(self._handle) 
Example 38
Project: QANet-for-SQuAD-2.0   Author: Aadesh-Magare   File: util.py    MIT License
def __init__(self, data_path, use_v2=True):
        super(SQuAD, self).__init__()

        dataset = np.load(data_path)
        self.context_idxs = torch.from_numpy(dataset['context_idxs']).long()
        self.context_char_idxs = torch.from_numpy(dataset['context_char_idxs']).long()
        self.question_idxs = torch.from_numpy(dataset['ques_idxs']).long()
        self.question_char_idxs = torch.from_numpy(dataset['ques_char_idxs']).long()
        self.y1s = torch.from_numpy(dataset['y1s']).long()
        self.y2s = torch.from_numpy(dataset['y2s']).long()

        if use_v2:
            # SQuAD 2.0: Use index 0 for no-answer token (token 1 = OOV)
            batch_size, c_len, w_len = self.context_char_idxs.size()
            ones = torch.ones((batch_size, 1), dtype=torch.int64)
            self.context_idxs = torch.cat((ones, self.context_idxs), dim=1)
            self.question_idxs = torch.cat((ones, self.question_idxs), dim=1)

            ones = torch.ones((batch_size, 1, w_len), dtype=torch.int64)
            self.context_char_idxs = torch.cat((ones, self.context_char_idxs), dim=1)
            self.question_char_idxs = torch.cat((ones, self.question_char_idxs), dim=1)

            self.y1s += 1
            self.y2s += 1

        # SQuAD 1.1: Ignore no-answer examples
        self.ids = torch.from_numpy(dataset['ids']).long()
        self.valid_idxs = [idx for idx in range(len(self.ids))
                           if use_v2 or self.y1s[idx].item() >= 0] 
Example 39
Project: QANet-for-SQuAD-2.0   Author: Aadesh-Magare   File: util.py    MIT License
def visualize(tbx, pred_dict, eval_path, step, split, num_visuals):
    """Visualize text examples to TensorBoard.

    Args:
        tbx (tensorboardX.SummaryWriter): Summary writer.
        pred_dict (dict): dict of predictions of the form id -> pred.
        eval_path (str): Path to eval JSON file.
        step (int): Number of examples seen so far during training.
        split (str): Name of data split being visualized.
        num_visuals (int): Number of visuals to select at random from preds.
    """
    if num_visuals <= 0:
        return
    if num_visuals > len(pred_dict):
        num_visuals = len(pred_dict)

    visual_ids = np.random.choice(list(pred_dict), size=num_visuals, replace=False)

    with open(eval_path, 'r') as eval_file:
        eval_dict = json.load(eval_file)
    for i, id_ in enumerate(visual_ids):
        pred = pred_dict[id_] or 'N/A'
        example = eval_dict[str(id_)]
        question = example['question']
        context = example['context']
        answers = example['answers']

        gold = answers[0] if answers else 'N/A'
        tbl_fmt = ('- **Question:** {}\n'
                   + '- **Context:** {}\n'
                   + '- **Answer:** {}\n'
                   + '- **Prediction:** {}')
        tbx.add_text(tag='{}/{}_of_{}'.format(split, i + 1, num_visuals),
                     text_string=tbl_fmt.format(question, context, gold, pred),
                     global_step=step) 
Example 40
Project: spacy_zh_model   Author: algteam   File: util.py    MIT License
def get_lang_class(lang):
    """Import and load a Language class.

    lang (unicode): Two-letter language code, e.g. 'en'.
    RETURNS (Language): Language class.
    """
    global LANGUAGES
    if lang not in LANGUAGES:
        try:
            module = importlib.import_module('.lang.%s' % lang, 'spacy')
        except ImportError:
            raise ImportError(Errors.E048.format(lang=lang))
        LANGUAGES[lang] = getattr(module, module.__all__[0])
    return LANGUAGES[lang] 
Example 41
Project: spacy_zh_model   Author: algteam   File: util.py    MIT License
def load_model_from_link(name, **overrides):
    """Load a model from a shortcut link, or directory in spaCy data path."""
    path = get_data_path() / name / '__init__.py'
    try:
        cls = import_file(name, path)
    except AttributeError:
        raise IOError(Errors.E051.format(name=name))
    return cls.load(**overrides) 
Example 42
Project: spacy_zh_model   Author: algteam   File: util.py    MIT License
def load_model_from_package(name, **overrides):
    """Load a model from an installed package."""
    cls = importlib.import_module(name)
    return cls.load(**overrides) 
Example 43
Project: spacy_zh_model   Author: algteam   File: util.py    MIT License
def load_model_from_init_py(init_file, **overrides):
    """Helper function to use in the `load()` method of a model package's
    __init__.py.

    init_file (unicode): Path to model's __init__.py, i.e. `__file__`.
    **overrides: Specific overrides, like pipeline components to disable.
    RETURNS (Language): `Language` class with loaded model.
    """
    model_path = Path(init_file).parent
    meta = get_model_meta(model_path)
    data_dir = '%s_%s-%s' % (meta['lang'], meta['name'], meta['version'])
    data_path = model_path / data_dir
    if not model_path.exists():
        raise IOError(Errors.E052.format(path=path2str(data_path)))
    return load_model_from_path(data_path, meta, **overrides) 
Example 44
Project: spacy_zh_model   Author: algteam   File: util.py    MIT License
def read_json(location):
    """Open and load JSON from file.

    location (Path): Path to JSON file.
    RETURNS (dict): Loaded JSON content.
    """
    location = ensure_path(location)
    with location.open('r', encoding='utf8') as f:
        return ujson.load(f) 
Example 45
Project: slp   Author: georgepar   File: system.py    MIT License
def pickle_load(fname: str) -> Any:
    with open(fname, 'rb') as fd:
        data = pickle.load(fd)
    return data 
Example 46
Project: slp   Author: georgepar   File: system.py    MIT License
def json_load(fname: str) -> types.GenericDict:
    with open(fname, 'r') as fd:
        data = json.load(fd)
    return cast(types.GenericDict, data) 
Example 47
Project: HatfieldFX   Author: b-sea   File: storages.py    MIT License
def read(self):
        # Get the file size
        self._handle.seek(0, 2)
        size = self._handle.tell()

        if not size:
            # File is empty
            return None
        else:
            self._handle.seek(0)
            return json.load(self._handle) 
Example 48
Project: carbon_xs_gui   Author: lktsui   File: CarbonXS_GUI.py    GNU General Public License v3.0
def start_fitting_process(self):
        """
        Calls the fitting process to begin
        :return:
        """


        # Verifies that XRD pattern data has been loaded
        if len(self.x_data) > 0 and len(self.y_data) > 0:
            print "Beginning fitting process."

            # If all sanity checks pass, proceed with fit
            if self.pre_run_sanity_check(True):
                self.call_fit_program()

        # If not, prompt the user to load data
        else:

            reply = QtGui.QMessageBox.question(self, 'No XRD Pattern Loaded',
                                               "Do you want to load a pattern?", QtGui.QMessageBox.Yes |
                                               QtGui.QMessageBox.No, QtGui.QMessageBox.No)

            if reply == QtGui.QMessageBox.Yes:

                self.open_pattern()
                # If a pattern has been loaded, proceed with fit.
                if len(self.x_data) > 0 and len(self.y_data) > 0 and self.pre_run_sanity_check(True):
                    print "Loaded an XRD pattern"
                    self.call_fit_program() 
Example 49
Project: zulip   Author: zulip   File: test_stripe.py    Apache License 2.0
def read_stripe_fixture(decorated_function_name: str,
                        mocked_function_name: str) -> Callable[[Any, Any], Any]:
    def _read_stripe_fixture(*args: Any, **kwargs: Any) -> Any:
        mock = operator.attrgetter(mocked_function_name)(sys.modules[__name__])
        fixture_path = stripe_fixture_path(decorated_function_name, mocked_function_name, mock.call_count)
        fixture = ujson.load(open(fixture_path, 'r'))
        # Check for StripeError fixtures
        if "json_body" in fixture:
            requestor = stripe.api_requestor.APIRequestor()
            # This function will raise the relevant StripeError according to the fixture
            requestor.interpret_response(fixture["http_body"], fixture["http_status"], fixture["headers"])
        return stripe.util.convert_to_stripe_object(fixture)
    return _read_stripe_fixture 
Example 50
Project: zulip   Author: zulip   File: mattermost.py    Apache License 2.0
def get_name_to_codepoint_dict() -> Dict[str, str]:
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        return ujson.load(fp) 
Example 51
Project: zulip   Author: zulip   File: slack.py    Apache License 2.0
def get_data_file(path: str) -> Any:
    with open(path, "r") as fp:
        data = ujson.load(fp)
        return data 
Example 52
Project: zulip   Author: zulip   File: hipchat.py    Apache License 2.0
def read_room_data(data_dir: str) -> List[ZerverFieldsT]:
    fn = 'rooms.json'
    data_file = os.path.join(data_dir, fn)
    with open(data_file) as f:
        data = ujson.load(f)
    return data 
Example 53
Project: zulip   Author: zulip   File: users.py    Apache License 2.0
def team_view(request: HttpRequest) -> HttpResponse:
    with open(static_path('generated/github-contributors.json')) as f:
        data = ujson.load(f)

    return render(
        request,
        'zerver/team.html',
        context={
            'page_params': {
                'contrib': data['contrib'],
            },
            'date': data['date'],
        },
    ) 
Example 54
Project: zulip   Author: zulip   File: compilemessages.py    Apache License 2.0
def get_translation_percentage(self, locale_path: str, locale: str) -> int:

        # backend stats
        po = polib.pofile(self.get_po_filename(locale_path, locale))
        not_translated = len(po.untranslated_entries())
        total = len(po.translated_entries()) + not_translated

        # frontend stats
        with open(self.get_json_filename(locale_path, locale)) as reader:
            for key, value in ujson.load(reader).items():
                total += 1
                if value == '':
                    not_translated += 1

        # mobile stats
        with open(os.path.join(locale_path, 'mobile_info.json')) as mob:
            mobile_info = ujson.load(mob)
        try:
            info = mobile_info[locale]
        except KeyError:
            if self.strict:
                raise
            info = {'total': 0, 'not_translated': 0}

        total += info['total']
        not_translated += info['not_translated']

        return (total - not_translated) * 100 // total 
Example 55
Project: zulip   Author: zulip   File: test_import_export.py    Apache License 2.0
def test_export_single_user(self) -> None:
        output_dir = self._make_output_dir()
        cordelia = self.example_user('cordelia')

        with patch('logging.info'):
            do_export_user(cordelia, output_dir)

        def read_file(fn: str) -> Any:
            full_fn = os.path.join(output_dir, fn)
            with open(full_fn) as f:
                return ujson.load(f)

        messages = read_file('messages-000001.json')
        user = read_file('user.json')

        exported_user_id = self.get_set(user['zerver_userprofile'], 'id')
        self.assertEqual(exported_user_id, set([cordelia.id]))
        exported_user_email = self.get_set(user['zerver_userprofile'], 'email')
        self.assertEqual(exported_user_email, set([cordelia.email]))

        exported_recipient_type_id = self.get_set(user['zerver_recipient'], 'type_id')
        self.assertIn(cordelia.id, exported_recipient_type_id)

        exported_stream_id = self.get_set(user['zerver_stream'], 'id')
        self.assertIn(list(exported_stream_id)[0], exported_recipient_type_id)

        exported_recipient_id = self.get_set(user['zerver_recipient'], 'id')
        exported_subscription_recipient = self.get_set(user['zerver_subscription'], 'recipient')
        self.assertEqual(exported_recipient_id, exported_subscription_recipient)

        exported_messages_recipient = self.get_set(messages['zerver_message'], 'recipient')
        self.assertIn(list(exported_messages_recipient)[0], exported_recipient_id) 
Example 56
Project: zulip   Author: zulip   File: test_mattermost_importer.py    Apache License 2.0
def test_write_emoticon_data(self) -> None:
        fixture_file_name = self.fixture_file_name("export.json", "mattermost_fixtures")
        mattermost_data = mattermost_data_file_to_dict(fixture_file_name)
        output_dir = self.make_import_output_dir("mattermost")
        zerver_realm_emoji = write_emoticon_data(
            realm_id=3,
            custom_emoji_data=mattermost_data["emoji"],
            data_dir=self.fixture_file_name("", "mattermost_fixtures"),
            output_dir = output_dir
        )
        self.assertEqual(len(zerver_realm_emoji), 2)
        self.assertEqual(zerver_realm_emoji[0]["file_name"], "peerdium")
        self.assertEqual(zerver_realm_emoji[0]["realm"], 3)
        self.assertEqual(zerver_realm_emoji[0]["deactivated"], False)

        self.assertEqual(zerver_realm_emoji[1]["file_name"], "tick")
        self.assertEqual(zerver_realm_emoji[1]["realm"], 3)
        self.assertEqual(zerver_realm_emoji[1]["deactivated"], False)

        records_file = os.path.join(output_dir, "emoji", "records.json")
        with open(records_file, "r") as f:
            records_json = ujson.load(f)

        self.assertEqual(records_json[0]["file_name"], "peerdium")
        self.assertEqual(records_json[0]["realm_id"], 3)
        exported_emoji_path = self.fixture_file_name(mattermost_data["emoji"][0]["image"], "mattermost_fixtures")
        self.assertTrue(filecmp.cmp(records_json[0]["path"], exported_emoji_path))

        self.assertEqual(records_json[1]["file_name"], "tick")
        self.assertEqual(records_json[1]["realm_id"], 3)
        exported_emoji_path = self.fixture_file_name(mattermost_data["emoji"][1]["image"], "mattermost_fixtures")
        self.assertTrue(filecmp.cmp(records_json[1]["path"], exported_emoji_path)) 
Example 57
Project: zulip   Author: zulip   File: test_mattermost_importer.py    Apache License 2.0
def read_file(self, team_output_dir: str, output_file: str) -> Any:
        full_path = os.path.join(team_output_dir, output_file)
        with open(full_path) as f:
            return ujson.load(f) 
Example 58
Project: zulip   Author: zulip   File: i18n.py    Apache License 2.0
def get_language_list() -> List[Dict[str, Any]]:
    path = os.path.join(settings.DEPLOY_ROOT, 'locale', 'language_name_map.json')
    with open(path, 'r') as reader:
        languages = ujson.load(reader)
        return languages['name_map'] 
Example 59
Project: decaNLP   Author: salesforce   File: predict.py    BSD 3-Clause "New" or "Revised" License
def get_args():
    parser = ArgumentParser()
    parser.add_argument('--path', required=True)
    parser.add_argument('--evaluate', type=str, required=True)
    parser.add_argument('--tasks', default=['squad', 'iwslt.en.de', 'cnn_dailymail', 'multinli.in.out', 'sst', 'srl', 'zre', 'woz.en', 'wikisql', 'schema'], nargs='+')
    parser.add_argument('--devices', default=[0], nargs='+', type=int, help='a list of devices that can be used (multi-gpu currently WIP)')
    parser.add_argument('--seed', default=123, type=int, help='Random seed.')
    parser.add_argument('--data', default='/decaNLP/.data/', type=str, help='where to load data from.')
    parser.add_argument('--embeddings', default='/decaNLP/.embeddings', type=str, help='where to save embeddings.')
    parser.add_argument('--checkpoint_name')
    parser.add_argument('--bleu', action='store_true', help='whether to use the bleu metric (always on for iwslt)')
    parser.add_argument('--rouge', action='store_true', help='whether to use the rouge metric (always on for cnn, dailymail, and cnn_dailymail)')
    parser.add_argument('--overwrite', action='store_true', help='whether to overwrite previously written predictions')
    parser.add_argument('--silent', action='store_true', help='whether to print predictions to stdout')

    args = parser.parse_args()

    with open(os.path.join(args.path, 'config.json')) as config_file:
        config = json.load(config_file)
        retrieve = ['model', 
                    'transformer_layers', 'rnn_layers', 'transformer_hidden', 
                    'dimension', 'load', 'max_val_context_length', 'val_batch_size', 
                    'transformer_heads', 'max_output_length', 'max_generative_vocab', 
                    'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char']
        for r in retrieve:
            if r in config:
                setattr(args, r,  config[r])
            elif 'cove' in r:
                setattr(args, r, False)
            elif 'elmo' in r:
                setattr(args, r, [-1])
            elif 'glove_and_char' in r:
                setattr(args, r, True)
            else:
                setattr(args, r, None)
        args.dropout_ratio = 0.0

    args.task_to_metric = {'cnn_dailymail': 'avg_rouge',
        'iwslt.en.de': 'bleu',
        'multinli.in.out': 'em',
        'squad': 'nf1',
        'srl': 'nf1',
        'sst': 'em',
        'wikisql': 'lfem',
        'woz.en': 'joint_goal_em',
        'zre': 'corpus_f1',
        'schema': 'em'}

    if not args.checkpoint_name is None:
        args.best_checkpoint = os.path.join(args.path, args.checkpoint_name)
    else:
        assert os.path.exists(os.path.join(args.path, 'process_0.log'))
        args.best_checkpoint = get_best(args)
           
    return args 
Example 60
Project: cappuccino   Author: FoxDev   File: userdb.py    GNU General Public License v3.0
def __init__(self, bot):
        self.bot = bot
        self.root = Path('data')
        self.file = self.root / 'userdb.json'
        self.data = {}
        self.last_write = datetime.now()

        for sig in (signal.SIGINT, signal.SIGTERM):
            signal.signal(sig, self._shutdown_hook)

        try:
            self.config = self.bot.config[__name__]
            if not self.config.get('enable_http_server'):
                return
            host, port = self.config['http_host'], int(self.config['http_port'])
        except KeyError:
            host, port = '127.0.0.1', 8080

        try:
            with self.file.open('r') as fd:
                self.data.update(json.load(fd))
        except FileNotFoundError:
            # Database file itself doesn't need to exist on first run, it will be created on first write.
            if not self.file.exists():
                # Copy ricedb.json from old installations if it exists.
                old_db_file = Path(self.root) / 'ricedb.json'
                if old_db_file.exists():
                    old_db_file.replace(self.file)
                    with self.file.open('r') as fd:
                        self.data.update(json.load(fd))
                else:
                    self.root.mkdir(exist_ok=True)
                    self.file.touch(exist_ok=True)
                    self.bot.log.debug(f'Created {self.root} directory')

        # If any user has an uppercase in their nick, convert the whole DB to lowercase.
        db_copy = self.data.copy()
        for user, data in db_copy.items():
            if any(c.isupper() for c in user):
                self.set_user_value(user, self.data.pop(user))

        bottle.hook('before_request')(strip_path)
        bottle.route('/')(lambda: http_json_dump(self.data))
        bottle_thread = threading.Thread(
            target=bottle.run,
            kwargs={'quiet': True, 'host': host, 'port': port},
            name='{0} HTTP server'.format(__name__),
            daemon=True
        )
        bottle_thread.start() 
Example 61
Project: R-Net   Author: HKUST-KnowComp   File: main.py    MIT License
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
Example 62
Project: R-Net   Author: HKUST-KnowComp   File: prepro.py    MIT License
def process_file(filename, data_type, word_counter, char_counter):
    print("Generating {} examples...".format(data_type))
    examples = []
    eval_examples = {}
    total = 0
    with open(filename, "r") as fh:
        source = json.load(fh)
        for article in tqdm(source["data"]):
            for para in article["paragraphs"]:
                context = para["context"].replace(
                    "''", '" ').replace("``", '" ')
                context_tokens = word_tokenize(context)
                context_chars = [list(token) for token in context_tokens]
                spans = convert_idx(context, context_tokens)
                for token in context_tokens:
                    word_counter[token] += len(para["qas"])
                    for char in token:
                        char_counter[char] += len(para["qas"])
                for qa in para["qas"]:
                    total += 1
                    ques = qa["question"].replace(
                        "''", '" ').replace("``", '" ')
                    ques_tokens = word_tokenize(ques)
                    ques_chars = [list(token) for token in ques_tokens]
                    for token in ques_tokens:
                        word_counter[token] += 1
                        for char in token:
                            char_counter[char] += 1
                    y1s, y2s = [], []
                    answer_texts = []
                    for answer in qa["answers"]:
                        answer_text = answer["text"]
                        answer_start = answer['answer_start']
                        answer_end = answer_start + len(answer_text)
                        answer_texts.append(answer_text)
                        answer_span = []
                        for idx, span in enumerate(spans):
                            if not (answer_end <= span[0] or answer_start >= span[1]):
                                answer_span.append(idx)
                        y1, y2 = answer_span[0], answer_span[-1]
                        y1s.append(y1)
                        y2s.append(y2)
                    example = {"context_tokens": context_tokens, "context_chars": context_chars, "ques_tokens": ques_tokens,
                               "ques_chars": ques_chars, "y1s": y1s, "y2s": y2s, "id": total}
                    examples.append(example)
                    eval_examples[str(total)] = {
                        "context": context, "spans": spans, "answers": answer_texts, "uuid": qa["id"]}
        random.shuffle(examples)
        print("{} questions in total".format(len(examples)))
    return examples, eval_examples 
Example 63
Project: R-Net   Author: HKUST-KnowComp   File: prepro.py    MIT License
def prepro(config):
    word_counter, char_counter = Counter(), Counter()
    train_examples, train_eval = process_file(
        config.train_file, "train", word_counter, char_counter)
    dev_examples, dev_eval = process_file(
        config.dev_file, "dev", word_counter, char_counter)
    test_examples, test_eval = process_file(
        config.test_file, "test", word_counter, char_counter)

    word_emb_file = config.fasttext_file if config.fasttext else config.glove_word_file
    char_emb_file = config.glove_char_file if config.pretrained_char else None
    char_emb_size = config.glove_char_size if config.pretrained_char else None
    char_emb_dim = config.glove_dim if config.pretrained_char else config.char_dim

    word2idx_dict = None
    if os.path.isfile(config.word2idx_file):
        with open(config.word2idx_file, "r") as fh:
            word2idx_dict = json.load(fh)
    word_emb_mat, word2idx_dict = get_embedding(word_counter, "word", emb_file=word_emb_file,
                                                size=config.glove_word_size, vec_size=config.glove_dim, token2idx_dict=word2idx_dict)

    char2idx_dict = None
    if os.path.isfile(config.char2idx_file):
        with open(config.char2idx_file, "r") as fh:
            char2idx_dict = json.load(fh)
    char_emb_mat, char2idx_dict = get_embedding(
        char_counter, "char", emb_file=char_emb_file, size=char_emb_size, vec_size=char_emb_dim, token2idx_dict=char2idx_dict)

    build_features(config, train_examples, "train",
                   config.train_record_file, word2idx_dict, char2idx_dict)
    dev_meta = build_features(config, dev_examples, "dev",
                              config.dev_record_file, word2idx_dict, char2idx_dict)
    test_meta = build_features(config, test_examples, "test",
                               config.test_record_file, word2idx_dict, char2idx_dict, is_test=True)

    save(config.word_emb_file, word_emb_mat, message="word embedding")
    save(config.char_emb_file, char_emb_mat, message="char embedding")
    save(config.train_eval_file, train_eval, message="train eval")
    save(config.dev_eval_file, dev_eval, message="dev eval")
    save(config.test_eval_file, test_eval, message="test eval")
    save(config.dev_meta, dev_meta, message="dev meta")
    save(config.word2idx_file, word2idx_dict, message="word2idx")
    save(config.char2idx_file, char2idx_dict, message="char2idx")
    save(config.test_meta, test_meta, message="test meta") 
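
Note: prepro() delegates every write to a save helper that is not shown in this excerpt. A plausible minimal version, assuming the module imports ujson as json like the other examples here, is:

import ujson as json

def save(filename, obj, message=None):
    """Serialize obj to filename as JSON, optionally logging what is being saved."""
    if message is not None:
        print("Saving {}...".format(message))
    with open(filename, "w") as fh:
        json.dump(obj, fh)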
Example 64
Project: uberlogs   Author: odedlaz   File: public.py    MIT License 4 votes vote down vote up
def install(default_path='logging.json',
            default_level=logging.INFO,
            env_key='LOG_CFG',
            kill_on=None):
    """
    Setup logging configuration
    if the path in the default_path or env_key doesn't exist,
    default level is used,
    and the root handler is set to the formattable stream handler
    """
    import os
    import sys

    path = os.getenv(env_key, default_path)
    if os.path.exists(path):
        with open(path, 'rt') as f:
            import ujson as json
            content = json.load(f)

        logging.config.dictConfig(content)
    else:
        from .formatters import ConcatFormatter
        handler = logging.StreamHandler(sys.stdout)
        fmt = "%(asctime)s - %(name)s - %(levelname)s - %(uber_message)s"
        concatf = ConcatFormatter(fmt=fmt,
                                  delimiter="; ",
                                  operator="= ",
                                  log_in_color=True,
                                  include_format_keywords=False,
                                  parse_text=True)
        handler.setFormatter(concatf)
        logging.root.addHandler(handler)
        logging.root.setLevel(default_level)
        logging.basicConfig(level=default_level)

    # this handler is the last one, and will force exit
    # once a critical message has been received
    if kill_on is not None:
        from .handlers import KillProcessHandler
        logging.root.addHandler(KillProcessHandler(level=kill_on))

    def log_unhandled(exctype, value, tb):
        getLogger("unhandled").critical("Unhandled Error",
                                        exc_info=(exctype, value, tb))

    sys.excepthook = log_unhandled

    if rewire_twisted_log:
        # clear all observers (iterate over a copy; a bare map() call would be
        # a lazy no-op on Python 3)
        for observer in list(globalLogPublisher._observers):
            globalLogPublisher.removeObserver(observer)

        globalLogPublisher.addObserver(STDLibLogObserver()) 
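
Note: a typical call site only imports the package and calls install() once at startup, assuming uberlogs re-exports install() at the top level as its README suggests; the configuration path and environment variable below are simply the defaults from the signature above, not required values:

import logging
import uberlogs

# Point LOG_CFG at a JSON dictConfig file, or fall back to INFO on stdout.
uberlogs.install(default_path="logging.json",
                 default_level=logging.INFO,
                 env_key="LOG_CFG")
logging.getLogger("demo").info("logging is configured")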
Example 65
Project: Question_Answering_Models   Author: l11x0m7   File: main.py    MIT License 4 votes vote down vote up
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    try:
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction
    except:
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
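
Note: convert_tokens() is a project helper that turns predicted start/end token indices back into answer strings using the character spans stored in the eval file. A minimal sketch under that assumption (not the repository's exact code):

def convert_tokens(eval_file, qa_ids, pp1, pp2):
    """Map predicted token spans back to answer text keyed by qa id and by uuid."""
    answer_dict, remapped_dict = {}, {}
    for qid, p1, p2 in zip(qa_ids, pp1, pp2):
        entry = eval_file[str(qid)]
        start_idx = entry["spans"][p1][0]
        end_idx = entry["spans"][p2][1]
        answer = entry["context"][start_idx:end_idx]
        answer_dict[str(qid)] = answer
        remapped_dict[entry["uuid"]] = answer
    return answer_dict, remapped_dict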
Example 66
Project: Question_Answering_Models   Author: l11x0m7   File: prepro.py    MIT License 4 votes vote down vote up
def process_file(filename, data_type, word_counter, char_counter):
    print("Generating {} examples...".format(data_type))
    examples = []
    eval_examples = {}
    total = 0
    with open(filename, "r") as fh:
        source = json.load(fh)
        for article in tqdm(source["data"]):
            for para in article["paragraphs"]:
                context = para["context"].replace(
                    "''", '" ').replace("``", '" ')
                context_tokens = word_tokenize(context)
                context_chars = [list(token) for token in context_tokens]
                spans = convert_idx(context, context_tokens)
                for token in context_tokens:
                    word_counter[token] += len(para["qas"])
                    for char in token:
                        char_counter[char] += len(para["qas"])
                for qa in para["qas"]:
                    total += 1
                    ques = qa["question"].replace(
                        "''", '" ').replace("``", '" ')
                    ques_tokens = word_tokenize(ques)
                    ques_chars = [list(token) for token in ques_tokens]
                    for token in ques_tokens:
                        word_counter[token] += 1
                        for char in token:
                            char_counter[char] += 1
                    y1s, y2s = [], []
                    answer_texts = []
                    for answer in qa["answers"]:
                        answer_text = answer["text"]
                        answer_start = answer['answer_start']
                        answer_end = answer_start + len(answer_text)
                        answer_texts.append(answer_text)
                        answer_span = []
                        for idx, span in enumerate(spans):
                            if not (answer_end <= span[0] or answer_start >= span[1]):
                                answer_span.append(idx)
                        y1, y2 = answer_span[0], answer_span[-1]
                        y1s.append(y1)
                        y2s.append(y2)
                    example = {"context_tokens": context_tokens, "context_chars": context_chars, "ques_tokens": ques_tokens,
                               "ques_chars": ques_chars, "y1s": y1s, "y2s": y2s, "id": total}
                    examples.append(example)
                    eval_examples[str(total)] = {
                        "context": context, "spans": spans, "answers": answer_texts, "uuid": qa["id"]}
        random.shuffle(examples)
        print("{} questions in total".format(len(examples)))
    return examples, eval_examples 
Example 67
Project: Question_Answering_Models   Author: l11x0m7   File: prepro.py    MIT License 4 votes vote down vote up
def prepro(config):
    word_counter, char_counter = Counter(), Counter()
    train_examples, train_eval = process_file(
        config.train_file, "train", word_counter, char_counter)
    dev_examples, dev_eval = process_file(
        config.dev_file, "dev", word_counter, char_counter)
    test_examples, test_eval = process_file(
        config.test_file, "test", word_counter, char_counter)

    word_emb_file = config.fasttext_file if config.fasttext else config.glove_word_file
    char_emb_file = config.glove_char_file if config.pretrained_char else None
    char_emb_size = config.glove_char_size if config.pretrained_char else None
    char_emb_dim = config.glove_dim if config.pretrained_char else config.char_dim

    word2idx_dict = None
    if os.path.isfile(config.word2idx_file):
        with open(config.word2idx_file, "r") as fh:
            word2idx_dict = json.load(fh)
    word_emb_mat, word2idx_dict = get_embedding(word_counter, "word", emb_file=word_emb_file,
                                                size=config.glove_word_size, vec_size=config.glove_dim, token2idx_dict=word2idx_dict)

    char2idx_dict = None
    if os.path.isfile(config.char2idx_file):
        with open(config.char2idx_file, "r") as fh:
            char2idx_dict = json.load(fh)
    char_emb_mat, char2idx_dict = get_embedding(
        char_counter, "char", emb_file=char_emb_file, size=char_emb_size, vec_size=char_emb_dim, token2idx_dict=char2idx_dict)

    build_features(config, train_examples, "train",
                   config.train_record_file, word2idx_dict, char2idx_dict)
    dev_meta = build_features(config, dev_examples, "dev",
                              config.dev_record_file, word2idx_dict, char2idx_dict)
    test_meta = build_features(config, test_examples, "test",
                               config.test_record_file, word2idx_dict, char2idx_dict, is_test=True)

    save(config.word_emb_file, word_emb_mat, message="word embedding")
    save(config.char_emb_file, char_emb_mat, message="char embedding")
    save(config.train_eval_file, train_eval, message="train eval")
    save(config.dev_eval_file, dev_eval, message="dev eval")
    save(config.test_eval_file, test_eval, message="test eval")
    save(config.dev_meta, dev_meta, message="dev meta")
    save(config.word2idx_file, word2idx_dict, message="word2idx")
    save(config.char2idx_file, char2idx_dict, message="char2idx")
    save(config.test_meta, test_meta, message="test meta") 
Example 68
Project: Question_Answering_Models   Author: l11x0m7   File: main.py    MIT License 4 votes vote down vote up
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file, get_record_parser(
            config, is_test=True), config).make_one_shot_iterator()

        model = QANet(config, test_batch, word_mat, char_mat, trainable=False, graph = g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1'])) 
Example 69
Project: Question_Answering_Models   Author: l11x0m7   File: main.py    MIT License 4 votes vote down vote up
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    try:
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction
    except:
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
Example 70
Project: Question_Answering_Models   Author: l11x0m7   File: prepro.py    MIT License 4 votes vote down vote up
def process_file(filename, data_type, word_counter, char_counter):
    print("Generating {} examples...".format(data_type))
    examples = []
    eval_examples = {}
    total = 0
    with open(filename, "r") as fh:
        source = json.load(fh)
        for article in tqdm(source["data"]):
            for para in article["paragraphs"]:
                context = para["context"].replace(
                    "''", '" ').replace("``", '" ')
                context_tokens = word_tokenize(context)
                context_chars = [list(token) for token in context_tokens]
                spans = convert_idx(context, context_tokens)
                for token in context_tokens:
                    word_counter[token] += len(para["qas"])
                    for char in token:
                        char_counter[char] += len(para["qas"])
                for qa in para["qas"]:
                    total += 1
                    ques = qa["question"].replace(
                        "''", '" ').replace("``", '" ')
                    ques_tokens = word_tokenize(ques)
                    ques_chars = [list(token) for token in ques_tokens]
                    for token in ques_tokens:
                        word_counter[token] += 1
                        for char in token:
                            char_counter[char] += 1
                    y1s, y2s = [], []
                    answer_texts = []
                    for answer in qa["answers"]:
                        answer_text = answer["text"]
                        answer_start = answer['answer_start']
                        answer_end = answer_start + len(answer_text)
                        answer_texts.append(answer_text)
                        answer_span = []
                        for idx, span in enumerate(spans):
                            if not (answer_end <= span[0] or answer_start >= span[1]):
                                answer_span.append(idx)
                        y1, y2 = answer_span[0], answer_span[-1]
                        y1s.append(y1)
                        y2s.append(y2)
                    example = {"context_tokens": context_tokens, "context_chars": context_chars, "ques_tokens": ques_tokens,
                               "ques_chars": ques_chars, "y1s": y1s, "y2s": y2s, "id": total}
                    examples.append(example)
                    eval_examples[str(total)] = {
                        "context": context, "spans": spans, "answers": answer_texts, "uuid": qa["id"]}
        random.shuffle(examples)
        print("{} questions in total".format(len(examples)))
    return examples, eval_examples 
Example 71
Project: Question_Answering_Models   Author: l11x0m7   File: prepro.py    MIT License 4 votes vote down vote up
def prepro(config):
    word_counter, char_counter = Counter(), Counter()
    train_examples, train_eval = process_file(
        config.train_file, "train", word_counter, char_counter)
    dev_examples, dev_eval = process_file(
        config.dev_file, "dev", word_counter, char_counter)
    test_examples, test_eval = process_file(
        config.test_file, "test", word_counter, char_counter)

    word_emb_file = config.fasttext_file if config.fasttext else config.glove_word_file
    char_emb_file = config.glove_char_file if config.pretrained_char else None
    char_emb_size = config.glove_char_size if config.pretrained_char else None
    char_emb_dim = config.glove_dim if config.pretrained_char else config.char_dim

    word2idx_dict = None
    if os.path.isfile(config.word2idx_file):
        with open(config.word2idx_file, "r") as fh:
            word2idx_dict = json.load(fh)
    word_emb_mat, word2idx_dict = get_embedding(word_counter, "word", emb_file=word_emb_file,
                                                size=config.glove_word_size, vec_size=config.glove_dim, token2idx_dict=word2idx_dict)

    char2idx_dict = None
    if os.path.isfile(config.char2idx_file):
        with open(config.char2idx_file, "r") as fh:
            char2idx_dict = json.load(fh)
    char_emb_mat, char2idx_dict = get_embedding(
        char_counter, "char", emb_file=char_emb_file, size=char_emb_size, vec_size=char_emb_dim, token2idx_dict=char2idx_dict)

    build_features(config, train_examples, "train",
                   config.train_record_file, word2idx_dict, char2idx_dict)
    dev_meta = build_features(config, dev_examples, "dev",
                              config.dev_record_file, word2idx_dict, char2idx_dict)
    test_meta = build_features(config, test_examples, "test",
                               config.test_record_file, word2idx_dict, char2idx_dict, is_test=True)

    save(config.word_emb_file, word_emb_mat, message="word embedding")
    save(config.char_emb_file, char_emb_mat, message="char embedding")
    save(config.train_eval_file, train_eval, message="train eval")
    save(config.dev_eval_file, dev_eval, message="dev eval")
    save(config.test_eval_file, test_eval, message="test eval")
    save(config.dev_meta, dev_meta, message="dev meta")
    save(config.word2idx_file, word2idx_dict, message="word2idx")
    save(config.char2idx_file, char2idx_dict, message="char2idx")
    save(config.test_meta, test_meta, message="test meta") 
Example 72
Project: Question_Answering_Models   Author: l11x0m7   File: main.py    MIT License 4 votes vote down vote up
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    try:
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction
    except:
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
Example 73
Project: Question_Answering_Models   Author: l11x0m7   File: prepro.py    MIT License 4 votes vote down vote up
def process_file(filename, data_type, word_counter, char_counter):
    print("Generating {} examples...".format(data_type))
    examples = []
    eval_examples = {}
    total = 0
    with open(filename, "r") as fh:
        source = json.load(fh)
        for article in tqdm(source["data"]):
            for para in article["paragraphs"]:
                context = para["context"].replace(
                    "''", '" ').replace("``", '" ')
                context_tokens = word_tokenize(context)
                context_chars = [list(token) for token in context_tokens]
                spans = convert_idx(context, context_tokens)
                for token in context_tokens:
                    word_counter[token] += len(para["qas"])
                    for char in token:
                        char_counter[char] += len(para["qas"])
                for qa in para["qas"]:
                    total += 1
                    ques = qa["question"].replace(
                        "''", '" ').replace("``", '" ')
                    ques_tokens = word_tokenize(ques)
                    ques_chars = [list(token) for token in ques_tokens]
                    for token in ques_tokens:
                        word_counter[token] += 1
                        for char in token:
                            char_counter[char] += 1
                    y1s, y2s = [], []
                    answer_texts = []
                    for answer in qa["answers"]:
                        answer_text = answer["text"]
                        answer_start = answer['answer_start']
                        answer_end = answer_start + len(answer_text)
                        answer_texts.append(answer_text)
                        answer_span = []
                        for idx, span in enumerate(spans):
                            if not (answer_end <= span[0] or answer_start >= span[1]):
                                answer_span.append(idx)
                        y1, y2 = answer_span[0], answer_span[-1]
                        y1s.append(y1)
                        y2s.append(y2)
                    example = {"context_tokens": context_tokens, "context_chars": context_chars, "ques_tokens": ques_tokens,
                               "ques_chars": ques_chars, "y1s": y1s, "y2s": y2s, "id": total}
                    examples.append(example)
                    eval_examples[str(total)] = {
                        "context": context, "spans": spans, "answers": answer_texts, "uuid": qa["id"]}
        random.shuffle(examples)
        print("{} questions in total".format(len(examples)))
    return examples, eval_examples 
Example 74
Project: QANet-for-SQuAD-2.0   Author: Aadesh-Magare   File: setup.py    MIT License 4 votes vote down vote up
def process_file(filename, data_type, word_counter, char_counter):
    print("Pre-processing {} examples...".format(data_type))
    examples = []
    eval_examples = {}
    total = 0
    with open(filename, "r") as fh:
        source = json.load(fh)
        for article in tqdm(source["data"]):
            for para in article["paragraphs"]:
                context = para["context"].replace(
                    "''", '" ').replace("``", '" ')
                context_tokens = word_tokenize(context)
                context_chars = [list(token) for token in context_tokens]
                spans = convert_idx(context, context_tokens)
                for token in context_tokens:
                    word_counter[token] += len(para["qas"])
                    for char in token:
                        char_counter[char] += len(para["qas"])
                for qa in para["qas"]:
                    total += 1
                    ques = qa["question"].replace(
                        "''", '" ').replace("``", '" ')
                    ques_tokens = word_tokenize(ques)
                    ques_chars = [list(token) for token in ques_tokens]
                    for token in ques_tokens:
                        word_counter[token] += 1
                        for char in token:
                            char_counter[char] += 1
                    y1s, y2s = [], []
                    answer_texts = []
                    for answer in qa["answers"]:
                        answer_text = answer["text"]
                        answer_start = answer['answer_start']
                        answer_end = answer_start + len(answer_text)
                        answer_texts.append(answer_text)
                        answer_span = []
                        for idx, span in enumerate(spans):
                            if not (answer_end <= span[0] or answer_start >= span[1]):
                                answer_span.append(idx)
                        y1, y2 = answer_span[0], answer_span[-1]
                        y1s.append(y1)
                        y2s.append(y2)
                    example = {"context_tokens": context_tokens,
                               "context_chars": context_chars,
                               "ques_tokens": ques_tokens,
                               "ques_chars": ques_chars,
                               "y1s": y1s,
                               "y2s": y2s,
                               "id": total}
                    examples.append(example)
                    eval_examples[str(total)] = {"context": context,
                                                 "question": ques,
                                                 "spans": spans,
                                                 "answers": answer_texts,
                                                 "uuid": qa["id"]}
        print("{} questions in total".format(len(examples)))
    return examples, eval_examples 
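
Note: a typical driver threads shared Counter objects through successive calls so that word and character frequencies accumulate across splits; the SQuAD 2.0 file names below are placeholders, not paths taken from the project:

from collections import Counter

word_counter, char_counter = Counter(), Counter()
train_examples, train_eval = process_file(
    "train-v2.0.json", "train", word_counter, char_counter)
dev_examples, dev_eval = process_file(
    "dev-v2.0.json", "dev", word_counter, char_counter)
print(len(train_examples), "train and", len(dev_examples), "dev examples")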
Example 75
Project: carbon_xs_gui   Author: lktsui   File: CarbonXS_GUI.py    GNU General Public License v3.0 4 votes vote down vote up
def import_diffractometer_params(self):
        """
        Imports diffractometer settings from a JSON file.
        The default directory is "/config/diffractometer settings".

        :return:
        """

        if self.default_diffsettings_import_dir:

            fname, opened = QtGui.QFileDialog.getOpenFileName(self, 'Import Diffractometer Settings',
                                                              self.default_diffsettings_import_dir, filter="*.json")
        else:

            fname, opened = QtGui.QFileDialog.getOpenFileName(self, 'Import Diffractometer Settings',
                                                          os.path.join(user_data_directory,
                                                           'config','diffractometer settings'), filter="*.json")

        if fname:

            data_file = open(fname, 'r')
            directory, _ = os.path.split(fname)
            self.default_diffsettings_import_dir = directory

            try:
                diffractometer_settings = ujson.load(data_file)

                self.wavelength.setValue(diffractometer_settings['wavelength'])
                self.beam_width.setValue(diffractometer_settings['beam_width'])
                self.sample_width.setValue(diffractometer_settings['sample_width'])
                self.sample_depth.setValue(diffractometer_settings['sample_depth'])
                self.sample_density.setValue(diffractometer_settings['sample_density'])
                self.goniometer_radius.setValue(diffractometer_settings['gonio_radius'])

                print('Imported Diffractometer Settings from: %s' % fname)

                self.statusBar().showMessage('Imported Diffractometer Settings from: %s'%fname)

            except ValueError:
                print "Error in loading JSON file: %s."%(fname)
                print "Verify that the configuration file is properly formatted."

            except KeyError:
                import_type = 'diffractometer settings'
                print('Error in importing %s from: %s.' % (import_type, fname))
                print('Verify that this settings file is the right kind for this import.')
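
Note: the importer above expects a flat JSON object with the six keys it reads. A matching settings file can be produced with ujson as sketched below; the numeric values are placeholders, not recommended instrument settings:

import ujson

diffractometer_settings = {
    "wavelength": 1.5406,      # placeholder values only
    "beam_width": 0.1,
    "sample_width": 10.0,
    "sample_depth": 1.0,
    "sample_density": 2.2,
    "gonio_radius": 200.0,
}
with open("diffractometer_settings.json", "w") as f:
    ujson.dump(diffractometer_settings, f)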
Example 76
Project: carbon_xs_gui   Author: lktsui   File: CarbonXS_GUI.py    GNU General Public License v3.0 4 votes vote down vote up
def import_fitting_settings(self):

        """
        Import fitting settings from a JSON file
        The default directory is "/config/fitting settings"

        :return:
        """

        if self.default_fitsettings_import_dir:

            fname, opened = QtGui.QFileDialog.getOpenFileName(self, 'Import Fitting Settings',
                                                              self.default_fitsettings_import_dir, filter="*.json")
        else:
            fname, opened = QtGui.QFileDialog.getOpenFileName(self, 'Import Fitting Settings',
                                                           os.path.join(user_data_directory, 'config',
                                                                        'fitting settings'), filter="*.json")

        if fname:

            try:
                data_file = open(fname, 'r')
                directory, _ = os.path.split(fname)
                self.default_fitsettings_import_dir = directory

                fitting_settings = ujson.load(data_file)

                self.theta_min_value.setValue(fitting_settings['theta min'])
                self.theta_max_value.setValue(fitting_settings['theta max'])

                self.iterations.setValue(fitting_settings['iterations'])
                self.nskip.setValue(fitting_settings['nskip'])


                if fitting_settings['layers'] == 1:
                    self.number_layers.setCurrentIndex(0)
                elif fitting_settings['layers'] == 2:
                    self.number_layers.setCurrentIndex(1)
                else:
                    print "Error: Number of layers is not 1 or 2. Setting it to 1."
                    self.number_layers.setCurrentIndex(0)

                self.n_phi.setValue(fitting_settings['nphi'])
                self.n_sg.setValue(fitting_settings['nsg'])
                self.epsilon.setValue(fitting_settings['epsilon'])

                print('Imported Fitting Settings from: %s' % fname)
                self.statusBar().showMessage('Imported Fitting Settings from: %s' % fname)


            except ValueError:
                print "Error in loading JSON file: %s."%(fname)
                print "Verify that the configuration file is properly formatted."

            except KeyError:
                import_type = 'fitting settings'
                print('Error in importing %s from: %s.' % (import_type, fname))
                print('Verify that this settings file is the right kind for this import.')
Example 77
Project: zulip   Author: zulip   File: gitter.py    Apache License 2.0 4 votes vote down vote up
def do_convert_data(gitter_data_file: str, output_dir: str, threads: int=6) -> None:
    #  Subdomain is set by the user while running the import commands
    realm_subdomain = ""
    domain_name = settings.EXTERNAL_HOST

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    # Read data from the gitter file
    with open(gitter_data_file, "r") as fp:
        gitter_data = ujson.load(fp)

    realm, avatar_list, user_map = gitter_workspace_to_realm(
        domain_name, gitter_data, realm_subdomain)

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    # For user mentions
    user_short_name_to_full_name = {}
    for userprofile in realm['zerver_userprofile']:
        user_short_name_to_full_name[userprofile['short_name']] = userprofile['full_name']

    convert_gitter_workspace_messages(
        gitter_data, output_dir, subscriber_map, user_map,
        user_short_name_to_full_name)

    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads)

    attachment = {"zerver_attachment": []}  # type: Dict[str, List[Any]]

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files([], output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files([], output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    logging.info("Zulip data dump created at %s" % (output_dir,)) 
Example 78
Project: zulip   Author: zulip   File: slack.py    Apache License 2.0 4 votes vote down vote up
def process_long_term_idle_users(slack_data_dir: str, users: List[ZerverFieldsT],
                                 slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
                                 added_channels: AddedChannelsT,
                                 added_mpims: AddedMPIMsT, dm_members: DMMembersT,
                                 zerver_userprofile: List[ZerverFieldsT]) -> Set[int]:
    """Algorithmically, we treat users who have sent at least 10 messages
    or have sent a message within the last 60 days as active.
    Everyone else is treated as long-term idle, which means they will
    have a slightly slower first page load when coming back to
    Zulip.
    """
    all_messages = get_messages_iterator(slack_data_dir, added_channels, added_mpims, dm_members)

    sender_counts = defaultdict(int)  # type: Dict[str, int]
    recent_senders = set()  # type: Set[str]
    NOW = float(timezone_now().timestamp())
    for message in all_messages:
        timestamp = float(message['ts'])
        slack_user_id = get_message_sending_user(message)
        if not slack_user_id:
            continue

        if slack_user_id in recent_senders:
            continue

        if NOW - timestamp < 60 * 24 * 60 * 60:  # sent a message within the last 60 days
            recent_senders.add(slack_user_id)

        sender_counts[slack_user_id] += 1
    for (slack_sender_id, count) in sender_counts.items():
        if count > 10:
            recent_senders.add(slack_sender_id)

    long_term_idle = set()

    for slack_user in users:
        if slack_user["id"] in recent_senders:
            continue
        zulip_user_id = slack_user_id_to_zulip_user_id[slack_user['id']]
        long_term_idle.add(zulip_user_id)

    for user_profile_row in zerver_userprofile:
        if user_profile_row['id'] in long_term_idle:
            user_profile_row['long_term_idle'] = True
            # Setting last_active_message_id to 1 means the user, if
            # imported, will get the full message history for the
            # streams they were on.
            user_profile_row['last_active_message_id'] = 1

    return long_term_idle 
Example 79
Project: zulip   Author: zulip   File: test_import_export.py    Apache License 2.0 4 votes vote down vote up
def _export_realm(self, realm: Realm, exportable_user_ids: Optional[Set[int]]=None,
                      consent_message_id: Optional[int]=None) -> Dict[str, Any]:
        output_dir = self._make_output_dir()
        with patch('logging.info'), patch('zerver.lib.export.create_soft_link'):
            do_export_realm(
                realm=realm,
                output_dir=output_dir,
                threads=0,
                exportable_user_ids=exportable_user_ids,
                consent_message_id=consent_message_id,
            )
            export_usermessages_batch(
                input_path=os.path.join(output_dir, 'messages-000001.json.partial'),
                output_path=os.path.join(output_dir, 'messages-000001.json'),
                consent_message_id=consent_message_id,
            )

            try:
                export_usermessages_batch(
                    input_path=os.path.join(output_dir, 'messages-000002.json.partial'),
                    output_path=os.path.join(output_dir, 'messages-000002.json'),
                    consent_message_id=consent_message_id,
                )
            except FileNotFoundError:
                pass

        def read_file(fn: str) -> Any:
            full_fn = os.path.join(output_dir, fn)
            with open(full_fn) as f:
                return ujson.load(f)

        result = {}
        result['realm'] = read_file('realm.json')
        result['attachment'] = read_file('attachment.json')
        result['message'] = read_file('messages-000001.json')
        try:
            message = read_file('messages-000002.json')
            result["message"]["zerver_usermessage"].extend(message["zerver_usermessage"])
            result["message"]["zerver_message"].extend(message["zerver_message"])
        except FileNotFoundError:
            pass
        result['uploads_dir'] = os.path.join(output_dir, 'uploads')
        result['uploads_dir_records'] = read_file(os.path.join('uploads', 'records.json'))
        result['emoji_dir'] = os.path.join(output_dir, 'emoji')
        result['emoji_dir_records'] = read_file(os.path.join('emoji', 'records.json'))
        result['avatar_dir'] = os.path.join(output_dir, 'avatars')
        result['avatar_dir_records'] = read_file(os.path.join('avatars', 'records.json'))
        return result 
Example 80
Project: zulip   Author: zulip   File: test_gitter_importer.py    Apache License 2.0 4 votes vote down vote up
def test_gitter_import_data_conversion(self, mock_process_avatars: mock.Mock) -> None:
        output_dir = self.make_import_output_dir("gitter")
        gitter_file = os.path.join(os.path.dirname(__file__), 'fixtures/gitter_data.json')
        do_convert_data(gitter_file, output_dir)

        def read_file(output_file: str) -> Any:
            full_path = os.path.join(output_dir, output_file)
            with open(full_path) as f:
                return ujson.load(f)

        self.assertEqual(os.path.exists(os.path.join(output_dir, 'avatars')), True)
        self.assertEqual(os.path.exists(os.path.join(output_dir, 'emoji')), True)
        self.assertEqual(os.path.exists(os.path.join(output_dir, 'attachment.json')), True)

        realm = read_file('realm.json')

        # test realm
        self.assertEqual('Organization imported from Gitter!',
                         realm['zerver_realm'][0]['description'])

        # test users
        exported_user_ids = self.get_set(realm['zerver_userprofile'], 'id')
        exported_user_full_name = self.get_set(realm['zerver_userprofile'], 'full_name')
        self.assertIn('User Full Name', exported_user_full_name)
        exported_user_email = self.get_set(realm['zerver_userprofile'], 'email')
        self.assertIn('[email protected]', exported_user_email)

        # test stream
        self.assertEqual(len(realm['zerver_stream']), 1)
        self.assertEqual(realm['zerver_stream'][0]['name'], 'from gitter')
        self.assertEqual(realm['zerver_stream'][0]['deactivated'], False)
        self.assertEqual(realm['zerver_stream'][0]['realm'], realm['zerver_realm'][0]['id'])

        self.assertEqual(realm['zerver_defaultstream'][0]['stream'], realm['zerver_stream'][0]['id'])

        # test recipient
        exported_recipient_id = self.get_set(realm['zerver_recipient'], 'id')
        exported_recipient_type = self.get_set(realm['zerver_recipient'], 'type')
        self.assertEqual(set([1, 2]), exported_recipient_type)

        # test subscription
        exported_subscription_userprofile = self.get_set(realm['zerver_subscription'], 'user_profile')
        self.assertEqual(set([0, 1]), exported_subscription_userprofile)
        exported_subscription_recipient = self.get_set(realm['zerver_subscription'], 'recipient')
        self.assertEqual(len(exported_subscription_recipient), 3)
        self.assertIn(realm['zerver_subscription'][1]['recipient'], exported_recipient_id)

        messages = read_file('messages-000001.json')

        # test messages
        exported_messages_id = self.get_set(messages['zerver_message'], 'id')
        self.assertIn(messages['zerver_message'][0]['sender'], exported_user_ids)
        self.assertIn(messages['zerver_message'][1]['recipient'], exported_recipient_id)
        self.assertIn(messages['zerver_message'][0]['content'], 'test message')

        # test usermessages
        exported_usermessage_userprofile = self.get_set(messages['zerver_usermessage'], 'user_profile')
        self.assertEqual(exported_user_ids, exported_usermessage_userprofile)
        exported_usermessage_message = self.get_set(messages['zerver_usermessage'], 'message')
        self.assertEqual(exported_usermessage_message, exported_messages_id)