Python ujson.dump() Examples

The following are code examples showing how to use ujson.dump(), taken from open source Python projects.
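
As a quick orientation, here is a minimal sketch of the call pattern shared by most of the snippets below: open a file in text mode and pass the object and the file handle to ujson.dump(). The file name and sample data here are illustrative only.

import ujson

# Illustrative data; any JSON-serializable object works.
data = {"library": "ujson", "values": [1, 2, 3]}

# ujson.dump() serializes the object and writes the JSON text directly to the
# open file handle; indent is optional and pretty-prints the output.
with open("example.json", "w") as fp:
    ujson.dump(data, fp, indent=4)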

Example 1
Project: DoubanMovieTool   Author: frostnotfall   File: utils.py    MIT License
async def aiohttp_check_cookie():
    try:
        with open('cookies.json', 'r', encoding="UTF-8") as f:
            cookies = ujson.load(f)
        async with aiohttp.ClientSession(cookie_jar=aiohttp.CookieJar(),
                                         headers=head,
                                         cookies=cookies) as session:
            async with session.get('https://movie.douban.com'):
                pass
    except (ValueError, FileNotFoundError):
        async with aiohttp.ClientSession(cookie_jar=aiohttp.CookieJar(),
                                         headers=head) as session:
            async with session.get('https://movie.douban.com') as res:
                cookies = session.cookie_jar.filter_cookies('https://movie.douban.com')
                for key, cookie in res.cookies.items():
                    cookies[cookie.key] = cookie.value
                with open('cookies.json', 'w') as f:
                    ujson.dump(cookies, f) 
Example 2
Project: zulip   Author: zulip   File: event_queue.py    Apache License 2.0
def setup_event_queue(port: int) -> None:
    if not settings.TEST_SUITE:
        load_event_queues(port)
        atexit.register(dump_event_queues, port)
        # Make sure we dump event queues even if we exit via signal
        signal.signal(signal.SIGTERM, lambda signum, stack: sys.exit(1))
        add_reload_hook(lambda: dump_event_queues(port))

    try:
        os.rename(persistent_queue_filename(port), persistent_queue_filename(port, last=True))
    except OSError:
        pass

    # Set up event queue garbage collection
    ioloop = tornado.ioloop.IOLoop.instance()
    pc = tornado.ioloop.PeriodicCallback(lambda: gc_event_queues(port),
                                         EVENT_QUEUE_GC_FREQ_MSECS, ioloop)
    pc.start()

    send_restart_events(immediate=settings.DEVELOPMENT) 
Example 3
Project: zulip   Author: zulip   File: compilemessages.py    Apache License 2.0
def create_language_name_map(self) -> None:
        join = os.path.join
        deploy_root = settings.DEPLOY_ROOT
        path = join(deploy_root, 'locale', 'language_options.json')
        output_path = join(deploy_root, 'locale', 'language_name_map.json')

        with open(path, 'r') as reader:
            languages = ujson.load(reader)
            lang_list = []
            for lang_info in languages['languages']:
                lang_info['name'] = lang_info['name_local']
                del lang_info['name_local']
                lang_list.append(lang_info)

            lang_list.sort(key=lambda lang: lang['name'])

        with open(output_path, 'w') as output_file:
            ujson.dump({'name_map': lang_list}, output_file, indent=4, sort_keys=True)
            output_file.write('\n') 
Example 4
Project: ecabc   Author: ECRL   File: abc.py    MIT License
def save_settings(self, filename):
        '''Save settings to a JSON file

        Args:
            filename (string): name of the file to save to
        '''

        data = dict()
        data['valueRanges'] = self._value_ranges
        data['best_values'] = [str(value) for value in self._best_values]
        data['minimize'] = self._minimize
        data['num_employers'] = self._num_employers
        data['best_score'] = str(self._best_score)
        data['limit'] = self._limit
        data['best_error'] = self._best_error
        with open(filename, 'w') as outfile:
            json.dump(data, outfile, indent=4, sort_keys=True) 
Example 5
Project: DBLPParser   Author: IsaacChanghau   File: filter_and_statistic.py    MIT License
def filter_records(dataset, save_path):
    results = []
    for record in tqdm(dataset, desc='Filter records'):
        title, author, year, journal, pages = record['title'][0], record['author'], record['year'][0], \
                                              record['journal'][0], record['pages'][0]
        if journal == 'IACR Cryptology ePrint Archive':
            print("\t".join([title, ', '.join(author), year, journal, pages]))
        # pre-defined method to filter records
        tokens = word_tokenize(title)
        if len(tokens) < 4 or len(tokens) > 20:
            continue
        if int(year) < 1980:
            continue
        if len(author) > 6:
            continue
        if journal not in journals:
            continue
        if int(pages) < 2 or int(pages) > 40:
            continue
        results.append(record)
    with codecs.open(save_path, mode='w', encoding='utf8') as f:
        ujson.dump(results, f)
    return results 
Example 6
Project: cvrdata   Author: gronlund   File: cvr_download.py    MIT License
def download_all_dicts_to_file(filename, search, mode='w'):
    """ Download data from elastic search server

    :param filename: str, name of file to save data
    :param search: elasticsearch search object to query
    :param mode: str, file write mode (w, a)
    :return: filename, str
    """
    print('Download Data Write to File')
    print('ElasticSearch Download Scan Query: ', str(search.to_dict())[0:1000], ' ...')
    generator = search.scan()
    with open(filename, mode) as f:
        for obj in tqdm.tqdm(generator):
            json.dump(obj.to_dict(), f)
            f.write('\n')
    print('Updates Downloaded - File {0} written'.format(filename))
    return filename 
Example 7
Project: iris   Author: linkedin   File: retention.py    BSD 2-Clause "Simplified" License
def archive_incident(incident_row, archive_path):
    incident = {field[1]: incident_row[i] for i, field in enumerate(incident_fields)}

    created = incident['created']
    incident_dir = os.path.join(archive_path, str(created.year), str(created.month), str(created.day), str(incident['incident_id']))

    try:
        os.makedirs(incident_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            logger.exception('Failed creating %s DIR', incident_dir)
            return

    incident_file = os.path.join(incident_dir, 'incident_data.json')

    try:
        with open(incident_file, 'w') as handle:
            ujson.dump(incident, handle, indent=2)
    except IOError:
        logger.exception('Failed writing incident metadata to %s', incident_file) 
Example 8
Project: iris   Author: linkedin   File: retention.py    BSD 2-Clause "Simplified" License
def archive_message(message_row, archive_path):
    message = {field[1]: message_row[i] for i, field in enumerate(message_fields)}

    created = message['created']
    incident_dir = os.path.join(archive_path, str(created.year), str(created.month), str(created.day), str(message['incident_id']))

    try:
        os.makedirs(incident_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            logger.exception('Failed creating %s DIR', incident_dir)
            return

    message_file = os.path.join(incident_dir, 'message_%d.json' % message['message_id'])

    try:
        with open(message_file, 'w') as handle:
            ujson.dump(message, handle, indent=2)
    except IOError:
        logger.exception('Failed writing message to %s', message_file) 
Example 9
Project: iris   Author: linkedin   File: retention.py    BSD 2-Clause "Simplified" License
def archive_comment(comment_row, archive_path):
    comment = {field[1]: comment_row[i] for i, field in enumerate(comment_fields)}

    created = comment['created']
    incident_dir = os.path.join(archive_path, str(created.year), str(created.month), str(created.day), str(comment['incident_id']))

    try:
        os.makedirs(incident_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            logger.exception('Failed creating %s DIR', incident_dir)
            return

    comment_file = os.path.join(incident_dir, 'comment_%d.json' % comment['comment_id'])

    try:
        with open(comment_file, 'w') as handle:
            ujson.dump(comment, handle, indent=2)
    except IOError:
        logger.exception('Failed writing comment to %s', comment_file) 
Example 10
Project: salome-kratos-converter   Author: philbucher   File: converter_gui.py    GNU General Public License v3.0
def _ExportConverterScheme(self):
        if len(self.tree.get_children()) == 0:
            self.PlotCmdOutput("Nothing to be exported", "red")
        else:
            input_save_file_path = tk.filedialog.asksaveasfilename(title="Select file",
                                        filetypes=[("converter files","*" + utils.conv_scheme_file_ending)])
            if input_save_file_path:
                if not input_save_file_path.endswith(utils.conv_scheme_file_ending):
                    input_save_file_path += utils.conv_scheme_file_ending

                model_part_dict = self.model_part.AssembleMeshInfoDict()

                # Add general information to file
                model_part_dict.update({"general" : global_utils.GetGeneralInfoDict(utils.VERSION)})

                with open(input_save_file_path, "w") as save_file:
                    json.dump(model_part_dict, save_file, sort_keys = True, indent = 4)

                self.PlotCmdOutput("Exported the Scheme", "green") 
Example 11
Project: llp   Author: quadrismegistus   File: text.py    MIT License
def save_freqs_json(self,ofolder=None,force=False):
		if not self.id: return {}
		if not ofolder: ofolder=self.corpus.path_freqs
		ofnfn=os.path.join(ofolder,self.id+'.json')
		opath = os.path.split(ofnfn)[0]
		if not os.path.exists(opath): os.makedirs(opath)
		if not force and os.path.exists(ofnfn) and os.stat(ofnfn).st_size:
			#print('>> already tokenized:',self.id)
			return self.freqs_json
		else:
			pass
			#print('>> tokenizing:',self.id,ofnfn)

		#print(self, self.id, self.path_txt, self.exists_txt, self.txt[:10])
		toks=tokenize_text(self.txt)
		#print(len(toks),ofnfn)
		tokd=dict(Counter(toks))
		with open(ofnfn,'w') as of:
			json.dump(tokd,of)
		return tokd 
Example 12
Project: cappuccino   Author: FoxDev   File: userdb.py    GNU General Public License v3.0
def sync(self, force=False):
        # Only write to disk once every 5 minutes so seen.py doesn't kill performance with constant writes.
        if force or abs((datetime.now() - self.last_write).seconds) >= 60 * 5:
            with self.file.open('w') as fd:
                json.dump(self.data, fd)
            self.last_write = datetime.now()
            self.bot.log.debug('Synced database to disk.') 
Example 13
Project: devtools-parser   Author: WPO-Foundation   File: devtools_parser.py    Apache License 2.0
def write(self):
        """Write out the resulting json data"""
        if self.out_file is not None and len(self.result['pageData']) and \
            len(self.result['requests']):
            try:
                _, ext = os.path.splitext(self.out_file)
                if ext.lower() == '.gz':
                    with gzip.open(self.out_file, 'wb') as f_out:
                        json.dump(self.result, f_out)
                else:
                    with open(self.out_file, 'w') as f_out:
                        json.dump(self.result, f_out)
            except Exception:
                logging.critical("Error writing to " + self.out_file) 
Example 14
Project: diplomacy   Author: diplomacy   File: server.py    GNU Affero General Public License v3.0
def save_json_on_disk(filename, json_dict):
    """ Save given JSON dictionary into given filename and back-up previous file version if exists. """
    if os.path.exists(filename):
        os.rename(filename, get_backup_filename(filename))
    with open(filename, 'w') as file:
        json.dump(json_dict, file) 
Example 15
Project: RPGBot   Author: henry232323   File: RPGBot.py    GNU General Public License v3.0
async def shutdown(self):
        with open("savedata/prefixes.json", 'w') as prf:
            json.dump(self.prefixes, prf)

        await self.session.close() 
Example 16
Project: R-Net   Author: HKUST-KnowComp   File: prepro.py    MIT License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 17
Project: tensorpack-mask-rcnn   Author: armandmcqueen   File: dataset.py    Apache License 2.0
def eval_or_save_inference_results(self, results, dataset, output=None):
        """
        Args:
            results (list[dict]): the inference results as dicts.
                Each dict corresponds to one __instance__. It contains the following keys:

                image_id (str): the id that matches `load_inference_roidbs`.
                category_id (int): the category prediction, in range [1, #category]
                bbox (list[float]): x1, y1, x2, y2
                score (float):
                segmentation: the segmentation mask in COCO's rle format.

            dataset (str): the name of the dataset to evaluate.
            output (str): the output file to optionally save the results to.

        Returns:
            dict: the evaluation results.
        """
        continuous_id_to_COCO_id = {v: k for k, v in COCODetection.COCO_id_to_category_id.items()}
        for res in results:
            # convert to COCO's incontinuous category id
            res['category_id'] = continuous_id_to_COCO_id[res['category_id']]
            # COCO expects results in xywh format
            box = res['bbox']
            box[2] -= box[0]
            box[3] -= box[1]
            res['bbox'] = [round(float(x), 3) for x in box]

        assert output is not None, "COCO evaluation requires an output file!"
        with open(output, 'w') as f:
            json.dump(results, f)
        if len(output):
            # sometimes may crash if the results are empty?
            return COCODetection(cfg.DATA.BASEDIR, dataset).print_coco_metrics(output)
        else:
            return {}

Example 18
Project: cuny-bdif   Author: aristotle-tek   File: cleanjson.py    MIT License
def write_json_rows(rows, docfilepath, encode_html=True):
    """ Note that we are appending"""
    with open(docfilepath, 'a') as fp:
        for row in rows:
            ujson.dump(row, fp, encode_html_chars=encode_html)
            fp.write('\n')
Example 19
Project: trace-parser   Author: WPO-Foundation   File: trace_parser.py    Apache License 2.0
def write_json(self, out_file, json_data):
        """Write out one of the internal structures as a json blob"""
        try:
            _, ext = os.path.splitext(out_file)
            if ext.lower() == '.gz':
                with gzip.open(out_file, 'wb') as f:
                    json.dump(json_data, f)
            else:
                with open(out_file, 'w') as f:
                    json.dump(json_data, f)
        except BaseException:
            logging.critical("Error writing to " + out_file) 
Example 20
Project: ncdu-s3   Author: EverythingMe   File: ncdu_data_writer.py    BSD 2-Clause "Simplified" License
def __init__(self, output, root):
        """
        :type output: io.RawIOBase
        :type root: str
        """

        self.output = output
        self.depth = 0

        self.output.write('[1,0,')
        json.dump({'progname': 'ncdu-s3', 'progver': '0.1', 'timestamp': int(time.time())}, self.output)

        # ncdu data format must begin with a directory
        self.dir_enter(root) 
Example 21
Project: ncdu-s3   Author: EverythingMe   File: ncdu_data_writer.py    BSD 2-Clause "Simplified" License
def dir_enter(self, name):
        """
        :type name: str
        """

        self.depth += 1

        self.output.write(",\n")

        self.output.write('[')
        json.dump({'name': name}, self.output) 
Example 22
Project: ncdu-s3   Author: EverythingMe   File: ncdu_data_writer.py    BSD 2-Clause "Simplified" License
def file_entry(self, name, size):
        """
        :type name: str
        :type size: int
        """

        self.output.write(",\n")

        json.dump({'name': name, 'dsize': size}, self.output) 
Example 23
Project: Question_Answering_Models   Author: l11x0m7   File: prepro.py    MIT License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 24
Project: Question_Answering_Models   Author: l11x0m7   File: prepro.py    MIT License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 25
Project: Question_Answering_Models   Author: l11x0m7   File: prepro.py    MIT License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 26
Project: QANet-for-SQuAD-2.0   Author: Aadesh-Magare   File: setup.py    MIT License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 27
Project: slp   Author: georgepar   File: system.py    MIT License
def pickle_dump(data: Any, fname: str) -> None:
    with open(fname, 'wb') as fd:
        pickle.dump(data, fd) 
Example 28
Project: slp   Author: georgepar   File: system.py    MIT License
def json_dump(data: types.GenericDict, fname: str) -> None:
    with open(fname, 'w') as fd:
        json.dump(data, fd) 
Example 29
Project: zulip   Author: nthien   File: event_queue.py    Apache License 2.0
def dump_event_queues():
    start = time.time()

    with file(settings.JSON_PERSISTENT_QUEUE_FILENAME, "w") as stored_queues:
        ujson.dump([(qid, client.to_dict()) for (qid, client) in clients.iteritems()],
                   stored_queues)

    logging.info('Tornado dumped %d event queues in %.3fs'
                 % (len(clients), time.time() - start)) 
Example 30
Project: zulip   Author: nthien   File: event_queue.py    Apache License 2.0
def setup_event_queue():
    load_event_queues()
    atexit.register(dump_event_queues)
    # Make sure we dump event queues even if we exit via signal
    signal.signal(signal.SIGTERM, lambda signum, stack: sys.exit(1))

    try:
        os.rename(settings.PERSISTENT_QUEUE_FILENAME, "/var/tmp/event_queues.pickle.last")
    except OSError:
        pass

    try:
        os.rename(settings.JSON_PERSISTENT_QUEUE_FILENAME, "/var/tmp/event_queues.json.last")
    except OSError:
        pass

    # Set up event queue garbage collection
    ioloop = tornado.ioloop.IOLoop.instance()
    pc = tornado.ioloop.PeriodicCallback(gc_event_queues,
                                         EVENT_QUEUE_GC_FREQ_MSECS, ioloop)
    pc.start()

    send_restart_events()

Example 31
Project: clea   Author: scieloorg   File: __main__.py    BSD 2-Clause "Simplified" License
def main(xml_files, jsonl_output):
    for xml_file in xml_files:
        ujson.dump(xml2dict(xml_file), jsonl_output)
        jsonl_output.write("\n") 
Example 32
Project: carbon_xs_gui   Author: lktsui   File: CarbonXS_GUI.py    GNU General Public License v3.0
def write_config(self):

        """
        Exports the current program configuration to JSON
        
        
        :return: 
        """

        configuration = {'program_config':
                             {'last_header_lines':self.last_header_lines_used,
                              'last_separator':self.last_separator_used,
                              'lock_x':bool(self.lock_x_axis.checkState()),
                              'lock_y':bool(self.lock_y_axis.checkState()),
                              'show_previous_fit':bool(self.show_previous.checkState()),

                              },
                          'file_defaults':
                              {'diff_data_dir':self.default_diff_data_dir,
                               'export_dir':self.default_export_dir,
                               'export_carboninp':self.default_carboninp_export_dir,
                               'import_carboninp':self.default_carboninp_import_dir,
                               'export_fitparams':self.default_fitparams_export_dir,
                               'import_fitparams':self.default_fitparams_import_dir,
                               'export_fitsettings':self.default_fitsettings_export_dir,
                               'import_fitsettings':self.default_fitsettings_import_dir,
                               'export_diffsettings':self.default_diffsettings_export_dir,
                               'import_diffsettings':self.default_diffsettings_import_dir,


                              }

                         }

        config_file = open(os.path.join(user_data_directory, 'config','config.json'), 'w')

        ujson.dump(configuration, config_file, indent=4) 
Example 33
Project: carbon_xs_gui   Author: lktsui   File: CarbonXS_GUI.py    GNU General Public License v3.0
def export_diffractometer_params(self):
        """
        Exports currently loaded diffractometer files to a JSON file
        The default directory is "/config/diffractometer settings"

        :return:
        """

        if self.default_diffsettings_export_dir:

            fname, opened = QtGui.QFileDialog.getSaveFileName(self, 'Export File',
                                                              self.default_diffsettings_export_dir, filter="*.json")
        else:
            fname, opened = QtGui.QFileDialog.getSaveFileName(self, 'Export File',
                                                          os.path.join(user_data_directory,
                                                          'config', 'diffractometer settings'), filter="*.json")

        if fname:

            data_file = open(fname, 'w')
            directory, _ = os.path.split(fname)
            self.default_diffsettings_export_dir = directory

            diffractometer_settings = {"wavelength":self.wavelength.value(),
                                       "beam_width":self.beam_width.value(),
                                       "sample_width":self.sample_width.value(),
                                       "sample_depth":self.sample_depth.value(),
                                       "sample_density":self.sample_density.value(),
                                       "gonio_radius":self.goniometer_radius.value(),


                                       }


            ujson.dump(diffractometer_settings, data_file, indent = 4)

            print 'Exported Diffractometer Settings to: %s'%fname
            self.statusBar().showMessage('Exported Diffractometer Settings to: %s'%fname) 
Example 34
Project: carbon_xs_gui   Author: lktsui   File: CarbonXS_GUI.py    GNU General Public License v3.0
def export_fitting_params(self):
        """
        Exports currently loaded fitting parameters files to a JSON file
        The default directory is "/config/fitting parameters"

        :return:
        """

        if self.default_fitparams_export_dir:

            fname, opened = QtGui.QFileDialog.getSaveFileName(self, 'Export Fitting Parameters',
                                                              self.default_fitparams_export_dir, filter="*.json")
        else:
            fname, opened = QtGui.QFileDialog.getSaveFileName(self, 'Export Fitting Parameters',
                                                          os.path.join(user_data_directory,
                                                         'config', 'fitting parameters'), filter="*.json")

        if fname:

            data_file = open(fname, 'w')

            directory, _ = os.path.split(fname)
            self.default_fitparams_export_dir = directory

            fitting_params = {}

            for index, label in enumerate(self.parameter_labels):

                fitting_params[label.text()] = (self.parameter_list[index].value(), self.parameter_enable_list[index].isChecked())

            # Include the number of layers used in this model with the fitting parameters export
            fitting_params['number_layers'] = self.number_layers.currentIndex()+1


            ujson.dump(fitting_params, data_file, indent = 4)

            print 'Exported Fitting Parameters to: %s'%fname
            self.statusBar().showMessage('Exported Fitting Parameters to: %s'%fname) 
Example 35
Project: zulip   Author: zulip   File: import_util.py    Apache License 2.0
def create_converted_data_files(data: Any, output_dir: str, file_path: str) -> None:
    output_file = output_dir + file_path
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as fp:
        ujson.dump(data, fp, indent=4) 
Example 36
Project: zulip   Author: zulip   File: event_queue.py    Apache License 2.0
def dump_event_queues(port: int) -> None:
    start = time.time()

    with open(persistent_queue_filename(port), "w") as stored_queues:
        ujson.dump([(qid, client.to_dict()) for (qid, client) in clients.items()],
                   stored_queues)

    logging.info('Tornado %d dumped %d event queues in %.3fs'
                 % (port, len(clients), time.time() - start)) 
Example 37
Project: zulip   Author: zulip   File: export.py    Apache License 2.0
def export_uploads_from_local(realm: Realm, local_dir: Path, output_dir: Path) -> None:

    count = 0
    records = []
    for attachment in Attachment.objects.filter(realm_id=realm.id):
        local_path = os.path.join(local_dir, attachment.path_id)
        output_path = os.path.join(output_dir, attachment.path_id)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        shutil.copy2(local_path, output_path)
        stat = os.stat(local_path)
        record = dict(realm_id=attachment.realm_id,
                      user_profile_id=attachment.owner.id,
                      user_profile_email=attachment.owner.email,
                      s3_path=attachment.path_id,
                      path=attachment.path_id,
                      size=stat.st_size,
                      last_modified=stat.st_mtime,
                      content_type=None)
        records.append(record)

        count += 1

        if (count % 100 == 0):
            logging.info("Finished %s" % (count,))
    with open(os.path.join(output_dir, "records.json"), "w") as records_file:
        ujson.dump(records, records_file, indent=4) 
Example 38
Project: zulip   Author: zulip   File: export.py    Apache License 2.0
def export_emoji_from_local(realm: Realm, local_dir: Path, output_dir: Path) -> None:

    count = 0
    records = []
    for realm_emoji in RealmEmoji.objects.filter(realm_id=realm.id):
        emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm.id,
            emoji_file_name=realm_emoji.file_name
        )
        local_path = os.path.join(local_dir, emoji_path)
        output_path = os.path.join(output_dir, emoji_path)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        shutil.copy2(local_path, output_path)
        # Realm Emoji author is optional.
        author = realm_emoji.author
        author_id = None
        if author:
            author_id = realm_emoji.author.id
        record = dict(realm_id=realm.id,
                      author=author_id,
                      path=emoji_path,
                      s3_path=emoji_path,
                      file_name=realm_emoji.file_name,
                      name=realm_emoji.name,
                      deactivated=realm_emoji.deactivated)
        records.append(record)

        count += 1
        if (count % 100 == 0):
            logging.info("Finished %s" % (count,))
    with open(os.path.join(output_dir, "records.json"), "w") as records_file:
        ujson.dump(records, records_file, indent=4) 
Example 39
Project: tracegen   Author: secworks   File: tracegen.py    BSD 2-Clause "Simplified" License
def dump_traces(dest_dir, base_name, traces, verbose=False):
    if dest_dir[-1] != "/":
        dest_dir += "/"

    dst_file_ctr = 0
    cipher_db = {}
    for (trace, ciphertext) in traces:
        dst_file_name = dest_dir + base_name + "_" + str(dst_file_ctr).zfill(8) + ".dpa"
        if verbose:
            print("Writing converted data to file: %s" % dst_file_name)

        with open(dst_file_name, 'wb') as curr_dst_file:
            ujson.dump(trace, curr_dst_file)
        dst_file_ctr += 1
        cipher_db[dst_file_name] = ciphertext

    # Save the DB with ciphertexts and file names.
    dst_cipher_file_name = dest_dir + base_name + "_ciphertexts.dpb"
    with open(dst_cipher_file_name, 'wb') as curr_dst_file:
        pickle.dump(cipher_db, curr_dst_file, pickle.HIGHEST_PROTOCOL)


Example 40
Project: QANet   Author: NLPLearn   File: prepro.py    MIT License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 41
Project: S-NET   Author: noble6emc2   File: prepro.py    MIT License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 42
Project: developer-badge-2018-apps   Author: IBM-Developer-Korea   File: util.py    Apache License 2.0
def save(self):
        # Create config_dir if do not exists
        try:
            uos.stat(self.config_dir)
        except OSError:
            uos.mkdir(self.config_dir)
        with open('{}/{}.json'.format(self.config_dir, self.name), 'w') as fp:
            json.dump(self.data, fp) 
Example 43
Project: twitter_mongodb_helper   Author: IDEA-NTHU-Taiwan   File: run.py    MIT License
def write_json_file(filename, path, result):
    """Writes the result to json with the given filename.

    Args:
        filename   (str): Filename to write to.
        path       (str): Directory path to use.
        result          : Data to write to the JSON file.
    """
    with open(path + filename + ".json", "w+") as json_file:
        ujson.dump(result, json_file)
    json_file.close() 
Example 44
Project: DBLPParser   Author: IsaacChanghau   File: dblp_parser.py    MIT License
def parse_entity(dblp_path, save_path, type_name, features=None, save_to_csv=False, include_key=False):
    """Parse specific elements according to the given type name and features"""
    log_msg("PROCESS: Start parsing for {}...".format(str(type_name)))
    assert features is not None, "features must be assigned before parsing the dblp dataset"
    results = []
    attrib_count, full_entity, part_entity = {}, 0, 0
    for _, elem in context_iter(dblp_path):
        if elem.tag in type_name:
            attrib_values = extract_feature(elem, features, include_key)  # extract required features
            results.append(attrib_values)  # add record to results array
            for key, value in attrib_values.items():
                attrib_count[key] = attrib_count.get(key, 0) + len(value)
            cnt = sum([1 if len(x) > 0 else 0 for x in list(attrib_values.values())])
            if cnt == len(features):
                full_entity += 1
            else:
                part_entity += 1
        elif elem.tag not in all_elements:
            continue
        clear_element(elem)
    if save_to_csv:
        f = open(save_path, 'w', newline='', encoding='utf8')
        writer = csv.writer(f, delimiter=',')
        writer.writerow(features)  # write title
        for record in results:
            # some features contain multiple values (e.g.: author), concatenate with `::`
            row = ['::'.join(v) for v in list(record.values())]
            writer.writerow(row)
        f.close()
    else:  # default save to json file
        with codecs.open(save_path, mode='w', encoding='utf8', errors='ignore') as f:
            ujson.dump(results, f)
    return full_entity, part_entity, attrib_count 
Example 45
Project: rsinc   Author: ConorWilliams   File: rsinc.py    MIT License
def write(file, d):
    """Writes dict to json"""
    with open(file, "w") as fp:
        ujson.dump(d, fp, sort_keys=True, indent=2) 
Example 46
Project: QANet-pytorch   Author: hengruo   File: main.py    MIT License
def test(model, dataset, eval_file):
    model.eval()
    answer_dict = {}
    losses = []
    num_batches = config.test_num_batches
    with torch.no_grad():
        for i in tqdm(range(num_batches), total=num_batches):
            Cwid, Ccid, Qwid, Qcid, y1, y2, ids = dataset[i]
            Cwid, Ccid, Qwid, Qcid = Cwid.to(device), Ccid.to(device), Qwid.to(device), Qcid.to(device)
            p1, p2 = model(Cwid, Ccid, Qwid, Qcid)
            y1, y2 = y1.to(device), y2.to(device)
            loss1 = F.nll_loss(p1, y1, reduction='mean')
            loss2 = F.nll_loss(p2, y2, reduction='mean')
            loss = (loss1 + loss2) / 2
            losses.append(loss.item())
            yp1 = torch.argmax(p1, 1)
            yp2 = torch.argmax(p2, 1)
            yps = torch.stack([yp1, yp2], dim=1)
            ymin, _ = torch.min(yps, 1)
            ymax, _ = torch.max(yps, 1)
            answer_dict_, _ = convert_tokens(eval_file, ids.tolist(), ymin.tolist(), ymax.tolist())
            answer_dict.update(answer_dict_)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    f = open("log/answers.json", "w")
    json.dump(answer_dict, f)
    f.close()
    metrics["loss"] = loss
    print("TEST loss {:8f} F1 {:8f} EM {:8f}\n".format(loss, metrics["f1"], metrics["exact_match"]))
    return metrics 
Example 47
Project: QANet-pytorch   Author: hengruo   File: preproc.py    MIT License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 48
Project: clade   Author: 17451k   File: __init__.py    Apache License 2.0
def __dump_conf(self):
        # Overwrite this file each time
        with open(self.conf_file, "w") as fh:
            ujson.dump(
                self.conf,
                fh,
                sort_keys=True,
                indent=4,
                ensure_ascii=False,
                escape_forward_slashes=False,
            ) 
Example 49
Project: clade   Author: 17451k   File: abstract.py    Apache License 2.0
def dump_data(self, data, file_name, indent=4):
        """Dump data to a json file in the object working directory."""

        if not os.path.isabs(file_name):
            file_name = os.path.join(self.work_dir, file_name)

        os.makedirs(os.path.dirname(file_name), exist_ok=True)

        self.debug("Dumping {}".format(file_name))

        try:
            with open(file_name, "w") as fh:
                ujson.dump(
                    data,
                    fh,
                    sort_keys=True,
                    indent=indent,
                    ensure_ascii=False,
                    escape_forward_slashes=False,
                )
        except RecursionError:
            # This is a workaround, but it is rarely required
            self.warning(
                "Do not print data to file due to recursion limit {}".format(
                    file_name
                )
            ) 
Example 50
Project: meerkat   Author: crdietrich   File: data.py    MIT License
def create_metadata(self, indent=None):
        """Generate JSON metadata and format it with
        a leading shebang sequence, '#!'

        Parameters
        ----------
        indent : None or int, passed to the json.dump builtin

        Returns
        -------
        str, metadata in JSON with '#!' at the beginning
        """
        return '#!' + self.to_json(indent=indent) 
Example 51
Project: meerkat   Author: crdietrich   File: data.py    MIT License
def create_metadata(self, indent=None):
        """Generate JSON metadata and format it with
        a leading shebang sequence, '#!'

        Parameters
        ----------
        indent : None or int, passed to the json.dump builtin

        Returns
        -------
        str, JSON formatted metadata describing JSON data format
        """
        return self.to_json(indent=indent) 
Example 52
Project: rejection-qa   Author: becxer   File: prepro.py    Apache License 2.0
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 53
Project: lrn   Author: bzhangGo   File: prepro.py    BSD 3-Clause "New" or "Revised" License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 54
Project: lrn   Author: bzhangGo   File: prepro.py    BSD 3-Clause "New" or "Revised" License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 55
Project: salome-kratos-converter   Author: philbucher   File: converter_gui.py    GNU General Public License v3.0
def _SaveConverterProject(self, save_as):
        if len(self.tree.get_children()) == 0:
            self.PlotCmdOutput("Nothing to be saved", "red")
        else:
            if (self.save_file_path == "" or save_as):
                input_save_file_path = tk.filedialog.asksaveasfilename(title="Select file",
                                         filetypes=[("converter files","*" + utils.conv_project_file_ending)])

                if input_save_file_path: # A file path was returned
                    if not input_save_file_path.endswith(utils.conv_project_file_ending):
                        input_save_file_path += utils.conv_project_file_ending

                    self.save_file_path = input_save_file_path

            if self.save_file_path == "":
                self.PlotCmdOutput("File was not saved", "red")
            else:
                start_time = time.time()
                serialized_model_part_dict = self.model_part.Serialize()

                # Add general information to file
                serialized_model_part_dict.update({"general" : global_utils.GetGeneralInfoDict(utils.VERSION)})

                with open(self.save_file_path, "w") as save_file:
                    if global_utils.GetDebug():
                        # Do this only for debugging, file size is much larger!
                        json.dump(serialized_model_part_dict, save_file, sort_keys = True, indent = 4)
                    else:
                        fast_json.dump(serialized_model_part_dict, save_file)

                global_utils.LogTiming("Save Project", start_time)
                self.PlotCmdOutput("Saved the project", "green")
                self.unsaved_changes_exist = False
                global_utils.LogInfo("Saved Project") 
Example 56
Project: obi_socket   Author: mattzzw   File: obi_tools.py    MIT License
def save_cfg(cfg_list):
    gc.collect()
    #dump_cfg(cfg_dict)
    f = open("obi_socket.cfg", 'w')
    ujson.dump(cfg_list, f)
    f.close()
    gc.collect() 
Example 57
Project: umls-embeddings   Author: r-mal   File: DataGenerator.py    GNU General Public License v3.0
def __init__(self, subj=None, rel=None, obj=None, name=None, cachedir="/home/rmm120030/working/umls-mke/.cache",
               valid_triples=None):
    # cachedir = os.path.join(cachedir, name)
    # if os.path.exists(cachedir):
    #   start = time.time()
    #   print('loading negative sampler maps from %s' % cachedir)
    #   self.sr2o = load_dict(os.path.join(cachedir, 'sr2o'))
    #   self.or2s = load_dict(os.path.join(cachedir, 'or2s'))
    #   self.concepts = ujson.load(open(os.path.join(cachedir, 'concepts.json')))
    #   print('done! Took %.2f seconds' % (time.time() - start))
    # else:
    self.sr2o = defaultdict(set)
    self.or2s = defaultdict(set)
    concepts = set()
    triples = zip(subj, rel, obj) if valid_triples is None else valid_triples
    for s, r, o in tqdm(triples, desc='building triple maps', total=len(triples)):
      # s, r, o = int(s), int(r), int(o)
      self.sr2o[(s, r)].add(o)
      self.or2s[(o, r)].add(s)
      concepts.update([s, o])
    self.concepts = list(concepts)

      # print('\n\ncaching negative sampler maps to %s' % cachedir)
      # os.makedirs(cachedir)
      # save_dict(self.sr2o, os.path.join(cachedir, 'sr2o'))
      # save_dict(self.or2s, os.path.join(cachedir, 'or2s'))
      # ujson.dump(self.concepts, open(os.path.join(cachedir, 'concepts.json'), 'w+'))
      # print('done!') 
Example 58
Project: umls-embeddings   Author: r-mal   File: DataGenerator.py    GNU General Public License v3.0
def save_dict(d, savepath):
  keys, values = [], []
  for k, v in d.iteritems():
    keys.append(list(k))
    values.append(list(v))

  ujson.dump(keys, open(savepath + '_keys.json', 'w+'))
  ujson.dump(values, open(savepath + '_values.json', 'w+')) 
Example 59
Project: neural_sequence_labeling   Author: IsaacChanghau   File: common.py    MIT License
def write_json(filename, dataset):
    with codecs.open(filename, mode="w", encoding="utf-8") as f:
        ujson.dump(dataset, f) 
Example 60
Project: QGforQA   Author: ZhangShiyue   File: utils.py    MIT License
def save(filename, obj, message=None):
    if message is not None:
        print("Saving {}...".format(message))
        with open(filename, "w") as fh:
            json.dump(obj, fh) 
Example 61
Project: le-traducteur   Author: dangitstam   File: pretokenize_corpus.py    Apache License 2.0
def main():
    """
    Given a parallel corpus, partitions examples into training, development, and test sets.

    Provided output will be a directory containing the partitions:
    <corpus_name> /
        <corpus_name>_train.jsonl
        <corpus_name>_development.jsonl
        <corpus_name>_test.jsonl
        partition_info.txt

    when given a parallel corpus <corpus_name>.jsonl
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    project_root = os.path.abspath(os.path.realpath(os.path.join(
        os.path.dirname(  # Escape out into project directory.
            os.path.dirname( # Escape out into scripts directory.
                os.path.realpath(__file__))))))
    parser.add_argument("--corpus-path", type=str,
                        help="Path to the parallel JSON lines corpus.")
    parser.add_argument("--save-dir", type=str,
                        default=project_root,
                        help="Directory to store the train-dev-test split directory.")
    args = parser.parse_args()
    corpus_name = os.path.basename(args.corpus_path).split('.')[0]
    out_file_path = os.path.join(args.save_dir, corpus_name + "_tokenized.jsonl")
    out_file = open(out_file_path, 'w')

    # Language-specific tokenizers.
    en_tokenizer = SpacyWordSplitter(language='en_core_web_sm')
    fr_tokenizer = SpacyWordSplitter(language='fr_core_news_sm')

    print("Tokenizing utterances for {}...".format(corpus_name))
    with open(args.corpus_path) as f:
        for lines in tqdm(grouper(f, 100, '')):
            # When the grouper collects a group smaller than the batch, padding
            # is done via empty strings.
            # Check for them explicitly before continuing.
            examples = [ujson.loads(line.strip()) for line in filter(lambda l: l, lines)]
            en_utterances = [ex['en'] for ex in examples]
            fr_utterances = [ex['fr'] for ex in examples]

            en_utterances_tokenized = en_tokenizer.batch_split_words(en_utterances)
            fr_utterances_tokenized = fr_tokenizer.batch_split_words(fr_utterances)

            for i, ex in enumerate(examples):
                ex_tokenized = {
                    'id': ex['id'],
                    'en': ' '.join([token.text for token in en_utterances_tokenized[i]]),
                    'fr': ' '.join([token.text for token in fr_utterances_tokenized[i]])
                }
                ujson.dump(ex_tokenized, out_file, ensure_ascii=False)
                out_file.write('\n')

    out_file.close() 
Example 62
Project: R-Net   Author: HKUST-KnowComp   File: main.py    MIT License
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
Example 63
Project: Question_Answering_Models   Author: l11x0m7   File: main.py    MIT License
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    try:
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction
    except:
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
Example 64
Project: Question_Answering_Models   Author: l11x0m7   File: main.py    MIT License
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file, get_record_parser(
            config, is_test=True), config).make_one_shot_iterator()

        model = QANet(config, test_batch, word_mat, char_mat, trainable=False, graph = g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1'])) 
Example 65
Project: Question_Answering_Models   Author: l11x0m7   File: main.py    MIT License
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    try:
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction
    except:
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
Example 66
Project: Question_Answering_Models   Author: l11x0m7   File: main.py    MIT License
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    try:
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction
    except:
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
Example 67
Project: carbon_xs_gui   Author: lktsui   File: CarbonXS_GUI.py    GNU General Public License v3.0 4 votes vote down vote up
def export_fitting_settings(self):

        """
        Exports the currently loaded fitting settings to a JSON file.
        The default directory is "/config/fitting settings".

        :return:
        """

        if self.default_fitsettings_export_dir:
            fname, opened = QtGui.QFileDialog.getSaveFileName(self, 'Export Fitting Settings',
                                                              self.default_fitsettings_export_dir, filter="*.json")

        else:
            fname, opened = QtGui.QFileDialog.getSaveFileName(self, 'Export Fitting Settings',
                                                              os.path.join(user_data_directory,
                                                                           'config', 'fitting settings'), filter="*.json")

        if fname:

            data_file = open(fname, 'w')
            directory, _ = os.path.split(fname)
            self.default_fitsettings_export_dir = directory

            fitting_settings = {}

            fitting_settings['theta min'] = self.theta_min_value.value()
            fitting_settings['theta max'] = self.theta_max_value.value()

            fitting_settings['iterations'] = self.iterations.value()
            fitting_settings['nskip']  = self.nskip.value()

            if self.number_layers.currentIndex() == 0:
                fitting_settings['layers'] = 1
            else:
                fitting_settings['layers'] = 2

            fitting_settings['nphi']  = self.n_phi.value()
            fitting_settings['nsg']  = self.n_sg.value()
            fitting_settings['epsilon'] = self.epsilon.value()

            fitting_settings['enable_gc'] = False
            fitting_settings['gc_delta']  = 0
            ujson.dump(fitting_settings, data_file, indent=4)
            data_file.close()

            print('Exported Fitting Settings to: %s' % fname)
            self.statusBar().showMessage('Exported Fitting Settings to: %s' % fname) 
Example 68
Project: zulip   Author: zulip   File: compilemessages.py    Apache License 2.0 4 votes vote down vote up
def extract_language_options(self) -> None:
        locale_path = "{}/locale".format(settings.DEPLOY_ROOT)
        output_path = "{}/language_options.json".format(locale_path)

        data = {'languages': []}  # type: Dict[str, List[Dict[str, Any]]]

        try:
            locales = self.get_locales()
        except CalledProcessError:
            # In case we are not under a Git repo, fallback to getting the
            # locales using listdir().
            locales = os.listdir(locale_path)
            locales.append('en')
            locales = list(set(locales))

        for locale in locales:
            if locale == 'en':
                data['languages'].append({
                    'name': 'English',
                    'name_local': 'English',
                    'code': 'en',
                    'locale': 'en',
                })
                continue

            lc_messages_path = os.path.join(locale_path, locale, 'LC_MESSAGES')
            if not os.path.exists(lc_messages_path):
                # Not a locale.
                continue

            info = {}  # type: Dict[str, Any]
            code = to_language(locale)
            percentage = self.get_translation_percentage(locale_path, locale)
            try:
                name = LANG_INFO[code]['name']
                name_local = LANG_INFO[code]['name_local']
            except KeyError:
                # Fallback to getting the name from PO file.
                filename = self.get_po_filename(locale_path, locale)
                name = self.get_name_from_po_file(filename, locale)
                name_local = with_language(name, code)

            info['name'] = name
            info['name_local'] = name_local
            info['code'] = code
            info['locale'] = locale
            info['percent_translated'] = percentage
            data['languages'].append(info)

        with open(output_path, 'w') as writer:
            json.dump(data, writer, indent=2, sort_keys=True)
            writer.write('\n') 
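The file written above can be read back symmetrically with ujson; a minimal sketch, assuming language_options.json sits in the current directory rather than under settings.DEPLOY_ROOT:

import ujson

with open('language_options.json', 'r') as reader:
    languages = ujson.load(reader)['languages']
    # Print the locale codes collected by extract_language_options().
    print(sorted(lang['code'] for lang in languages))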
Example 69
Project: zulip   Author: zulip   File: export.py    Apache License 2.0 4 votes vote down vote up
def export_avatars_from_local(realm: Realm, local_dir: Path, output_dir: Path) -> None:

    count = 0
    records = []

    users = list(UserProfile.objects.filter(realm=realm))
    users += [
        get_system_bot(settings.NOTIFICATION_BOT),
        get_system_bot(settings.EMAIL_GATEWAY_BOT),
        get_system_bot(settings.WELCOME_BOT),
    ]
    for user in users:
        if user.avatar_source == UserProfile.AVATAR_FROM_GRAVATAR:
            continue

        avatar_path = user_avatar_path_from_ids(user.id, realm.id)
        wildcard = os.path.join(local_dir, avatar_path + '.*')

        for local_path in glob.glob(wildcard):
            logging.info('Copying avatar file for user %s from %s' % (
                user.email, local_path))
            fn = os.path.relpath(local_path, local_dir)
            output_path = os.path.join(output_dir, fn)
            os.makedirs(str(os.path.dirname(output_path)), exist_ok=True)
            shutil.copy2(str(local_path), str(output_path))
            stat = os.stat(local_path)
            record = dict(realm_id=realm.id,
                          user_profile_id=user.id,
                          user_profile_email=user.email,
                          s3_path=fn,
                          path=fn,
                          size=stat.st_size,
                          last_modified=stat.st_mtime,
                          content_type=None)
            records.append(record)

            count += 1

            if (count % 100 == 0):
                logging.info("Finished %s" % (count,))

    with open(os.path.join(output_dir, "records.json"), "w") as records_file:
        ujson.dump(records, records_file, indent=4) 
Example 70
Project: QANet   Author: NLPLearn   File: main.py    MIT License 4 votes vote down vote up
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file, get_record_parser(
            config, is_test=True), config).make_one_shot_iterator()

        model = Model(config, test_batch, word_mat, char_mat, trainable=False, graph = g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1'])) 
Example 71
Project: S-NET   Author: noble6emc2   File: main.py    MIT License 4 votes vote down vote up
def test(config):

	gpu_options = tf.GPUOptions(visible_device_list="2")
	sess_config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
	sess_config.gpu_options.allow_growth = True

	with open(config.word_emb_file, "r") as fh:
		word_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.char_emb_file, "r") as fh:
		char_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.test_eval_file, "r") as fh:
		eval_file = json.load(fh)
	with open(config.test_meta, "r") as fh:
		meta = json.load(fh)

	total = meta["total"]

	print("Loading model...")
	test_batch = get_dataset(config.test_record_file, get_record_parser(
		config, is_test=True), config).make_one_shot_iterator()

	model = Model(config, test_batch, word_mat, char_mat, trainable=False)

	with tf.Session(config=sess_config) as sess:
		sess.run(tf.global_variables_initializer())
		saver = tf.train.Saver()
		saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
		sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
		losses = []
		answer_dict = {}
		remapped_dict = {}
		for step in tqdm(range(total // config.batch_size + 1)):
			qa_id, loss, yp1, yp2 = sess.run(
				[model.qa_id, model.loss, model.yp1, model.yp2])
			answer_dict_, remapped_dict_, outlier = convert_tokens(
				eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
			answer_dict.update(answer_dict_)
			remapped_dict.update(remapped_dict_)
			losses.append(loss)
		loss = np.mean(losses)

		# evaluate with answer_dict, but in evaluate-v1.1.py, evaluate with remapped_dict
		# since only that is saved. Both dict are a little bit different, check evaluate-v1.1.py
		metrics = evaluate(eval_file, answer_dict)
		with open(config.answer_file, "w") as fh:
			json.dump(remapped_dict, fh)
		print("Exact Match: {}, F1: {} Rouge-l-f: {} Rouge-l-p: {} Rouge-l-r: {}".format(\
			metrics['exact_match'], metrics['f1'], metrics['rouge-l-f'], metrics['rouge-l-p'],\
			metrics['rouge-l-r'])) 
Example 72
Project: S-NET   Author: noble6emc2   File: main.py    MIT License 4 votes vote down vote up
def test(config):

	gpu_options = tf.GPUOptions(visible_device_list="2")
	sess_config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
	sess_config.gpu_options.allow_growth = True

	with open(config.word_emb_file, "r") as fh:
		word_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.char_emb_file, "r") as fh:
		char_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.test_eval_file, "r") as fh:
		eval_file = json.load(fh)
	with open(config.test_meta, "r") as fh:
		meta = json.load(fh)

	total = meta["total"]

	print("Loading model...")
	test_batch = get_dataset(config.test_record_file, get_record_parser(
		config, is_test=True), config).make_one_shot_iterator()

	model = Model(config, test_batch, word_mat, char_mat, trainable=False)

	with tf.Session(config=sess_config) as sess:
		sess.run(tf.global_variables_initializer())
		saver = tf.train.Saver()
		saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
		sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
		losses = []
		answer_dict = {}
		remapped_dict = {}
		for step in tqdm(range(total // config.batch_size + 1)):
			qa_id, loss, yp1, yp2 = sess.run(
				[model.qa_id, model.loss, model.yp1, model.yp2])
			answer_dict_, remapped_dict_, outlier = convert_tokens(
				eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
			answer_dict.update(answer_dict_)
			remapped_dict.update(remapped_dict_)
			losses.append(loss)
		loss = np.mean(losses)

		# evaluate with answer_dict, but in evaluate-v1.1.py, evaluate with remapped_dict
		# since only that is saved. Both dict are a little bit different, check evaluate-v1.1.py
		metrics = evaluate(eval_file, answer_dict)
		with open(config.answer_file, "w") as fh:
			json.dump(remapped_dict, fh)
		print("Exact Match: {}, F1: {} Rouge-L-F1: {} Rouge-L-p: {} Rouge-l-r: {}".format(\
			metrics['exact_match'], metrics['f1'], metrics['rouge-l-f'], metrics['rouge-l-p'],\
			metrics['rouge-l-r'])) 
Example 73
Project: S-NET   Author: noble6emc2   File: main.py    MIT License 4 votes vote down vote up
def test(config):

	gpu_options = tf.GPUOptions(visible_device_list="2")
	sess_config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
	sess_config.gpu_options.allow_growth = True

	with open(config.word_emb_file, "r") as fh:
		word_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.char_emb_file, "r") as fh:
		char_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.test_eval_file, "r") as fh:
		eval_file = json.load(fh)
	with open(config.test_meta, "r") as fh:
		meta = json.load(fh)

	total = meta["total"]

	print("Loading model...")
	test_batch = get_dataset(config.test_record_file, get_record_parser(
		config, is_test=True), config).make_one_shot_iterator()

	model = Model(config, test_batch, word_mat, char_mat, trainable=False)

	with tf.Session(config=sess_config) as sess:
		sess.run(tf.global_variables_initializer())
		saver = tf.train.Saver()
		saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
		sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
		losses = []
		answer_dict = {}
		remapped_dict = {}
		for step in tqdm(range(total // config.batch_size + 1)):
			qa_id, loss, yp1, yp2 = sess.run(
				[model.qa_id, model.loss, model.yp1, model.yp2])
			answer_dict_, remapped_dict_, outlier = convert_tokens(
				eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
			answer_dict.update(answer_dict_)
			remapped_dict.update(remapped_dict_)
			losses.append(loss)
		loss = np.mean(losses)

		# evaluate with answer_dict, but in evaluate-v1.1.py, evaluate with remapped_dict
		# since only that is saved. Both dict are a little bit different, check evaluate-v1.1.py
		metrics = evaluate(eval_file, answer_dict)
		with open(config.answer_file, "w") as fh:
			json.dump(remapped_dict, fh)
		print("Exact Match: {}, F1: {} Rouge-l-f: {} Rouge-l-p: {} Rouge-l-r: {}".format(\
			metrics['exact_match'], metrics['f1'], metrics['rouge-l-f'], metrics['rouge-l-p'],\
			metrics['rouge-l-r'])) 
Example 74
Project: S-NET   Author: noble6emc2   File: main.py    MIT License 4 votes vote down vote up
def test(config):

	gpu_options = tf.GPUOptions(visible_device_list="2")
	sess_config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
	sess_config.gpu_options.allow_growth = True

	with open(config.word_emb_file, "r") as fh:
		word_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.char_emb_file, "r") as fh:
		char_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.test_eval_file, "r") as fh:
		eval_file = json.load(fh)
	with open(config.test_meta, "r") as fh:
		meta = json.load(fh)

	total = meta["total"]

	print("Loading model...")
	test_batch = get_dataset(config.test_record_file, get_record_parser(
		config, is_test=True), config).make_one_shot_iterator()

	model = Model(config, test_batch, word_mat, char_mat, trainable=False)

	with tf.Session(config=sess_config) as sess:
		sess.run(tf.global_variables_initializer())
		saver = tf.train.Saver()
		saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
		sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
		losses = []
		answer_dict = {}
		remapped_dict = {}

		# tqdm
		for step in tqdm(range(total // config.batch_size + 1)):
			qa_id, loss, yp1, yp2 = sess.run(
				[model.qa_id, model.loss, model.yp1, model.yp2])
			answer_dict_, remapped_dict_, outlier = convert_tokens(
				eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
			answer_dict.update(answer_dict_)
			remapped_dict.update(remapped_dict_)
			losses.append(loss)
			print("\n",loss)
			if(loss>50):
				for i,j,k in zip(qa_id.tolist(),yp1.tolist(),yp2.tolist()):
					print(answer_dict[str(i)],j,k)
				#print("IDs: {} Losses: {} Yp1: {} Yp2: {}".format(qa_id.tolist(),\
				#	loss.tolist(), yp1.tolist(), yp2.tolist()))
		loss = np.mean(losses)

		# evaluate with answer_dict, but in evaluate-v1.1.py, evaluate with remapped_dict
		# since only that is saved. Both dict are a little bit different, check evaluate-v1.1.py
		metrics = evaluate(eval_file, answer_dict)
		with open(config.answer_file, "w") as fh:
			json.dump(remapped_dict, fh)
		print("Exact Match: {}, F1: {} Rouge-l-f: {} Rouge-l-p: {} Rouge-l-r: {}".format(\
			metrics['exact_match'], metrics['f1'], metrics['rouge-l-f'], metrics['rouge-l-p'],\
			metrics['rouge-l-r'])) 
Example 75
Project: S-NET   Author: noble6emc2   File: main.py    MIT License 4 votes vote down vote up
def test(config):

	gpu_options = tf.GPUOptions(visible_device_list="3")
	sess_config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
	sess_config.gpu_options.allow_growth = True

	with open(config.word_emb_file, "r") as fh:
		word_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.char_emb_file, "r") as fh:
		char_mat = np.array(json.load(fh), dtype=np.float32)
	with open(config.test_eval_file, "r") as fh:
		eval_file = json.load(fh)
	with open(config.test_meta, "r") as fh:
		meta = json.load(fh)

	total = meta["total"]

	print("Loading model...")
	test_batch = get_dataset(config.test_record_file, get_record_parser(
		config, is_test=True), config).make_one_shot_iterator()

	model = Model(config, test_batch, word_mat, char_mat, trainable=False)

	with tf.Session(config=sess_config) as sess:
		sess.run(tf.global_variables_initializer())
		saver = tf.train.Saver()
		saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
		sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
		losses = []
		answer_dict = {}
		remapped_dict = {}
		for step in tqdm(range(total // config.batch_size + 1)):
			qa_id, loss, yp1, yp2 = sess.run(
				[model.qa_id, model.loss, model.yp1, model.yp2])
			answer_dict_, remapped_dict_, outlier = convert_tokens(
				eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
			if outlier:
				print(True)
				continue
			answer_dict.update(answer_dict_)
			remapped_dict.update(remapped_dict_)
			losses.append(loss)
		loss = np.mean(losses)
		metrics = evaluate(eval_file, answer_dict)
		with open(config.answer_file, "w") as fh:
			json.dump(remapped_dict, fh)
		print("Exact Match: {}, F1: {}".format(
			metrics['exact_match'], metrics['f1'])) 
Example 76
Project: rsinc   Author: ConorWilliams   File: config.py    MIT License 4 votes vote down vote up
def config_cli(config_path):
    print()
    print("Starting", ylw("configuration"), "mode")

    DRIVE_DIR = os.path.dirname(config_path)  # Where config lives

    BASE_L = os.path.expanduser(input('Path to local root folder i.e "~/": '))
    BASE_L = os.path.normpath(BASE_L)
    print("Local root is:", grn(BASE_L))
    print()

    BASE_R = os.path.expanduser(
        input('Path to remote root folder i.e "onedrive:": ')
    )
    BASE_R = os.path.normpath(BASE_R)
    if ":" not in BASE_R:
        BASE_R += ":"
        print('Missing trailing ":" corrected to:', BASE_R)

    print("Remote root is:", grn(BASE_R))
    print()

    print("Finding a matching hash function...")
    lcl_hashes = get_hashes(BASE_L)
    rmt_hashes = get_hashes(BASE_R)
    common = lcl_hashes.intersection(rmt_hashes)

    if lcl_hashes is None or rmt_hashes is None or len(common) == 0:
        print(red("ERROR:"), "could not find a valid hash")
        hash = input("Please enter hash manually: ")
    else:
        hash = sorted(common, key=len)[0]

    print("Using common hash:", grn(hash))

    CASE_INSENSATIVE = (
        input("Do local and remote have same case sensativity? (y/n) ")
        not in STB
    )

    defult_config = {
        "BASE_R": BASE_R,
        "BASE_L": BASE_L,
        "CASE_INSENSATIVE": CASE_INSENSATIVE,
        "HASH_NAME": hash,
        "DEFAULT_DIRS": [],
        "LOG_FOLDER": os.path.join(DRIVE_DIR, "logs/"),
        "MASTER": os.path.join(DRIVE_DIR, "master.json"),
        "TEMP_FILE": os.path.join(DRIVE_DIR, "rsinc.tmp"),
        "FAST_SAVE": False,
    }

    with open(config_path, "w") as file:
        print("Writing config to:", config_path)
        ujson.dump(defult_config, file, sort_keys=True, indent=4) 
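For completeness, a minimal sketch of reading such a config back with ujson.load; the helper and the path are hypothetical and not part of rsinc:

import os
import ujson

def read_config(config_path):
    # Hypothetical helper: load the JSON written by config_cli() above.
    with open(config_path, "r") as file:
        return ujson.load(file)

config = read_config(os.path.expanduser("~/.rsinc/config.json"))
print(config["HASH_NAME"], config["BASE_L"], "->", config["BASE_R"])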
Example 77
Project: rejection-qa   Author: becxer   File: main.py    Apache License 2.0 4 votes vote down vote up
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file, get_record_parser(
            config, is_test=True), config).make_one_shot_iterator()

        model = Model(config, test_batch, word_mat, char_mat, trainable=False, graph = g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1'])) 
Example 78
Project: lrn   Author: bzhangGo   File: main.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
Example 79
Project: lrn   Author: bzhangGo   File: main.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1'])) 
Example 80
Project: iswc-2016-semantic-labeling   Author: minhptx   File: api.py    Apache License 2.0 4 votes vote down vote up
def semantic_labeling(train_dataset, test_dataset, train_dataset2=None, evaluate_train_set=False, reuse_rf_model=True):
    """Doing semantic labeling, train on train_dataset, and test on test_dataset.

    train_dataset2 is optionally provided in case train_dataset, and test_dataset doesn't have overlapping semantic types
    For example, given that train_dataset is soccer domains, and test_dataset is weather domains; the system isn't able
    to recognize semantic types of test_dataset because of no overlapping. We need to provide another train_dataset2, which
    has semantic types of weather domains; so that the system is able to make prediction.

    Train_dataset2 is default to train_dataset. (train_dataset is use to train RandomForest)

    :param train_dataset: str
    :param test_dataset: str
    :param train_dataset2: Optional[str]
    :param evaluate_train_set: bool
    :param reuse_rf_model: bool
    :return:
    """
    logger = get_logger("semantic-labeling-api", format_str='>>>>>> %(asctime)s - %(levelname)s:%(name)s:%(module)s:%(lineno)d:   %(message)s')

    if train_dataset2 is None:
        train_dataset2 = train_dataset
        datasets = [train_dataset, test_dataset]
    else:
        datasets = [train_dataset, test_dataset, train_dataset2]

    semantic_labeler = SemanticLabeler()
    # read data into memory
    logger.info("Read data into memory")
    semantic_labeler.read_data_sources(list(set(datasets)))
    # index datasets that haven't been indexed before

    not_indexed_datasets = list({dataset for dataset in datasets if not is_indexed(dataset)})
    if len(not_indexed_datasets) > 0:
        logger.info("Index not-indexed datasets: %s" % ",".join(not_indexed_datasets))
        semantic_labeler.train_semantic_types(not_indexed_datasets)

    # remove existing file if not reuse previous random forest model
    if not reuse_rf_model and os.path.exists("model/lr.pkl"):
        os.remove("model/lr.pkl")

    # train the model
    logger.info("Train randomforest... with args ([1], [%s]", train_dataset)
    semantic_labeler.train_random_forest([1], [train_dataset])

    # generate semantic typing
    logger.info("Generate semantic typing using: trainset: %s, for testset: %s", train_dataset, test_dataset)
    result = semantic_labeler.test_semantic_types_from_2_sets(train_dataset2, test_dataset)

    if not os.path.exists("output"):
        os.mkdir("output")
    with open("output/%s_result.json" % test_dataset, "w") as f:
        ujson.dump(result, f)

    if evaluate_train_set:
        logger.info("Generate semantic typing for trainset")
        result = semantic_labeler.test_semantic_types_from_2_sets(train_dataset2, train_dataset2)
        with open("output/%s_result.json" % train_dataset2, "w") as f:
            ujson.dump(result, f)

    return result
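A usage sketch for the API above; the dataset names mirror the soccer/weather scenario from the docstring and are purely illustrative:

# Illustrative call only; "soccer" and "weather" are placeholder dataset names.
result = semantic_labeling(
    train_dataset="soccer",
    test_dataset="weather",
    train_dataset2="weather_train",  # assumed second training set holding weather semantic types
    reuse_rf_model=False,            # retrain the random forest instead of loading model/lr.pkl
)
# Predictions for the test set are also written to output/weather_result.json by the call above.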