Python json.html() Examples

The following are code examples for showing how to use json.html(). They are extracted from open source Python projects. You can vote up the examples you like or vote down the ones you don't like. You can also save this page to your account.

Example 1
Project: oadoi   Author: Impactstory   File: util.py    (MIT License) View Source Project 7 votes vote down vote up
def get_tree(page):
    """Parse an HTML string into an lxml element tree.

    Args:
        page: HTML markup as a string.

    Returns:
        The root element produced by ``html.fromstring``, or ``None`` when
        parsing raises ``etree.XMLSyntaxError`` or ``etree.ParserError``.
    """
    # Swap the non-breaking-space entity for a plain space before parsing;
    # otherwise starts-with for lxml doesn't work.
    page = page.replace("&nbsp;", " ")
    try:
        return html.fromstring(page)
    except (etree.XMLSyntaxError, etree.ParserError) as e:
        # A parenthesised single argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(u"not parsing, beause etree error in get_tree: {}".format(e))
        return None
Example 2
Project: freshonions-torscraper   Author: dirtyfilthy   File: tor_elasticsearch.py    (license) View Source Project 6 votes vote down vote up
def elasticsearch_pages(context, sort, page):
    """Run the page search described by ``context`` and return its response.

    Args:
        context: Mapping of search options. The keys read here are
            ``is_up``, ``show_fh_default``, ``show_subdomains``, ``rep``,
            ``more``, ``phrase``, ``search`` and ``sort``.
        sort: Not read by this function; the ordering comes from
            ``context["sort"]``.
        page: 1-based page number used to compute the result slice.

    Returns:
        Whatever ``execute()`` returns for the assembled ``Search``.

    The page size comes from the ``RESULT_LIMIT`` environment variable;
    ``MAX_RESULT_LIMIT`` is also read and must be a valid integer.
    """
    per_page = int(os.environ['RESULT_LIMIT'])
    per_page_max = int(os.environ['MAX_RESULT_LIMIT'])
    first = (page - 1) * per_page
    last = first + per_page

    # Collect the optional restrictions on the parent domain, then AND them
    # onto the mandatory "not banned" term.
    extra_terms = []
    if context["is_up"]:
        extra_terms.append(Q("term", is_up=True))
    if not context["show_fh_default"]:
        extra_terms.append(Q("term", is_crap=False))
    if not context["show_subdomains"]:
        extra_terms.append(Q("term", is_subdomain=False))
    reputation = context["rep"]
    if reputation == "genuine":
        extra_terms.append(Q("term", is_genuine=True))
    elif reputation == "fake":
        extra_terms.append(Q("term", is_fake=True))

    domain_filter = Q("term", is_banned=False)
    for term in extra_terms:
        domain_filter = domain_filter & term

    # NOTE(review): `limit` is computed but never applied to the slice below;
    # it is kept so that a missing "more" key still raises as before.
    limit = per_page_max if context["more"] else per_page

    parent_filter = Q("has_parent", type="domain", query=domain_filter)
    match_kind = "match_phrase" if context['phrase'] else "match"
    body_query = Q(match_kind, body_stripped=context['search'])
    search = Search().filter(parent_filter).query(body_query)

    search = search.highlight_options(order='score', encoder='html')
    search = search.highlight('body_stripped')[first:last]
    search = search.source(['title', 'domain_id', 'created_at', 'visited_at'])
    search = search.params(request_cache=True)

    # Requested ordering -> sort field; unknown values leave the default order.
    sort_fields = (
        ("onion", "_parent"),
        ("visited_at", "-visited_at"),
        ("created_at", "-created_at"),
        ("last_seen", "-visited_at"),
    )
    requested = context["sort"]
    for option, field in sort_fields:
        if requested == option:
            search = search.sort(field)
            break

    return search.execute()
Example 3
Project: message-author-classifier   Author: IvayloAtanasov   File: json-to-pkl.py    (license) View Source Project 6 votes vote down vote up
def main():
    """Convert the exported Slack JSON data into two pickled lists.

    Reads ``users.json``, ``channels.json`` and ``privateChannels.json`` from
    ``../slack-data/``, runs the messages through the preprocessing helpers,
    and writes ``messages.pkl`` and ``authors.pkl`` to the current directory.
    The two lists are index-aligned: entry *i* of ``authors.pkl`` is the
    ``user_index_by_id`` result for the author of entry *i* of
    ``messages.pkl``.
    """
    def _load_json(path):
        # TODO: json loading is different every time, use object_pairs_hook?
        #  https://docs.python.org/3/library/json.html#json.load
        with open(path, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    # load files
    users = _load_json('../slack-data/users.json')
    channels = _load_json('../slack-data/channels.json')
    private_channels = _load_json('../slack-data/privateChannels.json')

    # merge channels with private channels
    channels = channels + private_channels

    # merge from "per-channel" to "per-user" messages collection
    users_messages = flatten_messages(channels)
    # remove users with not enough messages as over-sampling their messages can lead to overfitting
    users_messages = discard_insufficient_data_users(users_messages, users)
    # stem words in messages
    users_messages = stem_messages(users_messages)
    # make all remained users have equal number of messages
    users_messages = balance_messages(users_messages)

    messages_output = []
    authors_output = []
    for user_id, messages in users_messages.items():
        for message in messages:
            authors_output.append(user_index_by_id(user_id, users))
            messages_output.append(message)

    # Context managers make sure both pickle files are flushed and closed
    # even if dumping raises.
    with open('messages.pkl', 'wb') as messages_file:
        pickle.dump(messages_output, messages_file)
    with open('authors.pkl', 'wb') as authors_file:
        pickle.dump(authors_output, authors_file)

    print('Saved a total of ' + str(len(messages_output)) + ' processed messages')
Example 4
Project: citeas-api   Author: Impactstory   File: util.py    (license) View Source Project 6 votes vote down vote up
def get_random_dois(n):
    """Fetch a sample of DOIs from the Crossref works API.

    Args:
        n: Value substituted into the ``sample`` query parameter of the
            request URL.

    Returns:
        list: The ``"DOI"`` value of every entry under
        ``["message"]["items"]`` in the JSON response. The list is also
        printed, as before.
    """
    url = u"http://api.crossref.org/works?filter=from-pub-date:2006-01-01&sample={}".format(n)
    r = requests.get(url)
    items = r.json()["message"]["items"]
    dois = [item["DOI"] for item in items]
    # A parenthesised single argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(dois)
    # Hand the result back so callers can use it instead of scraping stdout.
    return dois

# from https://github.com/elastic/elasticsearch-py/issues/374
# to work around unicode problem
# import elasticsearch
# class JSONSerializerPython2(elasticsearch.serializer.JSONSerializer):
#     """Override elasticsearch library serializer to ensure it encodes utf characters during json dump.
#     See original at: https://github.com/elastic/elasticsearch-py/blob/master/elasticsearch/serializer.py#L42
#     A description of how ensure_ascii encodes unicode characters to ensure they can be sent across the wire
#     as ascii can be found here: https://docs.python.org/2/library/json.html#basic-usage
#     """
#     def dumps(self, data):
#         # don't serialize strings
#         if isinstance(data, elasticsearch.compat.string_types):
#             return data
#         try:
#             return json.dumps(data, default=self.default, ensure_ascii=True)
#         except (ValueError, TypeError) as e:
#             raise elasticsearch.exceptions.SerializationError(data, e) 
Example 5
Project: desert-mirage   Author: valentour   File: desert_mirage_lib.py    (license) View Source Project 4 votes vote down vote up
def json_config(jfile, jobj_hook=None, jwrite_obj=None, jappend=None):
    """
    Simple interface to json library functions. Reads JSON data into object
    dictionary, creates a new json file, or appends json data to an existing
    file.
    See the json library documentation for  more info.
    `json <https://docs.python.org/3/library/json.html>`_

    Parameters
    ----------
    jfile : str
        json file path.
    jobj_hook : function (default: None)
        Decoder passed as ``object_hook`` when reading. If None, decodes to
        dict.
    jwrite_obj : obj (default: None)
        Obj to write to a new json file ``jfile``. The file is only created
        when it does not already exist; an existing file is left untouched.
        Evaluated before ``jappend``.
    jappend : obj (default: None)
        New data merged (via ``dict.update``) into the top-level object of
        the existing json file ``jfile``.

    Returns
    -------
    The decoded JSON content when neither ``jwrite_obj`` nor ``jappend`` is
    given; otherwise None.
    """
    if jwrite_obj is not None:
        # Write `jwrite_obj` only if the file does not exist, so existing
        # configuration is never overwritten.
        if os.path.isfile(jfile):
            print('`jfile` already exists; `jwrite_obj` was not written.')
        else:
            print('writing `jwrite_obj` to new json `jfile`.')
            with open(jfile, 'w') as f:
                json.dump(jwrite_obj, f, sort_keys=True, ensure_ascii=False)
        return
    if jappend is not None:
        with open(jfile, 'r+') as f:
            json_dict = json.load(f)
            json_dict.update(jappend)
            # Rewind and clear the file before rewriting the merged content.
            f.seek(0)
            f.truncate()  # todo: Improve to only truncate if needed.
            json.dump(json_dict, f, sort_keys=True, indent=4)
        return
    with open(jfile) as f:
        # object_hook=None is json.load's default, so one call covers both
        # the hooked and the plain-dict case.
        return json.load(f, object_hook=jobj_hook)
Example 6
Project: chemcoord   Author: mcocdawc   File: _cartesian_class_io.py    (license) View Source Project 4 votes vote down vote up
def to_cjson(self, buf=None, **kwargs):
    """Write a cjson file or return dictionary.

    The cjson format is specified
    `here <https://github.com/OpenChemistry/chemicaljson>`_.

    Args:
        buf (str): If it is a filepath, the data is written to
            filepath. If it is None, a dictionary with the cjson
            information is returned.
        kwargs: The keyword arguments are passed into the
            ``dumps`` function of the
            `json library <https://docs.python.org/3/library/json.html>`_.

    Returns:
        dict: The cjson dictionary when ``buf`` is None. When ``buf`` is
        given the data is written to that path and None is returned.
    """
    atomic_number = constants.elements['atomic_number'].to_dict()
    # Flatten the x, y, z columns into one list of length 3 * len(self).
    coords = self.loc[:, ['x', 'y', 'z']].values.reshape(len(self) * 3)

    # Emit every bond once as a flat [i, b, i, b, ...] list. A bond is
    # written when its first endpoint is visited; the visited set then
    # suppresses the mirrored entry. This avoids mutating the mapping
    # returned by get_bonds().
    # NOTE(review): assumes get_bonds() returns a symmetric mapping
    # (b in bond_dict[i] implies i in bond_dict[b]) -- confirm.
    bond_dict = self.get_bonds()
    bonds = []
    visited = set()
    for i in bond_dict:
        for b in bond_dict[i]:
            if b not in visited:
                bonds += [int(i), int(b)]
        visited.add(i)

    cjson_dict = {
        'chemical json': 0,
        'atoms': {
            'elements': {
                'number': [int(atomic_number[x]) for x in self['atom']],
            },
            'coords': {'3d': [float(x) for x in coords]},
        },
        'bonds': {'connections': {'index': bonds}},
    }

    if buf is not None:
        with open(buf, mode='w') as f:
            f.write(json.dumps(cjson_dict, **kwargs))
    else:
        return cjson_dict