Python json.html() Examples

The following are 26 code examples of json.html(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module json, or try the search function.
Example #1
Source File: utils.py    From pyDataverse with MIT License 6 votes vote down vote up
def json_to_dict(data):
    """Convert a JSON-formatted string to a Python object.

    See more about the json module at
    https://docs.python.org/3.5/library/json.html

    Parameters
    ----------
    data : string
        Data as a json-formatted string.

    Returns
    -------
    dict
        Data as Python Dictionary (when the JSON document is an object).

    Raises
    ------
    ValueError
        If `data` is not valid JSON.

    """
    # Decoding errors propagate unchanged; a catch-and-re-raise wrapper
    # would only add a traceback frame without changing the outcome.
    return json.loads(data)
Example #2
Source File: utils.py    From pyDataverse with MIT License 6 votes vote down vote up
def dict_to_json(data):
    """Convert dict() to JSON-formatted string.

    The output is indented by two spaces and non-ASCII characters are
    escaped (``ensure_ascii=True``).

    See more about the json module at
    https://docs.python.org/3.5/library/json.html

    Parameters
    ----------
    data : dict
        Data as Python Dictionary.

    Returns
    -------
    string
        Data as a json-formatted string.

    Raises
    ------
    TypeError
        If `data` contains an object the json module cannot serialize.

    """
    # Serialization errors propagate unchanged; no wrapper is needed to
    # re-raise them.
    return json.dumps(data, ensure_ascii=True, indent=2)
Example #3
Source File: utils.py    From pyDataverse with MIT License 6 votes vote down vote up
def read_file(filename, mode='r'):
    """Read in a file.

    Parameters
    ----------
    filename : string
        Filename with full path.
    mode : string
        Read mode of file. Defaults to `r`. See more at
        https://docs.python.org/3.5/library/functions.html#open

    Returns
    -------
    string
        Returns data as string. If the file cannot be opened or read
        (``IOError``), a message is printed and ``None`` is returned.

    """
    try:
        with open(filename, mode) as f:
            return f.read()
    except IOError:
        # Best-effort contract: report the problem and signal failure with
        # None instead of raising. Any other exception propagates as-is.
        print('An error occured trying to read the file {}.'.format(filename))
        return None
Example #4
Source File: utils.py    From pyDataverse with MIT License 6 votes vote down vote up
def write_file(filename, data, mode='w'):
    """Write data in a file.

    If the file cannot be opened or written (``IOError``), a message is
    printed and the function returns normally.

    Parameters
    ----------
    filename : string
        Filename with full path.
    data : string
        Data to be stored.
    mode : string
        Write mode of file. Defaults to `w`. See more at
        https://docs.python.org/3.5/library/functions.html#open

    """
    try:
        with open(filename, mode) as f:
            f.write(data)
    except IOError:
        # Best-effort contract: report the problem instead of raising. Any
        # other exception propagates as-is.
        print('An error occured trying to write the file {}.'.format(filename))
Example #5
Source File: utils.py    From pyDataverse with MIT License 6 votes vote down vote up
def read_file_json(filename):
    """Read in a json file and decode its content.

    The file is read with `read_file()` in text mode and the resulting
    string is decoded with `json_to_dict()`; errors raised by either helper
    propagate unchanged.

    See more about the json module at
    https://docs.python.org/3.5/library/json.html

    Parameters
    ----------
    filename : string
        Filename with full path.

    Returns
    -------
    dict
        Decoded content of the file as Python Dictionary.

    """
    return json_to_dict(read_file(filename, 'r'))
Example #6
Source File: utils.py    From pyDataverse with MIT License 6 votes vote down vote up
def read_file_csv(filename):
    """Read in CSV file.

    The file content is loaded into memory and the file is closed before
    returning, so the returned reader stays usable after this call.

    See more at `csv.reader() <https://docs.python.org/3.5/library/csv.html>`_.

    Parameters
    ----------
    filename : string
        Full filename with path of file.

    Returns
    -------
    reader
        Reader object, which can be iterated over. Rows are lists of
        strings, split on ``,`` with ``"`` as quote character.

    Raises
    ------
    IOError
        If the file cannot be opened or read.

    """
    import io

    # A reader bound to the file object itself becomes unusable once the
    # ``with`` block closes the file (iteration fails with "I/O operation on
    # closed file"). Buffer the content instead; newline='' on both sides
    # keeps line endings untouched, as the csv module expects.
    with open(filename, newline='') as csvfile:
        content = io.StringIO(csvfile.read(), newline='')
    return csv.reader(content, delimiter=',', quotechar='"')
Example #7
Source File: json_encoder.py    From pybotics with MIT License 6 votes vote down vote up
def default(self, o: Any) -> Any:  # pragma: no cover
        """Return a JSON-serializable stand-in for objects json cannot encode.

        NumPy arrays become nested lists, ``RandomState`` instances become
        ``None``, NumPy scalars become their string form, and any other
        object exposing ``__dict__`` is replaced by that dictionary.
        """
        # TODO: use @overload to split function
        # BODY: Reduces cyclomatic complexity; but requires NumPy typing
        if isinstance(o, np.ndarray):
            return o.tolist()
        if isinstance(o, np.random.RandomState):
            return None
        if isinstance(o, np.generic):
            return str(o)

        try:
            return o.__dict__
        except AttributeError:
            # no attribute dictionary available; defer to the base class
            pass

        # let the base class default method raise the TypeError
        # https://docs.python.org/3/library/json.html
        return json.JSONEncoder.default(self, o)
Example #8
Source File: loghandler.py    From quay with Apache License 2.0 6 votes vote down vote up
def __init__(self, *args, **kwargs):
        """
        Set up the formatter; remaining arguments go to ``logging.Formatter``.

        :param json_default: a function for encoding non-standard objects
            as outlined in http://docs.python.org/2/library/json.html
            (defaults to ``_json_default``)
        :param json_encoder: optional custom encoder
        :param json_serializer: a :meth:`json.dumps`-compatible callable
            that will be used to serialize the log record.
        :param default_extra: dict stored as ``default_values``
            (defaults to an empty dict)
        :param prefix_key: an optional key prefix to nest logs
            (defaults to ``"data"``)
        """
        # (attribute, keyword, fallback) triples. Each keyword is popped from
        # kwargs so only the remaining ones reach the base initializer.
        option_table = (
            ("json_default", "json_default", _json_default),
            ("json_encoder", "json_encoder", None),
            ("json_serializer", "json_serializer", json.dumps),
            ("default_values", "default_extra", {}),
            ("prefix_key", "prefix_key", "data"),
        )
        for attribute, keyword, fallback in option_table:
            setattr(self, attribute, kwargs.pop(keyword, fallback))

        logging.Formatter.__init__(self, *args, **kwargs)

        # Fields named in the format string plus the reserved attributes are
        # collected in one set of names to skip.
        self._fmt_parameters = self._parse_format_string()
        self._skip_fields = set(self._fmt_parameters).union(RESERVED_ATTRS)
Example #9
Source File: jsonutils.py    From oslo.serialization with Apache License 2.0 6 votes vote down vote up
def dump(obj, fp, *args, **kwargs):
    """Serialize ``obj`` as a JSON formatted stream to ``fp``

    :param obj: object to be serialized
    :param fp: a ``.write()``-supporting file-like object
    :param default: function that returns a serializable version of an object,
                    :func:`to_primitive` is used by default.
    :param args: extra arguments, please see documentation \
    of `json.dump <https://docs.python.org/2/library/json.html#basic-usage>`_
    :param kwargs: extra named parameters, please see documentation \
    of `json.dump <https://docs.python.org/2/library/json.html#basic-usage>`_

    .. versionchanged:: 1.3
       The *default* parameter now uses :func:`to_primitive` by default.
    """
    # Install the fallback inside kwargs instead of passing ``default`` as a
    # separate keyword next to **kwargs: doing both handed json.dump() the
    # keyword twice and raised TypeError whenever a caller supplied it.
    if 'default' not in kwargs:
        kwargs['default'] = to_primitive
    return json.dump(obj, fp, *args, **kwargs)
Example #10
Source File: loghandler.py    From quay with Apache License 2.0 6 votes vote down vote up
def _json_default(obj):
    """
    Coerce everything to strings.

    All objects representing time get output as ISO8601.
    """
    if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
        return obj.isoformat()

    elif isinstance(obj, Exception):
        return "Exception: %s" % str(obj)

    return str(obj)


# skip natural LogRecord attributes
# http://docs.python.org/library/logging.html#logrecord-attributes 
Example #11
Source File: util.py    From dokomoforms with GNU General Public License v3.0 6 votes vote down vote up
def default(self, obj):
        """Handle special types for json.dumps.

        The object is first handed to ``jsonify`` with
        ``raise_exception=True``. Per the original notes, that covers:

        - models from dokomoforms.models (dictionary representation),
        - datetime.date / datetime.time (ISO 8601 string),
        - psycopg2 Range objects (string representation).

        If ``jsonify`` signals ``NotJSONifiableError`` the base class
        ``default`` is used, which throws a TypeError.

        See
        https://docs.python.org/3/library/json.html#json.JSONEncoder.default
        """
        try:
            converted = jsonify(obj, raise_exception=True)
        except NotJSONifiableError:
            converted = super().default(obj)
        return converted
Example #12
Source File: jsonutil.py    From clgen with GNU General Public License v3.0 5 votes vote down vote up
def loads(text, **kwargs):
  """
  Parse a JSON document that may contain Python or JavaScript style
  comments and return the decoded Python object.

  Supported comment types: `// comment` and `# comment`. Lines consisting
  only of a comment are blanked; trailing comments are cut off the line.

  Taken from `commentjson <https://github.com/vaidik/commentjson>`_, written
  by `Vaidik Kapoor <https://github.com/vaidik>`_.

  Copyright (c) 2014 Vaidik Kapoor, MIT license.

  Arguments:
      text (str): serialized JSON string with or without comments.
      **kwargs (optional): all the arguments that
          `json.loads <http://docs.python.org/2/library/json.html#json.loads>`_
          accepts.

  Returns:
      `dict` or `list`: Decoded JSON.
  """
  comment_pattern = r"\s*(#|\/{2}).*$"
  inline_pattern = (
    r"(:?(?:\s)*([A-Za-z\d\.{}]*)|((?<=\").*\"),?)(?:\s)*(((#|(\/{2})).*)|)$"
  )

  cleaned = []
  for raw in text.split("\n"):
    if not re.search(comment_pattern, raw):
      # no comment marker anywhere on this line
      cleaned.append(raw)
    elif re.search(r"^" + comment_pattern, raw, re.IGNORECASE):
      # the whole line is a comment
      cleaned.append("")
    elif re.search(inline_pattern, raw):
      # keep the value part, drop the trailing comment
      cleaned.append(re.sub(inline_pattern, r"\1", raw))
    else:
      cleaned.append(raw)

  return json.loads("\n".join(cleaned), **kwargs)
Example #13
Source File: util.py    From oadoi with MIT License 5 votes vote down vote up
def clean_doi(dirty_doi, return_none_if_error=False):
    if not dirty_doi:
        if return_none_if_error:
            return None
        else:
            raise NoDoiException("There's no DOI at all.")

    dirty_doi = dirty_doi.strip()
    dirty_doi = dirty_doi.lower()

    # test cases for this regex are at https://regex101.com/r/zS4hA0/1
    p = re.compile(ur'(10\.\d+\/[^\s]+)')

    matches = re.findall(p, dirty_doi)
    if len(matches) == 0:
        if return_none_if_error:
            return None
        else:
            raise NoDoiException("There's no valid DOI.")

    match = matches[0]
    match = remove_nonprinting_characters(match)

    try:
        resp = unicode(match, "utf-8")  # unicode is valid in dois
    except (TypeError, UnicodeDecodeError):
        resp = match

    # remove any url fragments
    if u"#" in resp:
        resp = resp.split(u"#")[0]

    # remove double quotes, they shouldn't be there as per http://www.doi.org/syntax.html
    resp = resp.replace('"', '')

    # remove trailing period, comma -- it is likely from a sentence or citation
    if resp.endswith(u",") or resp.endswith(u"."):
        resp = resp[:-1]

    return resp 
Example #14
Source File: utils.py    From airbrake-python with MIT License 5 votes vote down vote up
def default(self, o):  # pylint: disable=E0202
        # E0202 ignored in favor of compliance with documentation:
        # https://docs.python.org/2/library/json.html#json.JSONEncoder.default
        """Return object's repr when not JSON serializable.

        Only if ``repr()`` itself fails is the base class ``default``
        consulted.
        """
        try:
            fallback = repr(o)
        except Exception:  # pylint: disable=W0703
            fallback = super(FailProofJSONEncoder, self).default(o)
        return fallback
Example #15
Source File: tor_elasticsearch.py    From freshonions-torscraper with GNU Affero General Public License v3.0 5 votes vote down vote up
def elasticsearch_pages(context, sort, page):
    """Run the page search described by ``context`` and return the response.

    Builds a ``has_parent`` filter on the ``domain`` type from the flags in
    ``context``, matches ``context['search']`` against ``body_stripped``
    (as a phrase when ``context['phrase']`` is truthy), requests
    highlighting, and slices the result to the requested page.

    :param context: mapping providing ``is_up``, ``show_fh_default``,
        ``show_subdomains``, ``rep``, ``more``, ``phrase``, ``search`` and
        ``sort``
    :param sort: NOTE(review): never read; ordering comes from
        ``context["sort"]`` -- confirm whether that is intended
    :param page: 1-based page number; page size is the ``RESULT_LIMIT``
        environment variable
    :return: whatever ``Search.execute()`` returns
    """
    result_limit = int(os.environ['RESULT_LIMIT'])
    max_result_limit = int(os.environ['MAX_RESULT_LIMIT'])
    start = (page - 1) * result_limit
    end = start + result_limit

    # Collect the term constraints on the parent domain first, then AND
    # them together in the same order.
    required_terms = [("is_banned", False)]
    if context["is_up"]:
        required_terms.append(("is_up", True))
    if not context["show_fh_default"]:
        required_terms.append(("is_crap", False))
    if not context["show_subdomains"]:
        required_terms.append(("is_subdomain", False))
    reputation = context["rep"]
    if reputation == "genuine":
        required_terms.append(("is_genuine", True))
    if reputation == "fake":
        required_terms.append(("is_fake", True))

    domain_query = None
    for field, value in required_terms:
        clause = Q("term", **{field: value})
        domain_query = clause if domain_query is None else domain_query & clause

    # NOTE(review): ``limit`` is computed but never used below; the slice
    # always uses ``result_limit`` -- confirm against the caller.
    limit = max_result_limit if context["more"] else result_limit

    has_parent_query = Q("has_parent", type="domain", query=domain_query)
    match_kind = "match_phrase" if context['phrase'] else "match"
    query = Search().filter(has_parent_query).query(
        Q(match_kind, body_stripped=context['search']))

    query = query.highlight_options(order='score', encoder='html')
    query = query.highlight('body_stripped')[start:end]
    query = query.source(['title', 'domain_id', 'created_at', 'visited_at'])
    query = query.params(request_cache=True)

    sort_fields = {
        "onion": "_parent",
        "visited_at": "-visited_at",
        "created_at": "-created_at",
        "last_seen": "-visited_at",
    }
    sort_field = sort_fields.get(context["sort"])
    if sort_field is not None:
        query = query.sort(sort_field)

    return query.execute()
Example #16
Source File: __init__.py    From watchtower with Apache License 2.0 5 votes vote down vote up
def batch_sender(self, my_queue, stream_name, send_interval, max_batch_size, max_batch_count):
        """Consume ``my_queue`` and submit its messages in batches until ``self.END`` arrives.

        A batch is handed to ``self._submit_batch(batch, stream_name)`` as soon
        as one of these holds: the queue stayed empty until the batch deadline,
        an ``END`` or ``FLUSH`` marker was received, adding the new message
        would push the batch past ``max_batch_size``, the batch already holds
        ``max_batch_count`` messages, or the deadline has passed.

        :param my_queue: queue to read from; ``task_done()`` is called for
            messages taken from it (see the note on falsy messages below)
        :param stream_name: passed through to ``self._submit_batch``
        :param send_interval: seconds a batch may stay open before it is submitted
        :param max_batch_size: upper bound for the summed ``size()`` of a batch
        :param max_batch_count: upper bound for the number of messages in a batch
        """
        msg = None

        def size(_msg):
            # Dict messages count the length of their "message" entry, anything
            # else counts as 1; EXTRA_MSG_PAYLOAD_SIZE is added in both cases
            # (presumably the per-event overhead of the service -- confirm).
            return (len(_msg["message"]) if isinstance(_msg, dict) else 1) + CloudWatchLogHandler.EXTRA_MSG_PAYLOAD_SIZE

        def truncate(_msg2):
            # Cut the text in place so that size(_msg2) no longer exceeds
            # max_batch_size, and warn about it.
            warnings.warn("Log message size exceeds CWL max payload size, truncated", WatchtowerWarning)
            _msg2["message"] = _msg2["message"][:max_batch_size - CloudWatchLogHandler.EXTRA_MSG_PAYLOAD_SIZE]
            return _msg2

        # See https://boto3.readthedocs.io/en/latest/reference/services/logs.html#CloudWatchLogs.Client.put_log_events
        while msg != self.END:
            # The message that closed the previous batch (if it was a real
            # message rather than None/FLUSH) becomes the first entry of the
            # new batch.
            cur_batch = [] if msg is None or msg == self.FLUSH else [msg]
            cur_batch_size = sum(size(msg) for msg in cur_batch)
            cur_batch_msg_count = len(cur_batch)
            cur_batch_deadline = time.time() + send_interval
            while True:
                try:
                    # Wait at most until the batch deadline for the next message.
                    msg = my_queue.get(block=True, timeout=max(0, cur_batch_deadline - time.time()))
                    if size(msg) > max_batch_size:
                        msg = truncate(msg)
                except queue.Empty:
                    # If the queue is empty, we don't want to reprocess the previous message
                    msg = None
                if msg is None \
                   or msg == self.END \
                   or msg == self.FLUSH \
                   or cur_batch_size + size(msg) > max_batch_size \
                   or cur_batch_msg_count >= max_batch_count \
                   or time.time() >= cur_batch_deadline:
                    self._submit_batch(cur_batch, stream_name)
                    if msg is not None:
                        # We don't want to call task_done if the queue was empty and we didn't receive anything new
                        my_queue.task_done()
                    break
                elif msg:
                    # Message fits: account for it and acknowledge it.
                    # NOTE(review): a falsy message that is not None reaches
                    # neither branch, so it is dropped without task_done().
                    cur_batch_size += size(msg)
                    cur_batch_msg_count += 1
                    cur_batch.append(msg)
                    my_queue.task_done()
Example #17
Source File: jsonutils.py    From oslo.serialization with Apache License 2.0 5 votes vote down vote up
def load(fp, encoding='utf-8', **kwargs):
    """Deserialize ``fp`` to a Python object.

    The stream is wrapped in a decoding reader for ``encoding`` before it
    is handed to ``json.load``.

    :param fp: a ``.read()`` -supporting file-like object
    :param encoding: encoding used to interpret the string
    :param kwargs: extra named parameters, please see documentation \
    of `json.load <https://docs.python.org/2/library/json.html#basic-usage>`_
    :returns: python object
    """
    reader_factory = codecs.getreader(encoding)
    text_stream = reader_factory(fp)
    return json.load(text_stream, **kwargs)
Example #18
Source File: jsonutils.py    From oslo.serialization with Apache License 2.0 5 votes vote down vote up
def loads(s, encoding='utf-8', **kwargs):
    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
    document) to a Python object.

    The input is passed through ``encodeutils.safe_decode`` with
    ``encoding`` before it is handed to ``json.loads``.

    :param s: string to deserialize
    :param encoding: encoding used to interpret the string
    :param kwargs: extra named parameters, please see documentation \
    of `json.loads <https://docs.python.org/2/library/json.html#basic-usage>`_
    :returns: python object
    """
    decoded_text = encodeutils.safe_decode(s, encoding)
    return json.loads(decoded_text, **kwargs)
Example #19
Source File: jsonutils.py    From oslo.serialization with Apache License 2.0 5 votes vote down vote up
def dump_as_bytes(obj, default=to_primitive, encoding='utf-8', **kwargs):
    """Serialize ``obj`` to a JSON formatted ``bytes``.

    The object is serialized with :func:`dumps` and the resulting text is
    encoded with ``encoding``.

    :param obj: object to be serialized
    :param default: function that returns a serializable version of an object,
                    :func:`to_primitive` is used by default.
    :param encoding: encoding used to encode the serialized JSON output
    :param kwargs: extra named parameters, please see documentation \
    of `json.dumps <https://docs.python.org/2/library/json.html#basic-usage>`_
    :returns: json formatted bytes

    .. versionadded:: 1.10
    """
    serialized_text = dumps(obj, default=default, **kwargs)
    return serialized_text.encode(encoding)
Example #20
Source File: jsonutils.py    From oslo.serialization with Apache License 2.0 5 votes vote down vote up
def dumps(obj, default=to_primitive, **kwargs):
    """Serialize ``obj`` to a JSON formatted ``str``.

    :param obj: object to be serialized
    :param default: function that returns a serializable version of an object,
                    :func:`to_primitive` is used by default.
    :param kwargs: extra named parameters, please see documentation \
    of `json.dumps <https://docs.python.org/2/library/json.html#basic-usage>`_
    :returns: json formatted string

    Use dump_as_bytes() to ensure that the result type is ``bytes`` on Python 2
    and Python 3.
    """
    # ``default`` is a named parameter, so it can never already be in kwargs.
    kwargs['default'] = default
    return json.dumps(obj, **kwargs)
Example #21
Source File: client.py    From rele with Apache License 2.0 5 votes vote down vote up
def consume(self, subscription_name, callback, scheduler):
        """Start listening on a subscription through the SubscriberClient.

        The full subscription path is built from the configured project id
        and ``subscription_name``; the result of the client's ``subscribe``
        call is returned.

        :param subscription_name: str Subscription name
        :param callback: Function which act on a topic message
        :param scheduler: `Thread pool-based scheduler.<https://googleapis.dev/python/pubsub/latest/subscriber/api/scheduler.html?highlight=threadscheduler#google.cloud.pubsub_v1.subscriber.scheduler.ThreadScheduler>`_  # noqa
        :return: `Future <https://googleapis.github.io/google-cloud-python/latest/pubsub/subscriber/api/futures.html>`_  # noqa
        """
        client = self._client
        subscription_path = client.subscription_path(self._gc_project_id, subscription_name)
        return client.subscribe(subscription_path, callback=callback, scheduler=scheduler)
Example #22
Source File: util.py    From oadoi with MIT License 5 votes vote down vote up
def get_tree(page):
    """Parse an HTML page into a tree with ``html.fromstring``.

    ``&nbsp;`` entities are replaced by plain spaces first. Returns the
    parsed tree, or ``None`` (after printing the error) when parsing raises
    ``etree.XMLSyntaxError`` or ``etree.ParserError``.
    """
    # otherwise starts-with for lxml doesn't work
    cleaned_page = page.replace("&nbsp;", " ")
    try:
        return html.fromstring(cleaned_page)
    except (etree.XMLSyntaxError, etree.ParserError) as e:
        print u"not parsing, beause etree error in get_tree: {}".format(e)
        return None
Example #23
Source File: util.py    From oadoi with MIT License 5 votes vote down vote up
def get_random_dois(n, from_date=None, only_journal_articles=True):
    """Fetch ``n`` random DOIs from the Crossref ``works`` sample endpoint.

    Requests are repeated, at most 100 DOIs per call, until ``n`` DOIs have
    been collected. DOIs are lower-cased.

    :param n: number of DOIs to return
    :param from_date: optional value for the ``from-pub-date`` filter,
        inserted into the URL as given
    :param only_journal_articles: when true, restrict the sample with the
        ``type:journal-article`` filter
    :return: list of DOI strings
    """
    dois = []
    while len(dois) < n:
        # api takes a max of 100; only ask for what is still missing so the
        # result does not overshoot n
        number_this_round = min(n - len(dois), 100)
        url = u"https://api.crossref.org/works?sample={}".format(number_this_round)

        # Build the filter list as a whole so that from_date also works when
        # only_journal_articles is off (previously the URL then contained a
        # dangling ",from-pub-date:..." without "&filter=").
        filters = []
        if only_journal_articles:
            filters.append(u"type:journal-article")
        if from_date:
            filters.append(u"from-pub-date:{}".format(from_date))
        if filters:
            url += u"&filter=" + u",".join(filters)

        print(url)
        print("calling crossref, asking for {} dois, so far have {} of {} dois".format(
            number_this_round, len(dois), n))
        r = requests.get(url)
        items = r.json()["message"]["items"]
        dois += [item["DOI"].lower() for item in items]
    return dois


# from https://github.com/elastic/elasticsearch-py/issues/374
# to work around unicode problem
# class JSONSerializerPython2(elasticsearch.serializer.JSONSerializer):
#     """Override elasticsearch library serializer to ensure it encodes utf characters during json dump.
#     See original at: https://github.com/elastic/elasticsearch-py/blob/master/elasticsearch/serializer.py#L42
#     A description of how ensure_ascii encodes unicode characters to ensure they can be sent across the wire
#     as ascii can be found here: https://docs.python.org/2/library/json.html#basic-usage
#     """
#     def dumps(self, data):
#         # don't serialize strings
#         if isinstance(data, elasticsearch.compat.string_types):
#             return data
#         try:
#             return json.dumps(data, default=self.default, ensure_ascii=True)
#         except (ValueError, TypeError) as e:
#             raise elasticsearch.exceptions.SerializationError(data, e) 
Example #24
Source File: _cartesian_class_io.py    From chemcoord with GNU Lesser General Public License v3.0 4 votes vote down vote up
def to_cjson(self, buf=None, **kwargs):
        """Write a cjson file or return dictionary.

        The cjson format is specified
        `here <https://github.com/OpenChemistry/chemicaljson>`_.

        Args:
            buf (str): If it is a filepath, the data is written to
                filepath. If it is None, a dictionary with the cjson
                information is returned.
            kwargs: The keyword arguments are passed into the
                ``dumps`` function of the
                `json library <https://docs.python.org/3/library/json.html>`_.

        Returns:
            dict: the cjson dictionary when ``buf`` is None, otherwise None.
        """
        atomic_number = constants.elements['atomic_number'].to_dict()
        element_numbers = [int(atomic_number[x]) for x in self['atom']]

        # x, y, z of every atom flattened into one sequence of floats
        flat_coords = self.loc[:, ['x', 'y', 'z']].values.reshape(len(self) * 3)
        coords_3d = [float(x) for x in flat_coords]

        bonds = []
        bond_dict = self.get_bonds()
        for i in bond_dict:
            for b in bond_dict[i]:
                bonds.extend((int(i), int(b)))
                # drop the reverse entry so each bond is listed only once
                bond_dict[b].remove(i)

        cjson_dict = {
            'chemical json': 0,
            'atoms': {
                'elements': {'number': element_numbers},
                'coords': {'3d': coords_3d},
            },
            'bonds': {'connections': {'index': bonds}},
        }

        if buf is None:
            return cjson_dict
        with open(buf, mode='w') as f:
            f.write(json.dumps(cjson_dict, **kwargs))
Example #25
Source File: client.py    From rele with Apache License 2.0 4 votes vote down vote up
def publish(self, topic, data, blocking=False, timeout=None, **attrs):
        """Publishes message to Google PubSub topic.

        Usage::

            publisher = Publisher()
            publisher.publish('topic_name', {'foo': 'bar'})

        By default, this method is non-blocking: the future is returned
        right away without waiting for its result.

        To wait for the result before continuing, pass ``blocking=True``:

        Usage::

            publisher = Publisher()
            future = publisher.publish('topic_name', {'foo': 'bar'}, blocking=True, timeout=10.0) # noqa

        However, it should be noted that using `blocking=True` may incur a
        significant performance hit.

        In addition, the method adds a timestamp `published_at` to the
        message attrs using `epoch floating point number
        <https://docs.python.org/3/library/time.html#time.time>`_.

        The ``pre_publish`` middleware hook runs before the message is
        sent; the ``post_publish`` hook runs only in blocking mode, after
        the result has arrived.

        :param topic: string topic to publish the data.
        :param data: dict with the content of the message.
        :param blocking: boolean
        :param timeout: float, default None fallsback to :ref:`settings_publisher_timeout`
        :param attrs: additional string parameters to be published.
        :return: `Future <https://googleapis.github.io/google-cloud-python/latest/pubsub/subscriber/api/futures.html>`_  # noqa
        """

        attrs["published_at"] = str(time.time())
        run_middleware_hook("pre_publish", topic, data, attrs)

        encoded_body = json.dumps(data, cls=self._encoder)
        payload = encoded_body.encode("utf-8")
        topic_path = self._client.topic_path(self._gc_project_id, topic)
        future = self._client.publish(topic_path, payload, **attrs)

        if blocking:
            future.result(timeout=timeout or self._timeout)
            run_middleware_hook("post_publish", topic)
        return future
Example #26
Source File: json_schema.py    From confluent-kafka-python with Apache License 2.0 4 votes vote down vote up
def __call__(self, obj, ctx):
        """
        Serializes an object to the Confluent Schema Registry's JSON binary
        format.

        The first time a subject is seen, the schema id is obtained from
        the registry: by registering the schema when auto-registration is
        enabled, otherwise by looking it up. The value is validated against
        the parsed schema before it is written.

        Args:
            obj (object): object instance to serialize.

            ctx (SerializationContext): Metadata pertaining to the serialization
                operation.

        Note:
            None objects are represented as Kafka Null.

        Raises:
            SerializationError: if the value fails schema validation.

        Returns:
            bytes: Confluent Schema Registry formatted JSON bytes

        """
        if obj is None:
            return None

        subject = self._subject_name_func(ctx, self._schema_name)

        # Resolve the schema id once per subject.
        if subject not in self._known_subjects:
            if self._auto_register:
                # The schema name will always be the same. We can't however
                # register a schema without a subject so we set the
                # schema_id here to handle the initial registration.
                self._schema_id = self._registry.register_schema(
                    subject, self._schema)
            else:
                registered_schema = self._registry.lookup_schema(
                    subject, self._schema)
                self._schema_id = registered_schema.schema_id
            self._known_subjects.add(subject)

        value = obj if self._to_dict is None else self._to_dict(obj, ctx)

        try:
            validate(instance=value, schema=self._parsed_schema)
        except ValidationError as ve:
            raise SerializationError(ve.message)

        with _ContextStringIO() as fo:
            # Write the magic byte and schema ID in network byte order (big endian)
            header = struct.pack('>bI', _MAGIC_BYTE, self._schema_id)
            fo.write(header)
            # JSON dump always writes a str never bytes
            # https://docs.python.org/3/library/json.html
            body = json.dumps(value).encode('utf8')
            fo.write(body)

            return fo.getvalue()