Python xml.sax.parse() Examples

The following are 30 code examples showing how to use xml.sax.parse(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module xml.sax, or try the search function.

Example 1
Project: NoobSec-Toolkit   Author: krintoxi   File: common.py    License: GNU General Public License v2.0 7 votes vote down vote up
def getAutoDirectories():
    """
    Returns a list of unique candidate web server directories: the parent
    directory of every non-empty entry in kb.absFilePaths (with POSIX-style
    slashes) plus the directory part matched in conf.url, if any
    """

    directories = set()
    knownPaths = kb.absFilePaths

    if not knownPaths:
        logger.warn("unable to automatically parse any web server path")
    else:
        listing = ", ".join(ntToPosixSlashes(path) for path in knownPaths)
        logger.info("retrieved web server absolute paths: " + "'%s'" % listing)

        # falsy entries are skipped here (they are still part of the log line above)
        for entry in knownPaths:
            if not entry:
                continue

            directories.add(ntToPosixSlashes(directoryPath(entry)))

    webDirectory = extractRegexResult(r"//[^/]+?(?P<result>/.*)/", conf.url)  # web directory

    if webDirectory:
        directories.add(webDirectory)

    return list(directories)
Example 2
Project: NoobSec-Toolkit   Author: krintoxi   File: common.py    License: GNU General Public License v2.0 7 votes vote down vote up
def getAutoDirectories():
    """
    Returns a list of unique candidate web server directories: the parent
    directory of every non-empty entry in kb.absFilePaths (with POSIX-style
    slashes) plus the directory part matched in conf.url, if any
    """

    directories = set()
    knownPaths = kb.absFilePaths

    if not knownPaths:
        logger.warn("unable to automatically parse any web server path")
    else:
        listing = ", ".join(ntToPosixSlashes(path) for path in knownPaths)
        logger.info("retrieved web server absolute paths: " + "'%s'" % listing)

        # falsy entries are skipped here (they are still part of the log line above)
        for entry in knownPaths:
            if not entry:
                continue

            directories.add(ntToPosixSlashes(directoryPath(entry)))

    webDirectory = extractRegexResult(r"//[^/]+?(?P<result>/.*)/", conf.url)  # web directory

    if webDirectory:
        directories.add(webDirectory)

    return list(directories)
Example 3
Project: NoobSec-Toolkit   Author: krintoxi   File: common.py    License: GNU General Public License v2.0 6 votes vote down vote up
def extractErrorMessage(page):
    """
    Returns the error message reported in the page, taken from the first
    pattern in ERROR_PARSING_REGEXES that matches (None if the page is not
    a string or no pattern matches)

    >>> extractErrorMessage(u'<html><title>Test</title>\\n<b>Warning</b>: oci_parse() [function.oci-parse]: ORA-01756: quoted string not properly terminated<br><p>Only a test page</p></html>')
    u'oci_parse() [function.oci-parse]: ORA-01756: quoted string not properly terminated'
    """

    if not isinstance(page, basestring):
        return None

    for pattern in ERROR_PARSING_REGEXES:
        found = re.search(pattern, page, re.DOTALL | re.IGNORECASE)

        if found:
            # unescape HTML entities, turn <br> into newlines and trim
            return htmlunescape(found.group("result")).replace("<br>", "\n").strip()

    return None
Example 4
Project: NoobSec-Toolkit   Author: krintoxi   File: common.py    License: GNU General Public License v2.0 6 votes vote down vote up
def extractErrorMessage(page):
    """
    Returns the error message reported in the page, taken from the first
    pattern in ERROR_PARSING_REGEXES that matches (None if the page is not
    a string or no pattern matches)

    >>> extractErrorMessage(u'<html><title>Test</title>\\n<b>Warning</b>: oci_parse() [function.oci-parse]: ORA-01756: quoted string not properly terminated<br><p>Only a test page</p></html>')
    u'oci_parse() [function.oci-parse]: ORA-01756: quoted string not properly terminated'
    """

    if not isinstance(page, basestring):
        return None

    for pattern in ERROR_PARSING_REGEXES:
        found = re.search(pattern, page, re.DOTALL | re.IGNORECASE)

        if found:
            # unescape HTML entities, turn <br> into newlines and trim
            return htmlunescape(found.group("result")).replace("<br>", "\n").strip()

    return None
Example 5
Project: flake8-bandit   Author: tylerwince   File: xml_sax.py    License: MIT License 6 votes vote down vote up
def main():
    # Calls the xml.sax and defusedxml.sax entry points side by side; the
    # "# bad" / "# good" markers below are the original author's labels.
    # NOTE(review): presumably a fixture for a security linter (the page lists
    # this file under a flake8-bandit project) -- keep each call as written; confirm.
    xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>"
    # bad
    xml.sax.parseString(xmlString, ExampleContentHandler())
    xml.sax.parse('notaxmlfilethatexists.xml', ExampleContentHandler())
    sax.parseString(xmlString, ExampleContentHandler())
    # NOTE(review): the handler class itself (not an instance) is passed on the
    # next line, unlike the calls above -- confirm whether that is intentional.
    sax.parse('notaxmlfilethatexists.xml', ExampleContentHandler)

    # good
    defusedxml.sax.parseString(xmlString, ExampleContentHandler())

    # bad
    xml.sax.make_parser()
    sax.make_parser()
    print('nothing')
    # good
    defusedxml.sax.make_parser() 
Example 6
Project: darkc0de-old-stuff   Author: tuwid   File: html.py    License: GNU General Public License v3.0 6 votes vote down vote up
def htmlParser(page):
    """
    Parses the errors XML file (paths.ERRORS_XML) with an htmlHandler built
    from the sanitized page in order to fingerprint the back-end database
    management system. The value found is appended to kb.htmlFp when it is
    not already there, and is returned to the caller
    """

    errorsFile = paths.ERRORS_XML
    checkFile(errorsFile)

    pageHandler = htmlHandler(sanitizeStr(page))
    parse(errorsFile, pageHandler)

    if pageHandler.dbms:
        if pageHandler.dbms not in kb.htmlFp:
            kb.htmlFp.append(pageHandler.dbms)

    return pageHandler.dbms
Example 7
Project: POC-EXP   Author: ym2011   File: common.py    License: GNU General Public License v3.0 6 votes vote down vote up
def getAutoDirectories():
    """
    Returns a list of unique candidate web server directories: the parent
    directory of every non-empty entry in kb.absFilePaths (with POSIX-style
    slashes) plus the directory part matched in conf.url, if any
    """

    directories = set()
    knownPaths = kb.absFilePaths

    if not knownPaths:
        logger.warn("unable to automatically parse any web server path")
    else:
        listing = ", ".join(ntToPosixSlashes(path) for path in knownPaths)
        logger.info("retrieved web server absolute paths: " + "'%s'" % listing)

        # falsy entries are skipped here (they are still part of the log line above)
        for entry in knownPaths:
            if not entry:
                continue

            directories.add(ntToPosixSlashes(directoryPath(entry)))

    webDirectory = extractRegexResult(r"//[^/]+?(?P<result>/.*)/", conf.url)  # web directory

    if webDirectory:
        directories.add(webDirectory)

    return list(directories)
Example 8
Project: POC-EXP   Author: ym2011   File: common.py    License: GNU General Public License v3.0 6 votes vote down vote up
def extractErrorMessage(page):
    """
    Returns the error message reported in the page, taken from the first
    pattern in ERROR_PARSING_REGEXES that matches (None if the page is not
    a string or no pattern matches)

    >>> extractErrorMessage(u'<html><title>Test</title>\\n<b>Warning</b>: oci_parse() [function.oci-parse]: ORA-01756: quoted string not properly terminated<br><p>Only a test page</p></html>')
    u'oci_parse() [function.oci-parse]: ORA-01756: quoted string not properly terminated'
    """

    if not isinstance(page, basestring):
        return None

    for pattern in ERROR_PARSING_REGEXES:
        found = re.search(pattern, page, re.DOTALL | re.IGNORECASE)

        if found:
            # unescape HTML entities, turn <br> into newlines and trim
            return htmlunescape(found.group("result")).replace("<br>", "\n").strip()

    return None
Example 9
Project: EasY_HaCk   Author: sabri-zaki   File: common.py    License: Apache License 2.0 6 votes vote down vote up
def getAutoDirectories():
    """
    Returns a list of unique candidate web server directories: the parent
    directory of every non-empty entry in kb.absFilePaths, converted to
    POSIX-style slashes. A warning is logged when no paths are known
    """

    directories = set()
    knownPaths = kb.absFilePaths

    if not knownPaths:
        logger.warn("unable to automatically parse any web server path")
    else:
        listing = ", ".join(ntToPosixSlashes(path) for path in knownPaths)
        logger.info("retrieved web server absolute paths: " + "'%s'" % listing)

        # falsy entries are skipped here (they are still part of the log line above)
        for entry in knownPaths:
            if not entry:
                continue

            directories.add(ntToPosixSlashes(directoryPath(entry)))

    return list(directories)
Example 10
Project: EasY_HaCk   Author: sabri-zaki   File: common.py    License: Apache License 2.0 6 votes vote down vote up
def extractErrorMessage(page):
    """
    Returns the error message reported in the page, taken from the first
    pattern in ERROR_PARSING_REGEXES that matches (None if the page is not
    a string or no pattern matches)

    >>> extractErrorMessage(u'<html><title>Test</title>\\n<b>Warning</b>: oci_parse() [function.oci-parse]: ORA-01756: quoted string not properly terminated<br><p>Only a test page</p></html>')
    u'oci_parse() [function.oci-parse]: ORA-01756: quoted string not properly terminated'
    """

    if not isinstance(page, basestring):
        return None

    for pattern in ERROR_PARSING_REGEXES:
        found = re.search(pattern, page, re.DOTALL | re.IGNORECASE)

        if found:
            # unescape HTML entities, turn <br> into newlines and trim
            return htmlunescape(found.group("result")).replace("<br>", "\n").strip()

    return None
Example 11
Project: EventGhost   Author: EventGhost   File: __init__.py    License: GNU General Public License v2.0 6 votes vote down vote up
def __call__(self,fav=1):
        """
        Re-parse favorites.xml and post a WM_COMMAND (id 9216+fav) to the
        first handle returned by Handle().

        Returns "<fav>: <favList entry>" on success. When fav is larger than
        len(self.plugin.favList) the self.text.over message is printed as an
        error and returned; when Handle() yields nothing,
        self.plugin.text.text1 is printed as an error and returned.
        """
        windows = Handle()
        if len(windows) == 0:
            self.PrintError(self.plugin.text.text1)
            return self.plugin.text.text1

        favoritesFile = self.plugin.ScreamerPath+'\\favorites.xml'
        self.dh2 = my_xml_handler2()
        sax.parse(favoritesFile.encode(eg.systemEncoding), self.dh2)

        if fav <= len(self.plugin.favList):
            self.plugin.fav_num = fav-1
            PostMessage(windows[0], WM_COMMAND, 9216+fav, 0)
            return str(fav)+": "+self.plugin.favList[self.plugin.fav_num]

        # requested favorite is beyond the end of the list
        overMessage = self.text.over % (str(fav),str(len(self.plugin.favList)))
        self.PrintError(overMessage)
        return overMessage
Example 12
Project: EventGhost   Author: EventGhost   File: __init__.py    License: GNU General Public License v2.0 6 votes vote down vote up
def __call__(self):
        """
        Re-parse favorites.xml, update self.plugin.fav_num from self.value and
        post a WM_COMMAND (id 9217+fav_num) to the first handle returned by
        Handle().

        Returns "<fav_num+1>: <favList entry>" on success; when Handle()
        yields nothing, self.plugin.text.text1 is printed as an error and
        returned.
        """
        hwnds = Handle()
        if len(hwnds) > 0:
            ScreamerPath = self.plugin.ScreamerPath
            xmltoparse = ScreamerPath+'\\favorites.xml'
            self.dh2 = my_xml_handler2()
            sax.parse(xmltoparse.encode(eg.systemEncoding), self.dh2)
            # NOTE(review): self.value[2] and self.value[1] are evaluated with
            # eval() in this frame, so they can see the local names above --
            # confirm they never carry untrusted text, and do not rename locals
            # without checking the expressions stored in self.value.
            if eval(self.value[2]):
                # condition holds: move by the step stored in self.value[0]
                self.plugin.fav_num += self.value[0]
            else:
                # otherwise take the value produced by the second expression
                self.plugin.fav_num = eval(self.value[1])
            PostMessage(hwnds[0], WM_COMMAND, 9217+self.plugin.fav_num, 0)
            num = self.plugin.fav_num
            return (str(num+1)+": "+self.plugin.favList[num])
        else:
            self.PrintError(self.plugin.text.text1)
            return self.plugin.text.text1

#=============================================================================== 
Example 13
Project: PythonClassBook   Author: PythonClassRoom   File: fig16_16.py    License: GNU General Public License v3.0 5 votes vote down vote up
def main():
   """Prompt for a file name and a tag name, then SAX-parse that file.

   The file is parsed with parse() using a TagInfoHandler built from the
   entered tag name. An IOError raised while doing so is reported on
   standard output.
   """
   file = raw_input( "Enter a file to parse: " )
   tagName = raw_input( "Enter tag to search for: " )
   
   try:
      parse( file, TagInfoHandler( tagName ) )

   # handle exception if unable to open file
   except IOError, message:
      print "Error reading file:", message

   # handle exception parsing file 
   # NOTE(review): no handler follows the comment above in this listing; the
   # excerpt appears to be truncated -- confirm against the full source.
Example 14
Project: NoobSec-Toolkit   Author: krintoxi   File: common.py    License: GNU General Public License v2.0 5 votes vote down vote up
def parseXmlFile(xmlFile, handler):
    """
    Parses XML file by a given handler

    The content returned by readCachedFileContent(xmlFile) is wrapped in a
    StringIO stream and passed to parse() together with the handler. A
    SAXParseException or UnicodeError raised meanwhile is re-raised as
    SqlmapInstallationException with a message naming the file.
    """

    try:
        with contextlib.closing(StringIO(readCachedFileContent(xmlFile))) as stream:
            parse(stream, handler)
    # "except ... as" and the call form of raise are valid on Python 2.6+
    # and on Python 3, unlike the comma forms
    except (SAXParseException, UnicodeError) as ex:
        errMsg = "something seems to be wrong with "
        errMsg += "the file '%s' ('%s'). Please make " % (xmlFile, ex)
        errMsg += "sure that you haven't made any changes to it"
        raise SqlmapInstallationException(errMsg)
Example 15
Project: NoobSec-Toolkit   Author: krintoxi   File: common.py    License: GNU General Public License v2.0 5 votes vote down vote up
def readXmlFile(xmlFile):
    """
    Returns the root element (documentElement) of the DOM built by
    minidom.parse() for the given XML file, after checkFile() has been
    called on it
    """

    checkFile(xmlFile)

    document = minidom.parse(xmlFile)
    return document.documentElement
Example 16
Project: NoobSec-Toolkit   Author: krintoxi   File: common.py    License: GNU General Public License v2.0 5 votes vote down vote up
def readXmlFile(xmlFile):
    """
    Returns the root element (documentElement) of the DOM built by
    minidom.parse() for the given XML file, after checkFile() has been
    called on it
    """

    checkFile(xmlFile)

    document = minidom.parse(xmlFile)
    return document.documentElement
Example 17
Project: bandit   Author: PyCQA   File: xml_sax.py    License: Apache License 2.0 5 votes vote down vote up
def main():
    # Calls the xml.sax and defusedxml.sax entry points side by side; the
    # "# bad" / "# good" markers below are the original author's labels.
    # NOTE(review): presumably a fixture for a security linter (the page lists
    # this file under the bandit project) -- keep each call as written; confirm.
    xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>"
    # bad
    xml.sax.parseString(xmlString, ExampleContentHandler())
    xml.sax.parse('notaxmlfilethatexists.xml', ExampleContentHandler())
    sax.parseString(xmlString, ExampleContentHandler())
    # NOTE(review): the handler class itself (not an instance) is passed on the
    # next line, unlike the calls above -- confirm whether that is intentional.
    sax.parse('notaxmlfilethatexists.xml', ExampleContentHandler)

    # good
    defusedxml.sax.parseString(xmlString, ExampleContentHandler())

    # bad
    xml.sax.make_parser()
    sax.make_parser()
    print('nothing')
    # good
    defusedxml.sax.make_parser() 
Example 18
Project: time_series   Author: Open-Power-System-Data   File: read.py    License: MIT License 5 votes vote down vote up
def terna_file_to_initial_dataframe(filepath):
    '''
    Parse the xml or read excel directly,
    returning the data from the file in a simple-index dataframe.

    Some files are formatted as xml, some are pure excel files.
    This function handles both cases: the file is first parsed as xml and,
    if anything goes wrong there, it is read with pandas.read_excel instead.

    Parameters:
    ----------
    filepath: str
        The path of the file to process

    Returns:
    ----------
    df: pandas.DataFrame
        A pandas dataframe containing the data from the specified file.
        The dataframe is empty when the excel fallback raises
        xlrd.XLRDError.

    '''
    # First, we'll try to parse the file as if it is xml.
    try:
        excelHandler = ExcelHandler()
        parse(filepath, excelHandler)

        # Create the dataframe from the parsed data: row 1 of the first
        # table supplies the column names, rows 2+ the records
        df = pd.DataFrame(excelHandler.tables[0][2:],
                          columns=excelHandler.tables[0][1])

        # Convert the "Generation [MWh]"-column to numeric
        df['Generation [MWh]'] = pd.to_numeric(df['Generation [MWh]'])
    except Exception:
        # Any ordinary failure above means the file is treated as excel.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of silently triggering the fallback.
        try:
            df = pd.read_excel(filepath, header=1)
        except xlrd.XLRDError:
            df = pd.DataFrame()

    return df
Example 19
Project: darkc0de-old-stuff   Author: tuwid   File: headers.py    License: GNU General Public License v3.0 5 votes vote down vote up
def headersParser(headers):
    """
    Fingerprints the back-end DBMS operating system and the web application
    technology from the given HTTP headers. Every header that has a banner
    XML file is run through a FingerprintHandler, first against that file
    and then against paths.GENERIC_XML. Calls made once kb.headersCount
    exceeds 3 return without parsing
    """

    # It is enough to parse the headers on first four HTTP responses
    if kb.headersCount > 3:
        return

    kb.headersCount += 1

    bannerPath = paths.SQLMAP_XML_BANNER_PATH

    # header name -> XML file describing its known values
    topHeaders = {
        "cookie": "%s/cookie.xml" % bannerPath,
        "microsoftsharepointteamservices": "%s/sharepoint.xml" % bannerPath,
        "server": "%s/server.xml" % bannerPath,
        "servlet-engine": "%s/servlet.xml" % bannerPath,
        "set-cookie": "%s/cookie.xml" % bannerPath,
        "x-aspnet-version": "%s/x-aspnet-version.xml" % bannerPath,
        "x-powered-by": "%s/x-powered-by.xml" % bannerPath,
    }

    for name in headers:
        if name not in topHeaders:
            continue

        headerValue = headers[name]
        bannerFile = topHeaders[name]

        checkFile(bannerFile)

        fingerprinter = FingerprintHandler(headerValue, kb.headersFp)

        parse(bannerFile, fingerprinter)
        parse(paths.GENERIC_XML, fingerprinter)
Example 20
Project: darkc0de-old-stuff   Author: tuwid   File: banner.py    License: GNU General Public License v3.0 5 votes vote down vote up
def bannerParser(banner):
    """
    Extracts information from the given DBMS banner using the XML file
    that belongs to kb.dbms, followed by paths.GENERIC_XML. Microsoft SQL
    Server banners get a dedicated MSSQLBannerHandler for their own file
    """

    dbms = kb.dbms

    # NOTE(review): for any other kb.dbms value xmlfile stays unbound and the
    # checkFile() call below fails -- confirm callers only pass these four
    if dbms == "Microsoft SQL Server":
        xmlfile = paths.MSSQL_XML
    elif dbms == "MySQL":
        xmlfile = paths.MYSQL_XML
    elif dbms == "Oracle":
        xmlfile = paths.ORACLE_XML
    elif dbms == "PostgreSQL":
        xmlfile = paths.PGSQL_XML

    checkFile(xmlfile)

    if dbms == "Microsoft SQL Server":
        parse(xmlfile, MSSQLBannerHandler(banner))
        fingerprinter = FingerprintHandler(banner, kb.bannerFp)
    else:
        fingerprinter = FingerprintHandler(banner, kb.bannerFp)
        parse(xmlfile, fingerprinter)

    # the generic file is parsed last in both cases
    parse(paths.GENERIC_XML, fingerprinter)
Example 21
Project: darkc0de-old-stuff   Author: tuwid   File: queriesfile.py    License: GNU General Public License v3.0 5 votes vote down vote up
def queriesParser():
    """
    Parses the default DBMS queries XML file (paths.QUERIES_XML) with a
    queriesHandler instance
    """

    logger.debug("parsing XML queries file")

    queriesFile = paths.QUERIES_XML
    checkFile(queriesFile)

    parse(queriesFile, queriesHandler())
Example 22
Project: POC-EXP   Author: ym2011   File: common.py    License: GNU General Public License v3.0 5 votes vote down vote up
def parseXmlFile(xmlFile, handler):
    """
    Parses XML file by a given handler

    The content returned by readCachedFileContent(xmlFile) is wrapped in a
    StringIO stream and passed to parse() together with the handler. A
    SAXParseException or UnicodeError raised meanwhile is re-raised as
    SqlmapInstallationException with a message naming the file.
    """

    try:
        with contextlib.closing(StringIO(readCachedFileContent(xmlFile))) as stream:
            parse(stream, handler)
    # "except ... as" and the call form of raise are valid on Python 2.6+
    # and on Python 3, unlike the comma forms
    except (SAXParseException, UnicodeError) as ex:
        errMsg = "something seems to be wrong with "
        errMsg += "the file '%s' ('%s'). Please make " % (xmlFile, ex)
        errMsg += "sure that you haven't made any changes to it"
        raise SqlmapInstallationException(errMsg)
Example 23
Project: POC-EXP   Author: ym2011   File: common.py    License: GNU General Public License v3.0 5 votes vote down vote up
def readXmlFile(xmlFile):
    """
    Returns the root element (documentElement) of the DOM built by
    minidom.parse() for the given XML file, after checkFile() has been
    called on it
    """

    checkFile(xmlFile)

    document = minidom.parse(xmlFile)
    return document.documentElement
Example 24
Project: EasY_HaCk   Author: sabri-zaki   File: common.py    License: Apache License 2.0 5 votes vote down vote up
def parseXmlFile(xmlFile, handler):
    """
    Parses XML file by a given handler

    The content returned by readCachedFileContent(xmlFile) is wrapped in a
    StringIO stream and passed to parse() together with the handler. A
    SAXParseException or UnicodeError raised meanwhile is re-raised as
    SqlmapInstallationException with a message naming the file.
    """

    try:
        with contextlib.closing(StringIO(readCachedFileContent(xmlFile))) as stream:
            parse(stream, handler)
    # "except ... as" is valid on Python 2.6+ and on Python 3, unlike the
    # comma form
    except (SAXParseException, UnicodeError) as ex:
        errMsg = "something appears to be wrong with "
        errMsg += "the file '%s' ('%s'). Please make " % (xmlFile, getSafeExString(ex))
        errMsg += "sure that you haven't made any changes to it"
        raise SqlmapInstallationException(errMsg)
Example 25
Project: EasY_HaCk   Author: sabri-zaki   File: common.py    License: Apache License 2.0 5 votes vote down vote up
def readXmlFile(xmlFile):
    """
    Returns the root element (documentElement) of the DOM built by
    minidom.parse() for the given XML file, after checkFile() has been
    called on it
    """

    checkFile(xmlFile)

    document = minidom.parse(xmlFile)
    return document.documentElement
Example 26
Project: EventGhost   Author: EventGhost   File: __init__.py    License: GNU General Public License v2.0 5 votes vote down vote up
def __start__(self, ScreamerPath, path2 = None):
        """
        Store both paths on the instance, then SAX-parse screamer.xml from
        ScreamerPath followed by the file named by the 'LanguageFile' entry
        of the handler's document, both into the same my_xml_handler1
        instance (self.dh).
        """
        self.ScreamerPath = ScreamerPath
        self.path2 = path2

        settingsFile = ScreamerPath+'\\screamer.xml'
        self.dh = my_xml_handler1()
        sax.parse(settingsFile.encode(eg.systemEncoding), self.dh)

        # the first pass tells us which language file to parse next
        languageFile = self.dh.document['LanguageFile']
        sax.parse(languageFile.encode(eg.systemEncoding), self.dh)
Example 27
Project: EventGhost   Author: EventGhost   File: __init__.py    License: GNU General Public License v2.0 5 votes vote down vote up
def __call__(self, play=False, fav = 1):
        """
        Launch screamer.exe (and Start_SR_Events.exe when self.plugin.path2
        is set) and, if play is true, wait for a handle from Handle() and
        select favorite number fav.

        Returns None when the screamer.exe launch result is falsy,
        self.text.alt_ret when play is false, self.plugin.text.text1 when no
        handle shows up within 50 polls, the self.text.over message when fav
        is larger than len(self.plugin.favList), and otherwise
        "<fav>: <favList entry>".
        """
        launched = self.plugin.Execute('screamer.exe',self.plugin.ScreamerPath)
        if self.plugin.path2:
            self.plugin.Execute('Start_SR_Events.exe',self.plugin.path2)

        if not launched:
            return None

        # NOTE(review): the helper executable is started a second time here,
        # exactly as the original code does -- confirm this is intended
        if self.plugin.path2:
            self.plugin.Execute('Start_SR_Events.exe',self.plugin.path2)

        if not play:
            return self.text.alt_ret

        # poll every 0.2 s, at most 50 times, until Handle() returns something
        for attempt in range(50):
            sleep(.2)
            windows = Handle()
            if len(windows) > 0:
                break
        else:
            return self.plugin.text.text1

        sleep(2)
        favoritesFile = self.plugin.ScreamerPath+'\\favorites.xml'
        self.dh2 = my_xml_handler2()
        sax.parse(favoritesFile.encode(eg.systemEncoding), self.dh2)

        if fav <= len(self.plugin.favList):
            self.plugin.fav_num = fav-1
            PostMessage(windows[0], WM_COMMAND, 9216+fav, 0)
            return str(fav)+": "+self.plugin.favList[self.plugin.fav_num]

        return self.text.over % (str(fav), str(len(self.plugin.favList)))
Example 28
Project: time_series   Author: Open-Power-System-Data   File: read.py    License: MIT License 4 votes vote down vote up
def read_energinet_dk(filepath):
    '''
    Read a file from energinet.dk into a DataFrame.

    The index is built from the first column plus the (zero-based) hour in
    the second column, the spring DST hour is dropped, and the index is
    localized to Europe/Copenhagen and then converted with tz_convert(None).
    '''
    raw = pd.read_excel(
        io=filepath,
        # Column headers come from the 3rd row. The 2nd row also holds header
        # info (whether a column is price or generation data), but row 3 is
        # enough to identify the columns unambiguously.
        header=2,
        skiprows=None,
        index_col=None,
        parse_dates=True,
        dayfirst=False,
        usecols=None,  # None means: parse all columns
        thousands=',',
        # hours in 2nd column run from 1-24, we need 0-23:
        # (converters seem not to work in combination with parse_dates)
        converters={1: lambda x: x - 1}
    )

    # Timestamp index = date column + hour offset
    day = raw.iloc[:, 0]
    hour_offset = pd.to_timedelta(raw.iloc[:, 1], unit='h')
    raw.index = day + hour_offset

    # DST-handling: collect the spring transitions (March) since 2000
    copenhagen = pytz.timezone('Europe/Copenhagen')
    spring_transitions = [
        moment.replace(hour=2)
        for moment in copenhagen._utc_transition_times
        if 2000 <= moment.year <= datetime.today().year and moment.month == 3]

    # Drop 3rd hour for (spring) DST-transition
    is_spring_gap = raw.index.isin(spring_transitions)
    trimmed = raw[~is_spring_gap]

    # Verify that daylight savings time transitions are handled as expected
    check_dst(trimmed.index, autumn_expect=1)

    # Conform index to UTC; ambiguous timestamps are all flagged as DST
    dst_flags = np.ones(len(trimmed.index), dtype=bool)
    localized = trimmed.index.tz_localize('Europe/Copenhagen', ambiguous=dst_flags)
    trimmed.index = localized.tz_convert(None)

    return trimmed
Example 29
Project: time_series   Author: Open-Power-System-Data   File: read.py    License: MIT License 4 votes vote down vote up
def read_entso_e_portal(filepath):
    '''
    Read a file from the old ENTSO-E Data Portal into a DataFrame.

    The sheet is rescaled by its "Coverage ratio" column, reshaped so that
    timestamps are on the rows and countries on the columns, cleaned of a
    few hard-coded bad periods, converted to a timezone-naive index via
    CET, and finally gets renamed region columns plus a 'GB_UKM' total.
    '''
    df = pd.read_excel(
        io=filepath,
        header=3,  # 0 indexed, so the column names are actually in the 4th row
        skiprows=None,
        # create MultiIndex from first 2 columns ['date', 'Country']
        index_col=[0, 1],
        parse_dates={'date': ['Year', 'Month', 'Day']},
        dayfirst=False,
        usecols=None,  # None means: parse all columns
    )

    # The "Coverage ratio"-column specifies for some countries scaling factor
    # with which we should upscale the reported values
    df = df.divide(df.pop('Coverage ratio'), axis='index') * 100

    # The original data has days and countries in the rows and hours in the
    # columns.  This rearranges the table, mapping hours on the rows and
    # countries on the columns.
    df.columns.names = ['hour']
    df = df.stack(level='hour').unstack(level='Country').reset_index()

    # Create the timestamp column and set as index
    df.index = df.pop('date') + pd.to_timedelta(df.pop('hour'), unit='h')

    # DST-handling
    # Delete values in DK and FR that should not exist
    df = df.loc[df.index != '2015-03-29 02:00', :]

    # Delete values in DK that are obviously twice as high as they should be
    df.loc[df.index.isin(['2014-10-26 02:00:00', '2015-10-25 02:00:00']),
           'DK'] = np.nan

    # Delete values in UK that are all zero except for one day
    df.loc[(df.index.year == 2010) & (df.index.month == 1), 'GB'] = np.nan

    # Delete values in CY that are mostly zero but not always
    df.loc[(df.index.year < 2013), 'CY'] = np.nan

    # Zero load is highly unlikely. Such occurrences are actually NaNs
    df.replace(0, np.nan, inplace=True)

    # Verify that daylight savings time transitions are handled as expected
    check_dst(df.index, autumn_expect=1)
    # Conform index to UTC; every ambiguous timestamp is flagged as DST
    dst_arr = np.ones(len(df.index), dtype=bool)
    df.index = df.index.tz_localize('CET', ambiguous=dst_arr)
    df.index = df.index.tz_convert(None)

    # Rename regions to comply with naming conventions
    renamer = {'DK_W': 'DK_1', 'UA_W': 'UA_west', 'NI': 'GB_NIR', 'GB': 'GB_GBN'}
    df.rename(columns=renamer, inplace=True)

    # Calculate load for whole UK from Great Britain and Northern Ireland data
    df['GB_UKM'] = df['GB_GBN'].add(df['GB_NIR'])

    return df 
Example 30
Project: purl.obolibrary.org   Author: OBOFoundry   File: migrate.py    License: BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def main():
  """Migrate an XML configuration to YAML.

  Command-line arguments: the project IDSPACE, an optional XML input file
  (defaults to STDIN) and an optional YAML output path (defaults to STDOUT).
  The XML is parsed with an OCLCHandler, and the resulting entries (the
  module-level `exact` list followed by `prefix` sorted by descending id
  length) are written through header_template / entry_template.

  Raises:
    ValueError: if parsing produced no entries.
  """
  parser = argparse.ArgumentParser(description='Migrate XML to YAML')
  parser.add_argument('idspace',
                      type=str,
                      help='the project IDSPACE, e.g. FOO')
  parser.add_argument('xml_file',
                      type=argparse.FileType('r'),
                      default=sys.stdin,
                      nargs='?',
                      help='read from the XML file (or STDIN)')
  parser.add_argument('yaml_file',
                      type=str,
                      nargs='?',
                      help='write to the YAML file (or STDOUT)')
  args = parser.parse_args()

  args.upper_idspace = args.idspace.upper()
  args.lower_idspace = args.idspace.lower()
  args.base_url = '/obo/' + args.lower_idspace

  # Only a file opened here is ours to close; STDOUT is merely flushed.
  owns_output = args.yaml_file is not None
  if owns_output:
    try:
      args.yaml_file = open(args.yaml_file, 'w')
    except FileNotFoundError:
      # The target directory does not exist yet: create it and retry.
      os.makedirs(os.path.dirname(args.yaml_file))
      args.yaml_file = open(args.yaml_file, 'w')
  else:
    args.yaml_file = sys.stdout

  # try/finally so the output file is released even when parsing fails or
  # there is nothing to migrate.
  try:
    sax = xml.sax.make_parser()
    sax.setContentHandler(OCLCHandler(args))
    sax.parse(args.xml_file)

    # Longest prefixes first, after all exact entries.
    entries = exact + sorted(prefix, key=lambda k: len(k['id']), reverse=True)
    if not entries:
      raise ValueError('No entries to migrate')

    args.yaml_file.write(header_template %
                         (args.base_url, args.upper_idspace, args.base_url, args.lower_idspace,
                          args.lower_idspace))
    for entry in entries:
      args.yaml_file.write(entry_template %
                           (entry['rule'], entry['id'], entry['url']))
  finally:
    if owns_output:
      args.yaml_file.close()
    else:
      args.yaml_file.flush()


# Define a SAX ContentHandler class to match the XML format,
# and accumulate entry dictionaries into the global lists.
# See example above for XML format.