# SSURGO_BatchDownload.py
#
# Download SSURGO data from Web Soil Survey
#
# Uses Soil Data Access query to generate choicelist and URLs for each survey
#
# Three different tools call this script. One tool uses an Areasymbol wildcard to
# select surveys for download. Another tool uses an Areaname wildcard to
# select surveys for download. The third uses an SAPOLYGON layer to generate a list
# of Areasymbol values to select surveys for download.
#
# Requires MS Access to run optional text file import for a custom SSURGO Template DB,
# as well as a modification to the VBA in the Template DB. Name of macro is BatchImport

# There are a lot of problems with WSS 3.0. One issue is trying to determine which surveys have
# spatial data. Normally this should be sapubstatuscode = 2.
# According to Gary, there is a finer level of detail available in the sastatusmap table.
# The columns tabularmudist and spatialmudist tell what kind of mapunit data is present in either the
# tabular or spatial portions. The possible values are:
#
# 1 = has ordinary mapunits and no NOTCOM mapunits
# 2 = has both ordinary and NOTCOM mapunits
# 3 = has only NOTCOM mapunits
# 4 = has no mapunits at all
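#
# Example (untested sketch) of the Soil Data Access query this implies; the
# SPATIALMUDIST column name is taken from the note above and should be verified
# against the current SASTATUSMAP schema before use:
#
#   SELECT AREASYMBOL FROM SASTATUSMAP
#   WHERE SAPUBSTATUSCODE = 2 AND SPATIALMUDIST IN (1, 2)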
#
# 10-31-2013
# 11-22-2013
# 01-08-2014
# 01-16-2014 Bad bug, downloads and unzips extra copy of some downloads. fixed.
# 01-22-2014 Modified interface to require that one of the batchimport mdb files be used.
#            Posted all available state template databases to NRCS-GIS sharepoint
#
# Looking at potential for getting old downloads from the Staging Server. Lots of issues to consider...
# Staging Server URL requires E-Auth and has subdirectories
# 04-16-2014 https://soils-staging.sc.egov.usda.gov/NASIS_Export/Staging2Ssurgo/
#
# 05-13-2014 Modified unzip routine to handle other subfolder names at version 3.1 of WSS.
#
# 08-07-2014 Added function to find MS Access application by searching the Registry
# Looks under HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths
#
# 2014-09-27 Added post-import check to make sure tabular import was at least partially successful.
# Bails out if the SACATALOG table does not contain the SAVEREST date
#
# New version of script. Attempting to move most of the main code to functions so
# that failover works better. Breaks are a little messy and it wants to keep running no matter what.
#
# 2014-10-05 Added option to include NOTCOM survey using the tool validation query
#
# 2014-10-10 Removed requirement for MS Access. Now uses csv reader if the user chooses to perform
#            the tabular import. It is not required.

# 2014-10-13 Modified to populate the "SYSTEM - Mapunit Sort Specifications" table
# NEED TO DO THE SAME FOR THE  "SYSTEM - INTERP DEPTH SEQUENCE TABLE"
# NEED TO LOOK AT IL177 legend.txt. Adolfo says this one will fail to import unless
# the csv reader is bumped up using csv.field_size_limit(sys.maxsize). Has failed at 128KB. Bumped to 512KB.
# Might also look at c = csv.reader(f, delimiter='|', quoting=csv.QUOTE_NONE)
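#
# A minimal sketch of that alternative reader configuration (assumes the
# pipe-delimited SSURGO text files never contain quoted delimiters):
#
#   import csv
#   csv.field_size_limit(512000)
#   with open(r"tabular\legend.txt", "rb") as f:
#       for rec in csv.reader(f, delimiter='|', quoting=csv.QUOTE_NONE):
#           pass  # process each record here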

# 2014-10-18 Modified SYSTEM table to only include cointerp records with ruledepth=0
# 2014-10-28 Increased sleep time before and after compact because of errors
# 2014-10-30 Some problems adding MUNAME field to shapefile when output folder is on network share.
#
# 2015-03-13 Some improvements to the date-check/overwrite logic and messaging

## ===================================================================================
class MyError(Exception):
    pass

## ===================================================================================
def errorMsg():
    try:
        exc_type, exc_value, tb = sys.exc_info()
        tbinfo = traceback.format_tb(tb)[0]
        theMsg = tbinfo + " \n" + str(exc_type) + ": " + str(exc_value) + " \n"
        PrintMsg(theMsg, 2)

    except:
        PrintMsg("Unhandled error in errorMsg method", 2)
        pass

## ===================================================================================
def PrintMsg(msg, severity=0):
    # Adds tool message to the geoprocessor
    #
    # Split the message on \n first, so that if it's multiple lines, a GPMessage will be added for each line
    try:
        for string in msg.split('\n'):
            #Add a geoprocessing message (in case this is run as a tool)
            if severity == 0:
                arcpy.AddMessage(string)

            elif severity == 1:
                arcpy.AddWarning(string)

            elif severity == 2:
                arcpy.AddError(" \n" + string)

    except:
        pass

## ===================================================================================
def Number_Format(num, places=0, bCommas=True):
    # Format a number according to locale and the given number of decimal places
    try:
        #locale.setlocale(locale.LC_ALL, "")
        theNumber = locale.format("%.*f", (places, num), bCommas)
        return theNumber

    except:
        errorMsg()
        return ""

## ===================================================================================
def CheckMSAccess():
    # Not using this function any more
    #
    # Make sure this computer has MS Access installed so that the tabular import will run

    try:
        msa = "MSACCESS.EXE"
        aReg = ConnectRegistry(None, HKEY_LOCAL_MACHINE)
        aKey = OpenKey(aReg, r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths")
        accessPath = ""

        for i in range(1024):
            keyName = EnumKey(aKey, i)

            if keyName == msa:
                subKey = OpenKey(aKey, keyName)
                installPath = QueryValueEx(subKey, "Path")
                accessPath = os.path.join(installPath[0], msa)
                break

        return accessPath

    except WindowsError:
        return ""

    except:
        errorMsg()
        return ""

## ===================================================================================
def GetPublicationDate(areaSym):
    #
    # Please Note!!! Function not being used at this time
    # Alternate method of getting SSURGO publication date using SDM Access query
    #
    # This should use SASTATUSMAP table instead of SACATALOG
    # Add 'AND SAPUBSTATUSCODE = 2'
    import time, datetime, httplib, urllib2
    import xml.etree.cElementTree as ET

    try:

        # date formatting
        #    today = datetime.date.today()
        #    myDate = today + datetime.timedelta(days = -(self.params[0].value))
        #    myDate = str(myDate).replace("-","")
        #    wc = "'" + self.params[1].value + "%' AND SAVEREST > '" + myDate + "'"

        # return list sorted by date
        #SELECT S.AREASYMBOL, CONVERT (varchar(10), [SAVEREST], 126) AS SDATE FROM SACATALOG S WHERE AREASYMBOL LIKE 'KS%'

        #sQuery = "SELECT CONVERT(varchar(10), [SAVEREST], 126) AS SAVEREST FROM SACATALOG WHERE AREASYMBOL = '" + areaSym + "'"
        sQuery = "SELECT CONVERT(varchar(10), [SAVEREST], 126) AS SAVEREST FROM SASTATUSMAP WHERE AREASYMBOL = '" + areaSym + "' AND SAPUBSTATUSCODE = 2"

        # Send XML query to SDM Access service
        #
        sXML = """<?xml version="1.0" encoding="utf-8"?>
    <soap12:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap12="http://www.w3.org/2003/05/soap-envelope">
      <soap12:Body>
        <RunQuery xmlns="http://SDMDataAccess.nrcs.usda.gov/Tabular/SDMTabularService.asmx">
          <Query>""" + sQuery + """</Query>
        </RunQuery>
      </soap12:Body>
    </soap12:Envelope>"""

        dHeaders = dict()
        dHeaders["Host"] = "sdmdataaccess.nrcs.usda.gov"
        dHeaders["Content-Type"] = "text/xml; charset=utf-8"
        dHeaders["SOAPAction"] = "http://SDMDataAccess.nrcs.usda.gov/Tabular/SDMTabularService.asmx/RunQuery"
        dHeaders["Content-Length"] = len(sXML)
        sURL = "SDMDataAccess.nrcs.usda.gov"

        # Create SDM connection to service using HTTP
        conn = httplib.HTTPConnection(sURL, 80)

        # Send request in XML-Soap
        conn.request("POST", "/Tabular/SDMTabularService.asmx", sXML, dHeaders)

        # Get back XML response
        response = conn.getresponse()
        xmlString = response.read()

        # Close connection to SDM
        conn.close()

        # Convert XML to tree format
        tree = ET.fromstring(xmlString)

        sdmDate = 0  # default, in case no SAVEREST element is returned

        # Iterate through XML tree, finding required elements...
        for rec in tree.iter():

            if rec.tag == "SAVEREST":
                # get the YYYYMMDD part of the datetime string
                # then reformat to match SQL query
                sdmDate = str(rec.text).split(" ")[0]

        return sdmDate

    except:
        errorMsg()
        return 0


## ===================================================================================
def SSURGOVersion(newDB, tabularFolder):
    # Get SSURGO version from the Template database "SYSTEM - Template Database Information" table

    #
    # Ideally we want to compare the value in version.txt with the version in
    # the "SYSTEM - Template Database Information" table. If they are not the same,
    # the tabular import should be aborted. There are some more specifics about the
    # SSURGO version.txt value in one of the Import macros of the Template database.
    # Need to follow up and research this more.
    # At this time we are only checking the first 'digit' of the string value.
    #
    # Should be able to get this to work using wildcard for fields and then
    # use the version.txt as an alternative or failover.
    try:
        # Valid SSURGO version for data model. Ensures
        # compatibility between template database and SSURGO download.
        versionTxt = os.path.join(tabularFolder, "version.txt")

        if not arcpy.Exists(newDB):
            raise MyError, "Missing input database (" + newDB + ")"

        if arcpy.Exists(versionTxt):
            # read just the first line of the version.txt file
            fh = open(versionTxt, "r")
            txtVersion = fh.readline().split(".")[0]
            fh.close()

        else:
            # Unable to compare versions. Warn user but continue
            PrintMsg("Unable to find file: version.txt", 1)
            return True

        systemInfo = os.path.join(newDB, "SYSTEM - Template Database Information")

        if arcpy.Exists(systemInfo):
            # Get SSURGO Version from template database
            dbVersion = 0

            with arcpy.da.SearchCursor(systemInfo, "*", "") as srcCursor:
                for rec in srcCursor:
                    if rec[0] == "SSURGO Version":
                        dbVersion = str(rec[2]).split(".")[0]
                        #PrintMsg("\tSSURGO Version from DB: " + dbVersion, 1)

            del systemInfo
            del newDB

            if txtVersion != dbVersion:
                # SSURGO Versions do not match. Warn user but continue
                PrintMsg("Discrepancy in SSURGO Version number for Template database and SSURGO download", 1)

        else:
            # Unable to open SYSTEM table in existing dataset
            # Warn user but continue
            PrintMsg("Unable to open 'SYSTEM - Template Database Information'", 1)

        return True

    except MyError, e:
        # Example: raise MyError, "This is an error message"
        PrintMsg(str(e), 2)
        return False

    except:
        errorMsg()
        return False

## ===================================================================================
def GetTemplateDate(newDB):
    # Get SAVEREST date from previously existing Template database
    # Use it to compare with the date from the WSS dataset
    # If the existing database is same or newer, it will be kept and the WSS version skipped
    #
    # da cursor will return:  datetime.datetime(2014, 12, 1, 15, 22, 8)
    # Should be able to reformat to an integer value for comparison with the filename-embedded date.
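    # e.g. datetime.datetime(2014, 12, 1, 15, 22, 8).strftime('%Y%m%d') gives '20141201',
    # which is then cast to the integer 20141201 for comparison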
    #
    try:
        #if not arcpy.Exists(newDB):  # Check for existance before calling this function
        #    return 0
        saCatalog = os.path.join(newDB, "SACATALOG")
        dbDate = 0

        if arcpy.Exists(saCatalog):
            dateObj = None
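            # Note: areaSym is not a parameter here; this relies on the module-level
            # global set in the main processing loop below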
            with arcpy.da.SearchCursor(saCatalog, ("SAVEREST"), "[AREASYMBOL] = '" + areaSym + "'") as srcCursor:
                for rec in srcCursor:
                    # Reformat datetime to YYYYMMDD and convert to integer
                    #dateObj = int(rec[0].strftime('%Y%m%d'))
                    dateObj = rec[0]

            if dateObj is None:
                return 0

            intDate = "%Y%m%d"                       # YYYYMMDD format for comparison
            dbDate = int(dateObj.strftime(intDate))

        else:
            raise MyError, "SACATALOG table in Template database not found"

        del saCatalog
        del newDB

        return dbDate

    except MyError, e:
        # Example: raise MyError, "This is an error message"
        PrintMsg(str(e), 2)
        return 0

    except:
        errorMsg()
        return 0

## ===================================================================================
def GetTabularDate(newFolder):
    # Get string for SAVEREST date from tabular/sacatlog.txt file
    # Use it to compare with the date from the WSS dataset
    # If the existing database is same or newer, it will be kept and the WSS version skipped
    # The original string looks like this: 12/05/2013 23:44:00
    #
    # Return YYYYMMDD as integer

    try:
        tabDate = 0

        # Try finding the text file in the tabular folder and reading SAVEREST from that file.
        saCatalog = os.path.join(newFolder, r"tabular\sacatlog.txt")

        if arcpy.Exists(saCatalog):
            fh = open(saCatalog, "r")
            rec = fh.readline()
            fh.close()
            # Example date (which is index 3 in pipe-delimited file):  9/23/2014 6:49:27
            vals = rec.split("|")
            recDate = vals[3]
            wssDate = "%m/%d/%Y %H:%M:%S"  # string date format used for SAVEREST in text file
            intDate = "%Y%m%d"             # YYYYMMDD format for comparison
            dateObj = datetime.strptime(recDate, wssDate)
            tabDate = int(dateObj.strftime(intDate))

        else:
            PrintMsg(" \nUnable to find file: " + saCatalog, 1)

        return tabDate

    except:
        errorMsg()
        return tabDate

## ===================================================================================
def GetDownload(areaSym, surveyDate, importDB):
    # download survey from Web Soil Survey URL and return name of the zip file
    # want to set this up so that download will retry several times in case of error
    # return empty string in case of complete failure. Allow main to skip a failed
    # survey, but keep a list of failures
    #
    # Only the version of the zip file without a Template database is downloaded. The user
    # must have a local copy of the Template database that has been modified to allow
    # automatic tabular imports.

    # create URL string from survey string and WSS 3.0 cache URL
    baseURL = "http://websoilsurvey.sc.egov.usda.gov/DSD/Download/Cache/SSA/"

    try:
        # List of states that use a Template database other than US_2003.
        # This list will have to be updated in the future if it is used to
        # get downloads with the Template database included in the zipfile.
        dbInfo = {'AK':'AK', 'CT':'CT', 'FL':'FL', 'GA':'GA', 'HI':'HI', 'IA':'IA', \
        'ID':'ID', 'IN':'IN', 'ME':'ME', 'MI':'MI', 'MN':'MN', 'MT':'MT', 'NC':'NC', \
        'NE':'NE', 'NJ':'NJ', 'OH':'OH', 'OR':'OR', 'PA':'PA', 'SD':'SD', 'UT':'UT', \
        'VT':'VT', 'WA':'WA', 'WI':'WI', 'WV':'WV', 'WY':'WY', 'FM':'HI', 'PB':'HI'}

        # Incorporate the name of the Template database into the URL
        st = areaSym[0:2]
        if st in dbInfo:
            db = "_soildb_" + dbInfo[st] + "_2003"
        else:
            db = "_soildb_US_2003"

        # Use this zipfile for downloads without the Template database
        zipDate = str(surveyDate)[0:4] + "-" + str(surveyDate)[4:6] + "-" + str(surveyDate)[6:8]
        zipName = "wss_SSA_" + areaSym + "_[" + str(zipDate) + "].zip"
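        # e.g. a surveyDate of 20140923 yields "wss_SSA_<areasymbol>_[2014-09-23].zip"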

        # Use this URL for downloads with the state or US_2003 database
        #zipName = "wss_SSA_" + areaSym + db + "_[" + surveyDate + "].zip"

        zipURL = baseURL + zipName

        PrintMsg("\tDownloading survey " + areaSym + " from Web Soil Survey...", 0)

        # Open request to Web Soil Survey for that zip file
        request = urlopen(zipURL)

        # set the download's output location and filename
        local_zip = os.path.join(outputFolder, zipName)

        # make sure the output zip file doesn't already exist
        if os.path.isfile(local_zip):
            os.remove(local_zip)

        # save the download file to the specified folder
        output = open(local_zip, "wb")
        output.write(request.read())
        output.close()
        del request
        del output

        # if we get this far then the download succeeded
        return zipName

    except URLError, e:
        if hasattr(e, 'reason'):
            PrintMsg("\t\t" + areaSym + " - URL Error: " + str(e.reason), 1)

        elif hasattr(e, 'code'):
            PrintMsg("\t\t" + zipName + " - " + e.msg + " (errorcode " + str(e.code) + ")", 1)

        return ""

    except socket.timeout, e:
        PrintMsg("\t\t" + areaSym + " - server timeout error", 1)
        return ""

    except socket.error, e:
        PrintMsg("\t\t" + areasym + " - Web Soil Survey connection failure", 1)
        return ""

    except:
        # problem deleting partial zip file after connection error?
        # saw some locked, zero-byte zip files associated with connection errors
        PrintMsg("\tFailed to download zipfile", 0)
        errorMsg()
        return ""
        sleep(1)
        return ""

## ===================================================================================
def CheckExistingDataset(areaSym, surveyDate, newFolder, newDB):

    try:
        bNewer = True  # Default setting should result in overwriting the current data if it already exists
        #PrintMsg(" \nChecking newFolder: " + newFolder, 1)

        if os.path.isdir(newFolder):
            # This survey appears to have already been downloaded. Check to see if it is complete.
            # If not complete, overwrite it.
            # Need to handle situations where Tabular data was not imported. Right now this will
            # throw an error!

            #bNewer = False  # Default setting should result in overwriting the current data if it already exists

            # Having a new issue with date comparisons. Do I need to re-order the date string
            # to YYYYMMDD in order to compare the filename date and the SAVEREST dates as integer?
            #
            # Another issue. If the data was previously downloaded but the option to use a
            # Template database was changed then it gets a little complicated. Should I bail
            # if the specified database doesn't exist instead of failing over to looking at the
            # date from the text file?
            #
            if newDB == "":
                # No tabular import will be performed, use the text file to get the date
                dbDate = GetTabularDate(newFolder)

            elif os.path.isfile(newDB):
                # Template database exists, get date from the SACATALOG table
                dbDate = GetTemplateDate(newDB)
                if dbDate == 0:
                    PrintMsg(" \nLocal dataset " + areaSym + " already exists but is incomplete", 1)

                else:
                    PrintMsg(" \nLocal dataset for " + areaSym + " already exists (date of " + str(dbDate) + ")", 0)

            else:
                # Missing database even though a path was given by the user
                PrintMsg("\tMissing database (" + newDB + ")", 1)
                dbDate = 0

            if dbDate == 0:
                # Could not get SAVEREST date from database, assume old dataset is incomplete and overwrite
                #PrintMsg("\tLocal dataset is incomplete and will be overwritten", 1)
                shutil.rmtree(newFolder, True)
                sleep(3)
                bNewer = True

                if arcpy.Exists(newFolder):
                    raise MyError, "Failed to delete old dataset (" + newFolder + ")"

            else:
                # Compare SDM date with local database date
                if surveyDate > dbDate:
                    # Downloaded data is newer than the local copy. Delete and replace with new data.
                    #
                    #PrintMsg("\tReplacing local dataset with newer download", 1)
                    bNewer = True
                    # delete old data folder
                    shutil.rmtree(newFolder, True)
                    sleep(3)

                    if arcpy.Exists(newFolder):
                        raise MyError, "Failed to delete old dataset (" + newFolder + ")"

                else:
                    # according to the filename-date, the WSS version is the same or older
                    # than the local Template DB, skip download for this survey
                    if surveyDate == dbDate:
                        PrintMsg(" \nSkipping survey " + areaSym + ", local version is already current", 1)

                    else:
                        PrintMsg(" \nSkipping survey " + areaSym + ", local version is newer (" + str(dbDate) + ") than the WSS data!?", 1)

                    bNewer = False

        else:
            # This is a new download
            bNewer = True

        return bNewer

    except MyError, e:
        # Example: raise MyError, "This is an error message"
        PrintMsg(str(e), 2)
        return False

    except:
        errorMsg()
        return False

## ===================================================================================
def ProcessSurvey(outputFolder, importDB, areaSym, bImport, bRemoveTXT, iGet, iTotal):
    # Download and import the specified SSURGO dataset

    try:
        survey = asDict[areaSym]
        env.workspace = outputFolder
        surveyInfo = survey.split(",")
        areaSym = surveyInfo[0].strip().upper()

        # get date string
        surveyDate = int(surveyInfo[1].strip().replace("-", ""))

        # get survey name
        surveyName = surveyInfo[2].strip()

        # set standard final path and name for the local dataset folder
        newFolder = os.path.join(outputFolder, "soil_" + areaSym.lower())

        # set standard name and path for SSURGO Template database
        # Should I set this variable even when no import has been specified? I
        # think this is causing problems.
        if bImport:
            newDB = os.path.join(os.path.join(newFolder, "tabular"), "soil_d_" + areaSym.lower() + ".mdb")

        else:
            newDB = ""

        # check to make sure this survey hasn't already been downloaded
        # When bImport is False, CheckExistingDataset falls back to reading the
        # SAVEREST date from the tabular text file instead of the Template database.
        #
        bNewer = CheckExistingDataset(areaSym, surveyDate, newFolder, newDB)

        if bNewer:
            # Get new SSURGO download or replace an older version of the same survey
            # Otherwise skip download
            #
            PrintMsg(" \nProcessing survey " + areaSym + " (" + str(iGet) + " of " + str(iTotal) + "):  " + surveyName, 0)

            # First attempt to download zip file
            zipName = GetDownload(areaSym, surveyDate, importDB)

            if zipName == "" or zipName is None:
                # Try downloading zip file a second time
                sleep(5)
                zipName = GetDownload(areaSym, surveyDate, importDB)

                if zipName == "" or zipName is None:
                    # Failed second attempt to download zip file
                    # Give up on this survey
                    return "Failed"

            bZip = UnzipDownload(outputFolder, newFolder, importDB, zipName)

            if not bZip:
                # Try unzipping a second time
                sleep(1)
                bZip = UnzipDownload(outputFolder, newFolder, importDB, zipName)

                if not bZip:
                    # Failed second attempt to unzip
                    # Give up on this survey
                    return "Failed"

            # Import tabular. Only try once.
            if bImport:
                if not ImportTabular(areaSym, newFolder, importDB, newDB, bRemoveTXT):
                    # Bail clear out of the whole download process
                    return "Failed"

            return "Successful"

        else:
            # Existing local dataset is same age or newer than downloaded version
            # skip it
            return "Skipped"

    except MyError, e:
        # Example: raise MyError, "This is an error message"
        PrintMsg(str(e), 2)
        return "Failed"

    except:
        errorMsg()
        return "Failed"

## ===================================================================================
def UnzipDownload(outputFolder, newFolder, importDB, zipName):
    # Given zip file name, try to unzip it

    try:
        local_zip = os.path.join(outputFolder, zipName)

        if os.path.isfile(local_zip):
            # got a zip file, go ahead and extract it
            zipSize = (os.stat(local_zip).st_size / (1024.0 * 1024.0))

            if zipSize > 0:

                # Download appears to be successful
                PrintMsg("\tUnzipping " + zipName + " (" + Number_Format(zipSize, 3, True) + " MB)...", 0)

                with zipfile.ZipFile(local_zip, "r") as z:
                    # a bad zip file raises zipfile.BadZipfile, handled below
                    z.extractall(outputFolder)

                # remove zip file after it has been extracted,
                # allowing a little extra time for file lock to clear
                sleep(3)
                os.remove(local_zip)

                # rename output folder to NRCS Geodata Standard for Soils
                if os.path.isdir(os.path.join(outputFolder, zipName[:-4])):
                    # this is an older zip file that has the 'wss_' directory structure
                    os.rename(os.path.join(outputFolder, zipName[:-4]), newFolder)

                elif os.path.isdir(os.path.join(outputFolder, areaSym.upper())):
                    # this must be a newer zip file using the uppercase AREASYMBOL directory
                    os.rename(os.path.join(outputFolder, areaSym.upper()), newFolder)

                elif os.path.isdir(newFolder):
                    # this is a future zip file using the correct field office naming convention (soil_ne109)
                    # it does not require renaming.
                    pass

                else:
                    # none of the subfolders within the zip file match any of the expected names
                    raise MyError, "Subfolder within the zip file does not match any of the standard names"

            else:
                # Downloaded a zero-byte zip file; the download for this survey failed.
                # Return False so the caller can retry.
                PrintMsg("\tEmpty zip file downloaded for " + areaSym, 1)
                os.remove(local_zip)
                return False

            return True

        else:
            # Don't have a zip file, need to find out circumstances and document
            # rename downloaded database using standard convention, skip import
            raise MyError, "Missing zip file (" + local_zip + ")"
            return False

    except MyError, e:
        # Example: raise MyError, "This is an error message"
        PrintMsg(str(e), 2)
        return False

    except zipfile.BadZipfile:
        PrintMsg(" \nBad zip file?", 2)
        return False

    except:
        errorMsg()
        return False

## ===============================================================================================================
def GetTableInfo(newDB):
    # Adolfo's function
    #
    # Retrieve physical and alias names from the MDSTATTABS table and store them
    # in a dictionary keyed by import file name (iefilename),
    # e.g. {'chaashto': ('chaashto', 'Horizon AASHTO')}

    try:
        tblInfo = dict()

        # Open mdstattabs table containing information for other SSURGO tables
        theMDTable = "mdstattabs"
        env.workspace = newDB


        # Establishes a cursor for searching through field rows. A search cursor can be used to retrieve rows.
        # This method will return an enumeration object that will, in turn, hand out row objects
        if arcpy.Exists(os.path.join(newDB, theMDTable)):

            fldNames = ["tabphyname","tablabel","iefilename"]
            with arcpy.da.SearchCursor(os.path.join(newDB, theMDTable), fldNames) as rows:

                for row in rows:
                    # read each table record and assign 'tabphyname' and 'tablabel' to 2 variables
                    physicalName = row[0]
                    aliasName = row[1]
                    importFileName = row[2]

                    # Add each import file name as key with a (physical name, alias) tuple
                    # as value, keeping only the first occurrence of each file name.
                    #if not physicalName in tblAliases:
                    if not importFileName in tblInfo:
                        #PrintMsg("\t" + importFileName + ": " + physicalName, 1)
                        tblInfo[importFileName] = physicalName, aliasName

            del theMDTable

            return tblInfo

        else:
            # The mdstattabs table was not found
            raise MyError, "Missing mdstattabs table"
            return tblInfo


    except MyError, e:
        # Example: raise MyError, "This is an error message"
        PrintMsg(str(e), 2)
        return tblInfo

    except:
        errorMsg()
        return tblInfo

## ===================================================================================
def SortMapunits(newDB):
    # Populate table 'SYSTEM - Mapunit Sort Specifications'. Required for Soil Data Viewer
    # An alpha sort on AREASYMBOL, then a natural-order sort on MUSYM, is used to set
    # lseq and museq values within the "SYSTEM - Mapunit Sort Specifications" table
    #
    # A plain alpha sort does not handle a mix of alpha and numeric musym values
    # properly, so a natural-order key (alphanum_key below) is used instead
    #
    # Populate table "SYSTEM - INTERP DEPTH SEQUENCE" from COINTERP using cointerpkey and seqnum
    #
    try:
        # Make query table using MAPUNIT and LEGEND tables and use it to assemble all
        # of the data elements required to create the "SYSTEM - Mapunit Sort Specification" table
        inputTbls = ["legend", "mapunit"]

        fldList = "legend.areasymbol areasymbol;legend.lkey lkey; mapunit.musym musym; mapunit.mukey mukey"
        sqlJoin = "mapunit.lkey = legend.lkey"
        queryTbl = "musorted"

        # Cleanup
        if arcpy.Exists(queryTbl):
            arcpy.Delete_management(queryTbl)

        # Find output SYSTEM table
        sysFields = ["lseq", "museq", "lkey", "mukey"]
        sysTbl = os.path.join(newDB, "SYSTEM - Mapunit Sort Specifications")
        if not arcpy.Exists(sysTbl):
            raise MyError, "Could not find " + sysTbl

        arcpy.MakeQueryTable_management(inputTbls, queryTbl, "ADD_VIRTUAL_KEY_FIELD", "", fldList, sqlJoin)

        # Open the query table, sorting on areasymbol
        #sqlClause = [None, "order by legend_areasymbol asc"]
        dMapunitSort = dict()  # dictionary containing a list of musym values for each survey; sorted below
        dMapunitData = dict()  # dictionary containing all necessary data for SYSTEM - Mapunit Sort Specifications
        convert = lambda text: int(text) if text.isdigit() else text.lower()
        alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key)]
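        # For example (illustrative only): the natural-order key sorts the musym values
        # ['10', '2', '1A'] as ['1A', '2', '10'], where a plain alpha sort would
        # give ['10', '1A', '2']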

        with arcpy.da.SearchCursor(queryTbl, ["legend_areasymbol", "legend_lkey", "mapunit_musym", "mapunit_mukey"]) as cur:
            for rec in cur:
                areaSym = rec[0].encode('ascii')
                lkey = rec[1].encode('ascii')
                musym = rec[2].encode('ascii')
                mukey = rec[3].encode('ascii')

                # Append musym values to dictionary by areasymbol key
                if areaSym in dMapunitSort:
                    musymList = dMapunitSort[areaSym]
                    musymList.append(musym)
                    dMapunitSort[areaSym] = musymList

                else:
                    dMapunitSort[areaSym] = [musym]

                # store legend and map unit keys by areasymbol and map unit symbol
                dMapunitData[(areaSym, musym)] = (lkey, mukey)

        # Iterate through dMapunitSort dictionary, sorting musym values
        areaList = sorted(dMapunitSort.keys())  # sorted list of areasymbols
        lseq = 0
        mseq = 0

        # Now read the dictionary back out in sorted order and populate the SYSTEM - Mapunit Sort Specifications table
        #
        with arcpy.da.InsertCursor(sysTbl, "*") as outCur:

            for areaSym in areaList:
                #PrintMsg(" \nProcessing survey: " + areaSym, 1)
                lseq += 1
                musymList = sorted(dMapunitSort[areaSym], key = alphanum_key)

                for musym in musymList:
                    mseq += 1
                    mKey = (areaSym, musym)
                    lkey, mukey = dMapunitData[(areaSym, musym)]
                    outrec = lseq, mseq, lkey, mukey
                    outCur.insertRow(outrec)


        # Populate "SYSTEM - INTERP DEPTH SEQUENCE" fields: cointerpkey and depthseq
        # from COINTERP fields: cointerpkey and seqnum
        # I am assuming that the cointerp table is already sorted. Is that safe??
        #
        #PrintMsg("\tUpdating SYSTEM - Interp Depth Sequence", 1)
        inTbl = os.path.join(newDB, "cointerp")
        inFlds = ["cointerpkey", "seqnum"]
        outTbl = os.path.join(newDB, "SYSTEM - INTERP DEPTH SEQUENCE")
        outFlds = ["cointerpkey", "depthseq"]
        interpSQL = "ruledepth = 1"

        with arcpy.da.SearchCursor(inTbl, inFlds, interpSQL) as sCur:
            with arcpy.da.InsertCursor(outTbl, outFlds) as outCur:

                for inRec in sCur:
                    outCur.insertRow(inRec)

        return True

    except MyError, e:
        # Example: raise MyError, "This is an error message"
        PrintMsg(str(e), 2)
        return False

    except:
        errorMsg()
        return False

## ===================================================================================
def ImportTabular(areaSym, newFolder, importDB, newDB, bRemoveTXT):
    # Import the SSURGO tabular text files into the Template database (newDB) using the csv reader
    #
    # Problem with SACATALOG tabular for OR628 as of 12-12-2014
    # Seemed to work OK for my laptop, but failed several times on workstation
    # when output was to network share.

    try:
        # move to the tabular folder within the new dataset
        env.workspace = os.path.join(newFolder, "tabular")

        # copy over master database and run tabular import
        PrintMsg("\tCopying selected master template database to tabular folder...", 0)

        # copy user specified database to the new folder
        shutil.copy2(importDB, newDB)

        # Run Auto_Import routine which will import the tabular data from text files
        PrintMsg("\tImporting textfiles into new database " + os.path.basename(newDB) + "...", 0)

        # Using Adolfo's csv reader method to import tabular data from text files...
        tabularFolder = os.path.join(newFolder, "tabular")

        # if the tabular directory is empty, raise an error
        if len(os.listdir(tabularFolder)) < 1:
            raise MyError, "No text files found in the tabular folder"

        if not SSURGOVersion(newDB, tabularFolder):
            raise MyError, ""

        # Create a dictionary with table information
        tblInfo = GetTableInfo(newDB)

        if len(tblInfo) == 0:
            raise MyError, "Failed to get information from mdstattabs table"

        # Create a list of textfiles to be imported. The import process MUST follow the
        # order in this list in order to maintain referential integrity. This list
        # will need to be updated if the SSURGO data model is changed in the future.
        #
        txtFiles = ["distmd","legend","distimd","distlmd","lareao","ltext","mapunit", \
        "comp","muaggatt","muareao","mucrpyd","mutext","chorizon","ccancov","ccrpyd", \
        "cdfeat","cecoclas","ceplants","cerosnac","cfprod","cgeomord","chydcrit", \
        "cinterp","cmonth", "cpmatgrp", "cpwndbrk","crstrcts","csfrags","ctxfmmin", \
        "ctxmoicl","ctext","ctreestm","ctxfmoth","chaashto","chconsis","chdsuffx", \
        "chfrags","chpores","chstrgrp","chtext","chtexgrp","chunifie","cfprodo","cpmat","csmoist", \
        "cstemp","csmorgc","csmorhpp","csmormr","csmorss","chstr","chtextur", \
        "chtexmod","sacatlog","sainterp","sdvalgorithm","sdvattribute","sdvfolder","sdvfolderattribute"]
        # Need to add featdesc import as a separate item (ie. spatial\soilsf_t_al001.txt: featdesc)

        # Static Metadata Table that records the metadata for all columns of all tables
        # that make up the tabular data set.
        mdstattabsTable = os.path.join(env.workspace, "mdstattabs")

        # set progressor object which allows progress information to be passed for every merge complete
        arcpy.SetProgressor("step", "Importing tabular data", 0, len(txtFiles) + 2, 1)

        # Need to import text files in a specific order or the MS Access database will
        # return an error due to table relationships and key violations

        # Problem with length of some memo fields, need to allocate more memory
        #csv.field_size_limit(sys.maxsize)
        csv.field_size_limit(512000)

        for txtFile in txtFiles:

            # Get table name and alias from dictionary
            if txtFile in tblInfo:
                tbl, aliasName = tblInfo[txtFile]

            else:
                raise MyError, "Textfile reference '" + txtFile + "' not found in 'mdstattabs table'"

            arcpy.SetProgressorLabel("Importing " + tbl + "...")

            # Full path to SSURGO text file
            txtPath = os.path.join(tabularFolder, txtFile + ".txt")

            # continue if the target table exists
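            # (env.workspace was reset to newDB inside GetTableInfo, so the bare
            # table name resolves against the Template database)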
            if arcpy.Exists(tbl):

                # Create cursor for all fields to populate the current table
                with arcpy.da.InsertCursor(tbl, "*") as cursor:
                    # counter for current record number
                    iRows = 1

                    try:
                        # Use csv reader to read each line in the text file
                        for row in csv.reader(open(txtPath, 'rb'), delimiter='|', quotechar='"'):
                            # replace all blank values with 'None' so that the values are properly inserted
                            # into integer values otherwise insertRow fails
                            newRow = [None if value == '' else value for value in row]
                            cursor.insertRow(newRow)
                            iRows += 1

                    except:
                        errorMsg()
                        raise MyError, "Error loading line no. " + Number_Format(iRows, 0, True) + " of " + txtFile + ".txt"

            else:
                raise MyError, "Required table '" + tbl + "' not found in " + newDB

            arcpy.SetProgressorPosition()

        # Import feature description file
        # soilsf_t_al001.txt
        spatialFolder = os.path.join(os.path.dirname(tabularFolder), "spatial")
        txtFile ="soilsf_t_" + areaSym
        txtPath = os.path.join(spatialFolder, txtFile + ".txt")
        tbl = "featdesc"

        # Create cursor for all fields to populate the featdesc table
        with arcpy.da.InsertCursor(tbl, "*") as cursor:
            # counter for current record number
            iRows = 1
            arcpy.SetProgressorLabel(tbl + "...")

            try:
                # Use csv reader to read each line in the text file
                for rowInFile in csv.reader(open(txtPath, 'rb'), delimiter='|', quotechar='"'):
                    # replace all blank values with 'None' so that the values are properly inserted
                    # into integer values otherwise insertRow fails
                    newRow = [None if value == '' else value for value in rowInFile]
                    cursor.insertRow(newRow)
                    iRows += 1

            except:
                errorMsg()
                raise MyError, "Error loading line no. " + Number_Format(iRows, 0, True) + " of " + txtFile + ".txt"

        arcpy.SetProgressorPosition()  # for featdesc table

        # Sort map units for Soil Data Viewer SYSTEM table
        arcpy.SetProgressorLabel("Sorting map units ...")
        bSorted = SortMapunits(newDB)

        if not bSorted:
            raise MyError, ""

        arcpy.SetProgressorPosition()  # for map unit sort

        # Check the database to make sure that it completed properly, with at least the
        # SAVEREST date populated in the SACATALOG table. Added this primarily to halt
        # processing when the user forgets to set the Trusted Location in MS Access.
        dbDate = GetTemplateDate(newDB)

        if dbDate == 0:
            # With this error, it would be best to bailout and fix the problem before proceeding
            raise MyError, "Failed to import tabular data"

        else:
            # Compact database (~30% reduction in mdb filesize)
            try:
                arcpy.SetProgressorLabel("Compacting database ...")
                sleep(2)
                arcpy.Compact_management(newDB)
                sleep(1)
                PrintMsg("\tCompacted database", 0)

            except:
                # Sometimes ArcGIS is unable to compact (locked database?)
                # Usually restarting the ArcGIS application fixes this problem
                PrintMsg("\tUnable to compact database", 1)

            # Set the Progressor to show completed status
            arcpy.ResetProgressor()
            arcpy.SetProgressorLabel("Tabular import complete")

            # Add MUNAME column and import FGDC metadata for the soil polygon shapefile
            bNamed = AddMuName(newFolder)

            # Remove all the text files from the tabular folder
            if bRemoveTXT:
                txtList = glob.glob(os.path.join(tabularFolder, "*.txt"))
                PrintMsg("\tRemoving textfiles...", 0)

                for txtFile in txtList:
                    if not txtFile.endswith("version.txt"):
                        os.remove(txtFile)

        return True

    except MyError, e:
        # Example: raise MyError, "This is an error message"
        PrintMsg(str(e), 2)
        return False

    except:
        errorMsg()
        return False

## ===================================================================================
def AddMuName(newFolder):
    # Add muname column (map unit name) to soil polygon shapefile
    #
    # Started having problems with Addfield when the shapefile is on a Network Share.
    # Could it be virus scan locking the table??
    # No system or geoprocessing error message is displayed since this is not a serious problem
    #
    try:

        # Add MuName to mapunit polygon shapefile using mapunit.txt
        muDict = dict()

        tabPath = os.path.join(newFolder, "tabular")
        muTxt = os.path.join(tabPath, "mapunit.txt")
        spatialFolder = os.path.join(newFolder, "spatial")
        env.workspace = spatialFolder

        if not arcpy.Exists(muTxt):
            raise MyError, "Cannot find " + muTxt

        # Some of the tabular only shapefiles on WSS were created as polyline instead of
        # polygon. This situation will cause the next line to fail with index out of range
        shpList = arcpy.ListFeatureClasses("soilmu_a*", "Polygon")

        if len(shpList) == 1:
            try:
                # Make failure to add muname a warning rather than a failure
                # Have had this occur several times for unknown reason. Virus scan file lock?
                # Seems to happen more frequently on network share.
                #
                muShp = shpList[0]
                PrintMsg("\tAdding MUNAME to " + muShp, 0)
                # add muname column to shapefile

                try:
                    arcpy.AddField_management (muShp, "MUNAME", "TEXT", "", "", 175)

                except:
                    # wait one second, then try again
                    sleep(1)
                    arcpy.AddField_management (muShp, "MUNAME", "TEXT", "", "", 175)
                    sleep(1)

                # read mukey and muname into dictionary from mapunit.txt file
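                # (muname is the second field; mukey is the last field in each pipe-delimited record)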
                with open(muTxt, 'r') as f:
                    data = f.readlines()

                for rec in data:
                    s = rec.replace('"','')
                    muList = s.split("|")
                    muDict[muList[len(muList) - 1].strip()] = muList[1]

                # update shapefile muname column using dictionary
                with arcpy.da.UpdateCursor(muShp, ("MUKEY","MUNAME")) as upCursor:
                    for rec in upCursor:
                        rec[1] = muDict[rec[0]]
                        upCursor.updateRow (rec)

                del muTxt, data, muDict

                # import FGDC metadata to mapunit polygon shapefile
                spatialFolder = os.path.join(newFolder, "spatial")
                env.workspace = spatialFolder
                shpList = arcpy.ListFeatureClasses("soilmu_a*", "Polygon")

                if len(shpList) == 1:
                    muShp = shpList[0]
                    PrintMsg("\tImporting metadata for " + muShp, 0)
                    arcpy.SetProgressorLabel("Importing metadata...")
                    metaData = os.path.join(newFolder, "soil_metadata_" + areaSym.lower() + ".xml")
                    arcpy.ImportMetadata_conversion(metaData, "FROM_FGDC", os.path.join(spatialFolder, muShp), "ENABLED")
                    del spatialFolder, muShp, metaData

                    # remove log file
                    # soil_metadata_ne137_xslttran.log
                    logFile = os.path.join(os.path.dirname(env.scratchFolder), "soil_metadata_" + areaSym.lower() + "_xslttran.log")

                    if arcpy.Exists(logFile):
                        arcpy.Delete_management(logFile, "File")

            except:
                PrintMsg("\tFailed to add MUNAME column to shapefile", 1)

            return True

        else:
            PrintMsg("\tMap unit polygon shapefile not found, 'Tabular-Only' survey?", 2)
            return False

    except MyError, e:
        # Example: raise MyError, "This is an error message"
        PrintMsg(str(e), 2)
        return False

    except:
        #errorMsg()
        return False

## ===================================================================================
# main
# Import system modules
import arcpy, sys, os, locale, string, traceback, shutil, zipfile, subprocess, glob, socket, csv, re
from urllib2 import urlopen, URLError, HTTPError
from arcpy import env
from _winreg import *
from datetime import datetime
from time import sleep

try:
    env.overwriteOutput = True

    # Script arguments...
    wc = arcpy.GetParameter(0)
    dateFilter = arcpy.GetParameter(1)
    outputFolder = arcpy.GetParameterAsText(2)
    surveyList = arcpy.GetParameter(3)
    importDB = arcpy.GetParameterAsText(4)
    bRemoveTXT = arcpy.GetParameter(5)

    # Set tabular import to False if no Template database is specified
    if importDB == "":
        PrintMsg(" \nWarning! Tabular import turned off (no database specified)", 1)
        bImport = False

    else:
        bImport = True

    # initialize error and progress trackers
    failedList = list()  # track list of failed downloads
    failedCnt = 0        # track consecutive failures
    skippedList = list() # track list of downloads that were skipped because a newer version already exists
    goodList = list()    # list of successful surveys
    iGet = 0

    PrintMsg(" \n" + str(len(surveyList)) + " soil survey(s) selected for Web Soil Survey download", 0)

    # set workspace to output folder
    env.workspace = outputFolder

    # Create ordered list by Areasymbol
    asList = list()
    asDict = dict()

    for survey in surveyList:
        env.workspace = outputFolder
        surveyInfo = survey.split(",")
        areaSym = surveyInfo[0].strip().upper()
        asList.append(areaSym)
        asDict[areaSym] = survey

    asList.sort()

    arcpy.SetProgressor("step", "Downloading SSURGO data...",  0, len(asList), 1)

    # Process list of areasymbols
    #
    for areaSym in asList:
        #
        # Run import process in order of listed Areasymbol values
        #
        iGet += 1

        # Run import process
        iTotal = len(asList)
        arcpy.SetProgressorLabel("Downloading survey " + areaSym + " from Web Soil Survey  (number " + str(iGet) + " of " + str(len(asList)) + " total)")
        bProcessed = ProcessSurvey(outputFolder, importDB, areaSym, bImport, bRemoveTXT, iGet, iTotal)

        if bProcessed == "Failed":
            failedList.append(areaSym)
            failedCnt += 1
            #raise MyError, ""

        elif bProcessed == "Skipped":
            skippedList.append(areaSym)

        elif bProcessed == "Successful":
            # download successful
            failedCnt = 0
            goodList.append(areaSym)

        if failedCnt > 4:
            raise MyError, "Five consecutive download failures, bailing out"

        if len(failedList) > 24:
            raise MyError, "Twenty-five download failures, bailing out"

        arcpy.SetProgressorPosition()

    if len(failedList) > 0 or len(skippedList) > 0:
        PrintMsg(" \nDownload process completed (" + Number_Format(len(goodList), 0, True) + " succeeded) with the following issues...", 1)

    else:
        if importDB:
            PrintMsg(" \nAll " + Number_Format(len(asList), 0, True) + " surveys succcessfully downloaded, tabular import process complete", 0)

        else:
            PrintMsg(" \nAll " + Number_Format(len(asList), 0, True) + " surveys succcessfully downloaded (no tabular import)", 0)

    arcpy.SetProgressorLabel("Processing complete...")
    env.workspace = outputFolder

except MyError, e:
    # Example: raise MyError, "This is an error message"
    PrintMsg(str(e), 2)

except:
    errorMsg()

finally:
    try:
        if len(failedList) > 0:
            PrintMsg(" \n\tWSS download failed for: " + ", ".join(failedList), 2)

        if len(skippedList) > 0:
            PrintMsg(" \n\tSkipped because a current version already exists: " + ", ".join(skippedList), 1)

    except NameError:
        # the tracking lists are undefined if the script failed before initialization
        pass

    PrintMsg(" ", 0)