# Python source code of COS

# -*- coding=utf-8

from six import text_type, binary_type, string_types
from six.moves.urllib.parse import quote, unquote
import hashlib
import base64
import os
import io
import re
import sys
import xml.dom.minidom
import xml.etree.ElementTree
from datetime import datetime
from dicttoxml import dicttoxml
from .xml2dict import Xml2Dict
from .cos_exception import CosClientError
from .cos_exception import CosServiceError

# A single upload request may carry at most 5 GB.
SINGLE_UPLOAD_LENGTH = 5 * 1024 ** 3
# Chunk size (1 MB) read per iteration while computing the MD5 of a file.
DEFAULT_CHUNK_SIZE = 1024 ** 2

# Maps the parameter names accepted in kwargs to HTTP header names.
maplist = {
    'ContentLength': 'Content-Length',
    'ContentMD5': 'Content-MD5',
    'ContentType': 'Content-Type',
    'CacheControl': 'Cache-Control',
    'ContentDisposition': 'Content-Disposition',
    'ContentEncoding': 'Content-Encoding',
    'ContentLanguage': 'Content-Language',
    'Expires': 'Expires',
    'ResponseContentType': 'response-content-type',
    'ResponseContentLanguage': 'response-content-language',
    'ResponseExpires': 'response-expires',
    'ResponseCacheControl': 'response-cache-control',
    'ResponseContentDisposition': 'response-content-disposition',
    'ResponseContentEncoding': 'response-content-encoding',
    'Metadata': 'Metadata',
    'ACL': 'x-cos-acl',
    'GrantFullControl': 'x-cos-grant-full-control',
    'GrantWrite': 'x-cos-grant-write',
    'GrantRead': 'x-cos-grant-read',
    'StorageClass': 'x-cos-storage-class',
    'Range': 'Range',
    'IfMatch': 'If-Match',
    'IfNoneMatch': 'If-None-Match',
    'IfModifiedSince': 'If-Modified-Since',
    'IfUnmodifiedSince': 'If-Unmodified-Since',
    'CopySourceIfMatch': 'x-cos-copy-source-If-Match',
    'CopySourceIfNoneMatch': 'x-cos-copy-source-If-None-Match',
    'CopySourceIfModifiedSince': 'x-cos-copy-source-If-Modified-Since',
    'CopySourceIfUnmodifiedSince': 'x-cos-copy-source-If-Unmodified-Since',
    'VersionId': 'versionId',
    'ServerSideEncryption': 'x-cos-server-side-encryption',
    'SSECustomerAlgorithm': 'x-cos-server-side-encryption-customer-algorithm',
    'SSECustomerKey': 'x-cos-server-side-encryption-customer-key',
    'SSECustomerKeyMD5': 'x-cos-server-side-encryption-customer-key-MD5',
    'SSEKMSKeyId': 'x-cos-server-side-encryption-cos-kms-key-id',
    'Referer': 'Referer',
    'PicOperations': 'Pic-Operations',
    'TrafficLimit': 'x-cos-traffic-limit',
}


def to_str(s):
    """Return text and byte strings unchanged; convert any other value with ``str``."""
    already_string = isinstance(s, (text_type, binary_type))
    return s if already_string else str(s)


def to_unicode(s):
    """Decode a byte string as UTF-8; return any other value unchanged.

    Raises:
        CosClientError: if the bytes are not valid UTF-8.
    """
    if not isinstance(s, binary_type):
        return s
    try:
        decoded = s.decode('utf-8')
    except UnicodeDecodeError:
        raise CosClientError('your bytes strings can not be decoded in utf8, utf8 support only!')
    return decoded


def to_bytes(s):
    """Encode a text string as UTF-8; return any other value unchanged.

    Raises:
        CosClientError: if the text cannot be encoded as UTF-8.
    """
    if not isinstance(s, text_type):
        return s
    try:
        encoded = s.encode('utf-8')
    except UnicodeEncodeError:
        raise CosClientError('your unicode strings can not encoded in utf8, utf8 support only!')
    return encoded


def get_raw_md5(data):
    """Return the hexadecimal MD5 digest of ``data`` wrapped in double quotes.

    ``data`` is passed through ``to_bytes`` first because MD5 requires bytes.
    """
    hex_digest = hashlib.md5(to_bytes(data)).hexdigest()
    return '"' + str(hex_digest) + '"'


def get_md5(data):
    """Return the base64-encoded MD5 digest of ``data``.

    ``data`` is passed through ``to_bytes`` first because MD5 requires bytes.
    """
    raw_digest = hashlib.md5(to_bytes(data)).digest()
    return base64.standard_b64encode(raw_digest)


def get_content_md5(body):
    """Return the base64-encoded MD5 of a string or of a seekable stream.

    Strings (text or bytes) are hashed directly with ``get_md5``. An object
    exposing ``tell``, ``seek`` and ``read`` is read from its current position
    in ``DEFAULT_CHUNK_SIZE`` pieces, then repositioned to where it started.

    Raises:
        CosClientError: if the stream cannot be repositioned, or if ``body``
            is neither a string nor an object with ``tell``/``seek``/``read``.
    """
    if isinstance(body, (text_type, binary_type)):
        return get_md5(body)

    if not all(hasattr(body, attr) for attr in ('tell', 'seek', 'read')):
        raise CosClientError('unsupported body type to calculate md5!')

    start_position = body.tell()  # remember where the caller left the stream
    hasher = hashlib.md5()
    # Hash piece by piece so a large body is never held in memory at once.
    while True:
        piece = body.read(DEFAULT_CHUNK_SIZE)
        if not piece:
            break
        hasher.update(to_bytes(piece))
    encoded_digest = base64.standard_b64encode(hasher.digest())
    try:
        body.seek(start_position)  # put the stream back where it was
    except Exception:
        raise CosClientError('seek unsupported to calculate md5!')
    return encoded_digest


def dict_to_xml(data):
    """Build the ``CompleteMultipartUpload`` XML body from a dict.

    Args:
        data: mapping with a ``'Part'`` entry that iterates over parts; every
            part must contain ``'PartNumber'`` and ``'ETag'``.

    Returns:
        The document serialized with ``toxml('utf-8')``.

    Raises:
        CosClientError: if ``'Part'``, ``'PartNumber'`` or ``'ETag'`` is missing.
    """
    document = xml.dom.minidom.Document()
    upload_node = document.createElement('CompleteMultipartUpload')
    document.appendChild(upload_node)

    if 'Part' not in data:
        raise CosClientError("Invalid Parameter, Part Is Required!")

    def text_element(tag, value):
        # Create <tag> holding str(value) as its only text child.
        element = document.createElement(tag)
        element.appendChild(document.createTextNode(str(value)))
        return element

    for part in data['Part']:
        if 'PartNumber' not in part:
            raise CosClientError("Invalid Parameter, PartNumber Is Required!")
        number_node = text_element('PartNumber', part['PartNumber'])

        if 'ETag' not in part:
            raise CosClientError("Invalid Parameter, ETag Is Required!")
        etag_node = text_element('ETag', part['ETag'])

        part_node = document.createElement('Part')
        part_node.appendChild(number_node)
        part_node.appendChild(etag_node)
        upload_node.appendChild(part_node)
    return document.toxml('utf-8')


def xml_to_dict(data, origin_str="", replace_str=""):
    """Convert the XML text of a response into a Python object.

    The XML is parsed, wrapped in ``Xml2Dict`` and rendered with ``str``; the
    known namespace prefixes are then stripped from that text, ``origin_str``
    is optionally replaced by ``replace_str``, and the text is evaluated back
    into an object.

    Args:
        data: XML document accepted by ``xml.etree.ElementTree.fromstring``.
        origin_str: when non-empty, every occurrence in the rendered text is
            replaced by ``replace_str``.
        replace_str: replacement used together with ``origin_str``.
    """
    namespace_prefixes = (
        "{http://www.qcloud.com/document/product/436/7751}",
        "{https://cloud.tencent.com/document/product/436}",
        "{http://doc.s3.amazonaws.com/2006-03-01}",
        "{http://s3.amazonaws.com/doc/2006-03-01/}",
        "{http://www.w3.org/2001/XMLSchema-instance}",
    )
    parsed_root = xml.etree.ElementTree.fromstring(data)
    rendered = str(Xml2Dict(parsed_root))
    for prefix in namespace_prefixes:
        rendered = rendered.replace(prefix, "")
    if origin_str:
        rendered = rendered.replace(origin_str, replace_str)
    # NOTE(review): eval() runs on text derived from the XML payload; if that
    # payload can be influenced by an untrusted party this is a code-execution
    # risk. Confirm what Xml2Dict renders before switching to a safer parser.
    return eval(rendered)


def get_id_from_xml(data, name):
    """Return the value of the first child node of the first ``name`` element.

    The search covers the descendants of the document's root element. An
    ``IndexError`` is raised when no such element exists or when the first
    match has no child nodes.
    """
    document = xml.dom.minidom.parseString(data)
    matches = document.documentElement.getElementsByTagName(name)
    first_match = matches[0]
    return first_match.childNodes[0].nodeValue


def mapped(headers):
    """Translate parameter names into header names using ``maplist``.

    The ``'Metadata'`` entry is special: its own key/value pairs are copied
    into the result as-is instead of being renamed.

    Raises:
        CosClientError: if a parameter name is not present in ``maplist``.
    """
    translated = dict()
    for param in headers:
        if param not in maplist:
            raise CosClientError('No Parameter Named ' + param + ' Please Check It')
        if param == 'Metadata':
            metadata = headers[param]
            for meta_key in metadata:
                translated[meta_key] = metadata[meta_key]
            continue
        translated[maplist[param]] = headers[param]
    return translated


def format_xml(data, root, lst=None, parent_child=False):
    """Serialize ``data`` to XML bytes with ``dicttoxml`` under the tag ``root``.

    Args:
        data: object handed to ``dicttoxml``.
        root: name passed to ``dicttoxml`` as ``custom_root``.
        lst: optional iterable of strings. Wherever one of them appears
            doubled back-to-back (``i + i``) in the generated XML it is
            collapsed to a single copy. ``None`` (the default) means no
            replacement; a fresh default avoids sharing one mutable list
            object between calls.
        parent_child: selects the ``item_func`` given to ``dicttoxml``. When
            true the function returns its argument without the last
            character, otherwise it returns the argument unchanged.
            NOTE(review): per the dicttoxml docs the argument is the parent
            tag name of a list item - confirm.

    Returns:
        The XML produced by ``dicttoxml`` after the replacements (bytes).
    """
    if parent_child:
        def item_func(parent):
            return parent[:-1]
    else:
        def item_func(parent):
            return parent

    xml_config = dicttoxml(data, item_func=item_func, custom_root=root, attr_type=False)
    for token in (lst if lst is not None else ()):
        xml_config = xml_config.replace(to_bytes(token + token), to_bytes(token))
    return xml_config


def format_values(data):
    """Convert every value of a headers/params dict with ``to_bytes``.

    The dict is modified in place and also returned.
    """
    for name in data:
        original_value = data[name]
        data[name] = to_bytes(original_value)
    return data


def format_endpoint(endpoint, region):
    """Return the endpoint domain to use.

    A non-empty ``endpoint`` wins and is returned as text. Otherwise the
    domain is built from the formatted ``region`` plus ``.myqcloud.com``.

    Raises:
        CosClientError: if both ``endpoint`` and ``region`` are empty.
    """
    if endpoint:
        return to_unicode(endpoint)
    if not region:
        raise CosClientError("Region or Endpoint is required not empty!")
    formatted_region = format_region(region)
    return u"{region}.myqcloud.com".format(region=formatted_region)


def format_region(region):
    """Validate a region name and return it in the form used for the domain.

    Rules, applied in this order:
      * a region that already contains ``cos.`` is returned unchanged;
      * the legacy region names are returned unchanged (no ``cos.`` prefix);
      * the v4 short names are translated to their v5 names;
      * anything else gets the ``cos.`` prefix.

    Raises:
        CosClientError: if ``region`` is not a string, is empty, or does not
            match the allowed pattern.
    """
    if not isinstance(region, string_types):
        raise CosClientError("region is not string type")
    if not region:
        raise CosClientError("region is required not empty!")
    region = to_unicode(region)
    # Note: besides letters, digits and '-', the pattern also accepts '.'.
    if not re.match(r'^[A-Za-z0-9][A-Za-z0-9.\-]*[A-Za-z0-9]$', region):
        raise CosClientError("region format is illegal, only digit, letter and - is allowed!")

    # An explicit "cos." (e.g. cos.ap-beijing-1) is kept exactly as given.
    if u'cos.' in region:
        return region

    # Legacy domains must not receive the "cos." prefix.
    legacy_regions = (u'cn-north', u'cn-south', u'cn-east', u'cn-south-2', u'cn-southwest', u'sg')
    if region in legacy_regions:
        return region

    # v4 region names mapped onto their v5 equivalents.
    v4_to_v5 = {
        u'cossh': u'cos.ap-shanghai',
        u'cosgz': u'cos.ap-guangzhou',
        u'cosbj': u'cos.ap-beijing',
        u'costj': u'cos.ap-beijing-1',
        u'coscd': u'cos.ap-chengdu',
        u'cossgp': u'cos.ap-singapore',
        u'coshk': u'cos.ap-hongkong',
        u'cosca': u'cos.na-toronto',
        u'cosger': u'cos.eu-frankfurt',
    }
    if region in v4_to_v5:
        return v4_to_v5[region]

    # New-style region: add the "cos." prefix.
    return u'cos.' + region


def format_bucket(bucket, appid):
    """Return the bucket name, with ``-appid`` appended when it is needed.

    An empty ``appid`` means ``bucket`` is already the full name and it is
    returned as text. Otherwise ``-appid`` is appended unless the bucket
    already ends with it.

    Raises:
        CosClientError: if ``bucket`` is not a string, is empty, or has
            illegal characters, or if a non-empty ``appid`` is not a string.
    """
    if not isinstance(bucket, string_types):
        raise CosClientError("bucket is not string")
    if not bucket:
        raise CosClientError("bucket is required not empty")

    # Two or more characters, or exactly one alphanumeric character.
    well_formed = (re.match(r'^[A-Za-z0-9][A-Za-z0-9\-]*[A-Za-z0-9]$', bucket)
                   or re.match('^[A-Za-z0-9]$', bucket))
    if not well_formed:
        raise CosClientError("bucket format is illegal, only digit, letter and - is allowed!")

    if not appid:
        return to_unicode(bucket)
    if not isinstance(appid, string_types):
        raise CosClientError("appid is not string")

    bucket = to_unicode(bucket)
    suffix = u"-" + to_unicode(appid)
    if bucket.endswith(suffix):
        return bucket
    return bucket + suffix


def format_path(path):
    """Validate an object key and return it percent-encoded.

    One leading ``/`` is removed before encoding. The characters ``/-_.~``
    are left unescaped by ``quote``.

    Raises:
        CosClientError: if ``path`` is not a string or is empty.
    """
    if not isinstance(path, string_types):
        raise CosClientError("key is not string")
    if not path:
        raise CosClientError("Key is required not empty")
    key = to_unicode(path)
    if key.startswith(u'/'):
        key = key[1:]
    # Encode the key up front.
    return quote(to_bytes(key), b'/-_.~')


def get_copy_source_info(CopySource):
    """Extract bucket, key, endpoint and version id from a copy-source dict.

    Keys read from ``CopySource``: ``Bucket`` and ``Key`` (required),
    ``Appid``, ``Region``, ``Endpoint`` and ``VersionId`` (optional, default
    to an empty string).

    Returns:
        Tuple ``(bucket, path, endpoint, versionid)``; one leading ``/`` is
        removed from ``path``.

    Raises:
        CosClientError: if ``Bucket`` or ``Key`` is missing, or if the bucket
            or endpoint formatting helpers reject their input.
    """
    appid = CopySource['Appid'] if 'Appid' in CopySource else u""

    if 'Bucket' not in CopySource:
        raise CosClientError('CopySource Need Parameter Bucket')
    bucket = format_bucket(CopySource['Bucket'], appid)

    region = CopySource['Region'] if 'Region' in CopySource else u""
    raw_endpoint = CopySource['Endpoint'] if 'Endpoint' in CopySource else u""
    endpoint = format_endpoint(raw_endpoint, region)

    if 'Key' not in CopySource:
        raise CosClientError('CopySource Need Parameter Key')
    path = to_unicode(CopySource['Key'])
    if path and path[0] == '/':
        path = path[1:]

    versionid = to_unicode(CopySource['VersionId']) if 'VersionId' in CopySource else u""
    return bucket, path, endpoint, versionid


def gen_copy_source_url(CopySource):
    """Assemble the copy-source URL ``bucket.endpoint/path[?versionId=...]``.

    The key is encoded with ``format_path``; the ``versionId`` query is added
    only when a version id is present.
    """
    bucket, key, endpoint, versionid = get_copy_source_info(CopySource)
    encoded_path = format_path(key)
    if versionid != u'':
        encoded_path = encoded_path + u'?versionId=' + versionid
    return u"{bucket}.{endpoint}/{path}".format(bucket=bucket, endpoint=endpoint, path=encoded_path)


def gen_copy_source_range(begin_range, end_range):
    """Return a range string of the form ``bytes=begin-end``."""
    first = to_unicode(begin_range)
    last = to_unicode(end_range)
    return u"bytes={first}-{end}".format(first=first, end=last)


def get_file_like_object_length(data):
    """Return the number of bytes between the current position and the end.

    The total size comes from ``os.fstat`` on the object's file descriptor.
    If that raises ``IOError``, ``len(data)`` is used when available,
    otherwise ``len(data.getvalue())`` (e.g. a BytesIO object). If ``tell``
    raises ``IOError`` the current position is taken to be 0.
    """
    try:
        size = os.fstat(data.fileno()).st_size
    except IOError:
        # No usable file descriptor: fall back to in-memory length.
        size = len(data) if hasattr(data, '__len__') else len(data.getvalue())
    try:
        offset = data.tell()
    except IOError:
        offset = 0
    return size - offset


def check_object_content_length(data):
    """Reject bodies larger than ``SINGLE_UPLOAD_LENGTH`` (5 GB).

    The length is measured for strings (after ``to_bytes``) and for objects
    that expose ``fileno`` and ``tell``. For anything else the length is
    unknown and treated as 0, so the check passes.

    Raises:
        CosClientError: if the measured length exceeds the limit.
    """
    if isinstance(data, (text_type, binary_type)):
        content_len = len(to_bytes(data))
    elif hasattr(data, 'fileno') and hasattr(data, 'tell'):
        content_len = get_file_like_object_length(data)
    else:
        # The content length cannot be determined; the file is uploaded chunked.
        content_len = 0
    if content_len > SINGLE_UPLOAD_LENGTH:
        raise CosClientError('The object size you upload can not be larger than 5GB in put_object or upload_part')
    return None


def format_dict(data, key_lst):
    """Wrap single values of repeatable fields in a list.

    A repeatable field holding exactly one entry arrives as a bare dict or
    string rather than a list; this normalizes it to a one-element list so
    callers can always iterate.

    Args:
        data: dict to normalize; modified in place.
        key_lst: list of keys whose values should be lists.

    Returns:
        ``data`` itself. If ``data`` is not a dict or ``key_lst`` is not a
        list, ``data`` is returned untouched.
    """
    if not (isinstance(data, dict) and isinstance(key_lst, list)):
        return data
    for key in key_lst:
        if key not in data:
            continue
        value = data[key]
        # string_types (not bare str) so that unicode values on Python 2 are
        # wrapped too, matching the string checks used elsewhere in this file.
        if isinstance(value, dict) or isinstance(value, string_types):
            data[key] = [value]
    return data


def decode_result(data, key_lst, multi_key_list):
    """Percent-decode selected fields of a result dict in place.

    Args:
        data: result dict; modified in place and returned.
        key_lst: top-level keys whose non-empty values are unquoted.
        multi_key_list: sequences whose first item names a top-level
            collection and whose second item names the field to unquote
            inside each of its entries.
    """
    for field in key_lst:
        if field in data and data[field]:
            data[field] = unquote(data[field])
    for pair in multi_key_list:
        collection_key = pair[0]
        if collection_key not in data:
            continue
        for entry in data[collection_key]:
            inner_field = pair[1]
            if inner_field in entry and entry[inner_field]:
                entry[inner_field] = unquote(entry[inner_field])
    return data


def get_date(yy, mm, dd):
    """Return the lifecycle ``Date`` string for the given year, month and day.

    The result is the ISO format of midnight on that date followed by the
    fixed suffix ``+08:00``.
    """
    midnight = datetime(yy, mm, dd)
    return midnight.isoformat() + '+08:00'


class CiDetectType:
    """Detection type constants for CI content detection.

    Each value is a distinct power of two.
    NOTE(review): the original note says several types can be set at once,
    presumably by combining the values bitwise - confirm against callers.
    """
    PORN = 1 << 0
    TERRORIST = 1 << 1
    POLITICS = 1 << 2
    ADS = 1 << 3