Python: working with S3 via boto3

A scheduled job generates PDFs in real time, and the files are uploaded to S3 through Tornado with async IO. A few pitfalls worth writing down (the Tornado upload side is sketched after the script below):

import re
import boto3
import logging
from multiprocessing.dummy import Pool as ThreadPool

AWS_REGION_NAME = 'cn-north-1'
AWS_S3_ACCESS_KEY_ID = ""
AWS_S3_SECRET_ACCESS_KEY = ""
AWS_S3_WQS_BUCKET = ""
service_name="s3"

LOG_OUT_FILENAME = "/var/log/fixs3_out.log"
LOG_ERROR_FILENAME = "/var/log/fixs3_err.log"

def setup_logger(logger_name, log_file, level=logging.INFO):
    l = logging.getLogger(logger_name)
    formatter = logging.Formatter(fmt='%(asctime)s.%(msecs)03d [ %(name)s:%(process)d ] - [ %(levelname)s ] %(message)s',
                                  datefmt='%Y-%m-%d %H:%M:%S')
    fileHandler = logging.FileHandler(log_file, mode='w')
    fileHandler.setFormatter(formatter)
    streamHandler = logging.StreamHandler()
    streamHandler.setFormatter(formatter)

    l.setLevel(level)
    l.addHandler(fileHandler)
    l.addHandler(streamHandler)
setup_logger('log_out', LOG_OUT_FILENAME)
out_logger = logging.getLogger('log_out')


class S3StoryOperate(object):
    def __init__(self):
        try:
            session = boto3.session.Session(aws_access_key_id=AWS_S3_ACCESS_KEY_ID,
                                            aws_secret_access_key=AWS_S3_SECRET_ACCESS_KEY,
                                            region_name=AWS_REGION_NAME)
            self.resource = session.resource('s3', AWS_REGION_NAME)

        except Exception as ex:
            print(ex)
            exit(ex)

    def get_all_key(self, bucket_name):
        # return every object key in the bucket
        keys = []
        for obj in self.resource.Bucket(bucket_name).objects.all():
            keys.append(obj.key)
        return keys


    def rename_all_key(self):
        # rename every matching key with a small thread pool; S3 has no
        # rename, so each rename is a copy followed by a delete
        old_names = self.get_all_key(AWS_S3_WQS_BUCKET)
        pool = ThreadPool(4)

        def func(old_name):
            fix_result = self.fix_path(old_name)
            if fix_result:
                new_path = fix_result[1]
                self.resource.Object(AWS_S3_WQS_BUCKET, new_path).copy_from(
                    CopySource=AWS_S3_WQS_BUCKET + "/" + old_name)
                self.resource.Object(AWS_S3_WQS_BUCKET, old_name).delete()
        try:
            pool.map(func, old_names)
            pool.close()
            pool.join()
        except Exception as ex:
            print(ex)
            exit(ex)

    def fix_path(self, old_path):
        # business-specific: drop the "/p<page>-" segment (page 0-1000) from
        # the key; returns (old_path, new_path, page_num), or None if no match
        p = re.compile(r"/p(0|[1-9]\d?|[1-9]\d\d?|1000)-")
        result = re.search(p, old_path)
        if result:
            page_num = result.group().replace("/p", "").replace("-", "")
        else:
            return
        new_path = p.sub("/", old_path)
        out = (old_path, new_path, page_num)
        out_logger.info(out)
        return out

    def uploadfile(self, bucket, objkey, data):
        # plain blocking upload; data can be bytes or a file-like object
        try:
            self.resource.Bucket(bucket).put_object(Key=objkey, Body=data)
        except Exception as ex:
            print(ex)
            exit(ex)





if __name__ == "__main__":
    s3 = S3StoryOperate()
    s3.rename_all_key()
    #s3.fix_path("szyz/unpaid/20170328/11/SX/220/214324456/d5e0fa9257b345faacd6959979265428/SZYZ20152055_96e3cbfd7dd74b6d88e0e968f5ca38b9.pdf")
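
The script above only handles the rename fix; the upload path mentioned at the top isn't shown. Here is a minimal sketch of driving the blocking uploadfile call from Tornado without stalling the IOLoop, assuming Tornado's coroutine style and a thread pool (the handler and argument names are made up, not from the original service):

from concurrent.futures import ThreadPoolExecutor

import tornado.web
from tornado import gen

upload_executor = ThreadPoolExecutor(4)
s3_store = S3StoryOperate()

class PdfUploadHandler(tornado.web.RequestHandler):
    @gen.coroutine
    def post(self):
        key = self.get_argument("key")
        data = self.request.body
        # submit the blocking boto3 call to the pool and yield the Future;
        # Tornado resolves concurrent.futures futures inside coroutines
        yield upload_executor.submit(
            s3_store.uploadfile, AWS_S3_WQS_BUCKET, key, data)
        self.finish("uploaded " + key)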

1. boto3 may fail to connect, and the docs give no hint about why: make sure the machine's time zone and clock are set correctly, otherwise requests come back 403 access denied (the request signature carries a timestamp, so a skewed clock fails signature validation).
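
When it happens, the bare 403 doesn't say much; a small sketch (not from the original post) of pulling the actual error code out of the botocore exception, so clock-skew failures (RequestTimeTooSkewed) can be told apart from credential or policy failures (AccessDenied):

import botocore.exceptions

def put_with_diagnostics(resource, bucket, key, data):
    # wrap put_object and log the S3 error code on failure
    try:
        return resource.Bucket(bucket).put_object(Key=key, Body=data)
    except botocore.exceptions.ClientError as ex:
        out_logger.error("put_object failed with %s: %s",
                         ex.response["Error"]["Code"], ex)
        raise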

2. boto3 has no rename API. From a look around Stack Overflow, the best and fastest solution is copy + delete.
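
For reference, the same copy + delete rename through the low-level client would look roughly like this (a sketch; the rename_key helper is mine, and the script above uses the resource-level Object API instead):

import boto3

client = boto3.client("s3",
                      region_name=AWS_REGION_NAME,
                      aws_access_key_id=AWS_S3_ACCESS_KEY_ID,
                      aws_secret_access_key=AWS_S3_SECRET_ACCESS_KEY)

def rename_key(bucket, old_key, new_key):
    # S3 has no rename: copy to the new key, then delete the old one
    client.copy_object(Bucket=bucket, Key=new_key,
                       CopySource={"Bucket": bucket, "Key": old_key})
    client.delete_object(Bucket=bucket, Key=old_key)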

3. There are several copy interfaces: the bucket exposes one, the client exposes one, and the object exposes its own copy interface. Since this is a rename within a single bucket, the object-level copy & delete approach is the fastest.
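
Side by side, the three copy entry points (a sketch; "my-bucket" and the keys are placeholders):

import boto3

session = boto3.session.Session(region_name=AWS_REGION_NAME)
client = session.client("s3")
resource = session.resource("s3")
src = {"Bucket": "my-bucket", "Key": "old/key.pdf"}

# 1. low-level client call
client.copy_object(Bucket="my-bucket", Key="new/key.pdf", CopySource=src)

# 2. bucket-level managed copy (handles multipart for large objects)
resource.Bucket("my-bucket").copy(src, "new/key.pdf")

# 3. object-level copy, plus a delete to finish the in-bucket rename
resource.Object("my-bucket", "new/key.pdf").copy_from(
    CopySource="my-bucket/old/key.pdf")
resource.Object("my-bucket", "old/key.pdf").delete()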

4. fix_path is my own business code; adapt it to your needs as required.
