定時任務實時生成pdf,用tornado的異步io將文件上傳到s3,有幾個坑記錄一下(Python 代碼如下):
import logging
import re
import sys
from multiprocessing.dummy import Pool as ThreadPool

import boto3

AWS_REGION_NAME = 'cn-north-1'
AWS_S3_ACCESS_KEY_ID = ""
AWS_S3_SECRET_ACCESS_KEY = ""
AWS_S3_WQS_BUCKET = ""
service_name = "s3"
LOG_OUT_FILENAME = "/var/log/fixs3_out.log"
LOG_ERROR_FILENAME = "/var/log/fixs3_err.log"

# Matches a "/p<N>-" fragment inside an object key, where N is 0..1000
# written without leading zeros. Compiled once instead of on every call.
_PAGE_PREFIX_RE = re.compile(r"/p(0|[1-9]\d?|[1-9]\d\d?|1000)-")


def setup_logger(logger_name, log_file, level=logging.INFO):
    """Configure the named logger to write to ``log_file`` and to the console.

    Args:
        logger_name: Name passed to ``logging.getLogger``.
        log_file: Path of the log file; it is opened with mode ``'w'``, so any
            previous content is truncated.
        level: Logging level applied to the logger.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    # Attach handlers only once: calling this function again for the same
    # logger name would otherwise duplicate every log record.
    if not logger.handlers:
        formatter = logging.Formatter(
            fmt='%(asctime)s.%(msecs)03d [ %(name)s:%(process)d ] - [ %(levelname)s ] %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S')
        file_handler = logging.FileHandler(log_file, mode='w')
        file_handler.setFormatter(formatter)
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
        logger.addHandler(stream_handler)
    return logger


out_logger = setup_logger('log_out', LOG_OUT_FILENAME)


class S3StoryOperate(object):
    """Helper around a boto3 S3 resource for renaming and uploading objects."""

    def __init__(self):
        """Create the boto3 S3 resource from the module-level credentials.

        On any failure the error is printed and the process exits.
        """
        try:
            session = boto3.session.Session(
                aws_access_key_id=AWS_S3_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_S3_SECRET_ACCESS_KEY,
                region_name=AWS_REGION_NAME)
            self.resource = session.resource('s3', region_name=AWS_REGION_NAME)
        except Exception as ex:
            print(ex)
            sys.exit(ex)

    def get_all_key(self, bucket_name):
        """Return a list with the key of every object in ``bucket_name``."""
        return [obj.key for obj in self.resource.Bucket(bucket_name).objects.all()]

    def rename_all_key(self):
        """Rename every key in ``AWS_S3_WQS_BUCKET`` that ``fix_path`` rewrites.

        S3 has no rename operation, so each affected object is copied to its
        new key and the old key is deleted. Work is spread over 4 threads.
        On any failure the error is printed and the process exits.
        """
        old_names = self.get_all_key(AWS_S3_WQS_BUCKET)

        def rename_one(old_name):
            fix_result = self.fix_path(old_name)
            if not fix_result:
                # Key has no page fragment: nothing to rename.
                return
            new_path = fix_result[1]
            # Dict form of CopySource avoids hand-building "bucket/key".
            self.resource.Object(AWS_S3_WQS_BUCKET, new_path).copy_from(
                CopySource={'Bucket': AWS_S3_WQS_BUCKET, 'Key': old_name})
            self.resource.Object(AWS_S3_WQS_BUCKET, old_name).delete()

        pool = ThreadPool(4)
        try:
            pool.map(rename_one, old_names)
        except Exception as ex:
            # Drop queued work and wait for the workers before exiting, so the
            # pool is not abandoned while the process shuts down.
            pool.terminate()
            pool.join()
            print(ex)
            sys.exit(ex)
        pool.close()
        pool.join()

    def fix_path(self, old_path):
        """Strip "/p<N>-" page fragments from ``old_path``.

        Args:
            old_path: Object key to inspect.

        Returns:
            ``None`` when the key contains no "/p<N>-" fragment; otherwise a
            tuple ``(old_path, new_path, page_num)`` where ``new_path`` has
            every such fragment replaced by "/" and ``page_num`` is the number
            (as a string) from the first fragment. The tuple is also logged.
        """
        match = _PAGE_PREFIX_RE.search(old_path)
        if not match:
            return None
        page_num = match.group(1)
        new_path = _PAGE_PREFIX_RE.sub("/", old_path)
        out = (old_path, new_path, page_num)
        out_logger.info(out)
        return out

    def uploadfile(self, bucket, objkey, data):
        """Upload ``data`` to ``bucket`` under the key ``objkey``.

        On any failure the error is printed and the process exits.
        """
        try:
            self.resource.Bucket(bucket).put_object(Key=objkey, Body=data)
        except Exception as ex:
            print(ex)
            sys.exit(ex)


if __name__ == "__main__":
    s3 = S3StoryOperate()
    s3.rename_all_key()
    # Example of checking a single key by hand:
    # s3.fix_path("szyz/unpaid/20170328/11/SX/220/214324456/d5e0fa9257b345faacd6959979265428/SZYZ20152055_96e3cbfd7dd74b6d88e0e968f5ca38b9.pdf")
1,boto3可能connect不上,文檔沒有提示,一定要設置時區,否則會返回403,報Access Denied;
2,boto3沒有rename接口,文檔裡也沒有說明,在stackoverflow上面看了下,最好也是最快的解決方案就是copy + delete。
3,提供copy的接口有幾個:一個是bucket提供的接口,一個是client提供的接口,還有一個是object提供的copy接口。由於是桶內的rename,使用object的copy & delete方案最快。
4,fix_path是自己的業務代碼,根據需求對應修改即可。