python實現CentOS下文件複製

原由是公司想對現有網站作SEO優化,在優化URL這一塊,思路是簡化路徑,使鏈接更短!

第一步: 分析規則

原始表
-- Sample the source table to inspect the two URI columns that get rewritten.
SELECT id, uri, www_uri FROM `article` LIMIT 100;

分析一條鏈接:
/detail/0ztm/161011/1476154039.html
真實存儲路徑是:
/_/boss/0/zt/m/16/10/11/1476154039.html

咱們要作的就是把1476154039.html 替換成從2000開始計數的文件名,例如 2000.html

第二步:分析表中全部鏈接,轉化成真實路徑,方便後邊python直接調用

考慮到不想在原始表上作修改,因此新建一張表:

-- Staging table holding, for every article, the original URIs plus the
-- storage directory and file name derived from them, so that the source
-- `article` table itself is never modified.
CREATE TABLE article_pathzsq
(
    zsid BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, -- surrogate key; the new file number is derived from it
    id BIGINT,                     -- id of the source row in `article`
    uri VARCHAR(255),              -- original article.uri
    www_uri VARCHAR(255),          -- original article.www_uri
    uri_path VARCHAR(512),         -- storage directory derived from uri (ends with '/')
    uri_htm VARCHAR(255),          -- current file name taken from uri
    www_uri_path VARCHAR(512),     -- storage directory derived from www_uri (ends with '/')
    www_uri_htm VARCHAR(512),      -- current file name taken from www_uri
    inum VARCHAR(128),             -- new file name, filled in by a later UPDATE
    isflag INT NOT NULL DEFAULT 0  -- the copy script reads rows with isflag = 0; presumably a "processed" marker
);
-- Fill the staging table: translate each public URI into the directory it is
-- really stored in, and split off the file name.
--   /detail/0ztm/161011/1476154039.html
--     -> uri_path '/_/boss/0/zt/m/16/10/11/'  +  uri_htm '1476154039.html'
-- Rows whose uri already contains '/_/boss' are skipped.
INSERT INTO article_pathzsq (id, uri, www_uri, uri_path, uri_htm, www_uri_path, www_uri_htm)
SELECT
    a.id,
    a.uri,
    a.www_uri,
    CONCAT(
        -- First two path segments select the storage root.
        CASE SUBSTRING_INDEX(a.uri, '/', 3)
            WHEN '/detail/0nsm' THEN '/_/boss/0/ns/m'
            WHEN '/detail/0ztm' THEN '/_/boss/0/zt/m'
            WHEN '/detail/3nsm' THEN '/_/boss/3/ns/m'
            WHEN '/detail/3ztm' THEN '/_/boss/3/zt/m'
            ELSE NULL -- unknown prefix: CONCAT then yields NULL for the whole path
        END,
        '/',
        -- The six-character segment (e.g. 161011) is split into three two-character directories.
        SUBSTR(SUBSTRING_INDEX(a.uri, '/', -2), 1, 2), '/',
        SUBSTR(SUBSTRING_INDEX(a.uri, '/', -2), 3, 2), '/',
        SUBSTR(SUBSTRING_INDEX(a.uri, '/', -2), 5, 2), '/'
    ) AS uri_path,
    -- Everything after '161011/' is the file name.
    SUBSTR(SUBSTRING_INDEX(a.uri, '/', -2), 8, 1000) AS uri_htm,
    CONCAT(
        CASE SUBSTRING_INDEX(a.www_uri, '/', 3)
            WHEN '/detail/0nswww' THEN '/_/boss/0/ns/www'
            WHEN '/detail/0ztwww' THEN '/_/boss/0/zt/www'
            WHEN '/detail/3nswww' THEN '/_/boss/3/ns/www'
            WHEN '/detail/3ztwww' THEN '/_/boss/3/zt/www'
            ELSE NULL -- unknown prefix: CONCAT then yields NULL for the whole path
        END,
        '/',
        SUBSTR(SUBSTRING_INDEX(a.www_uri, '/', -2), 1, 2), '/',
        SUBSTR(SUBSTRING_INDEX(a.www_uri, '/', -2), 3, 2), '/',
        SUBSTR(SUBSTRING_INDEX(a.www_uri, '/', -2), 5, 2), '/'
    ) AS www_uri_path,
    SUBSTR(SUBSTRING_INDEX(a.www_uri, '/', -2), 8, 1000) AS www_uri_htm
FROM `article` AS a
WHERE a.uri NOT REGEXP '/_/boss'
-- Deterministic order so the same rows are picked and zsid is assigned reproducibly.
ORDER BY a.id
LIMIT 10;

第三步:在article_pathzsq,規劃新建文件名稱,按照規則,從2000計數遞增

-- Assign the new file name to every row that does not have one yet.
-- zsid is AUTO_INCREMENT and starts at 1 by default, so the offset is 1999
-- to make the first generated name 2000.html.
UPDATE article_pathzsq
SET inum = CONCAT(zsid + 1999, '.html')
WHERE inum IS NULL;

 

而後用python(我這裏使用的是python2.6,系統自帶的),讀取數據庫,結果集;按照結果集,複製以前的文件,生成新的文件:

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Print the ``cp`` commands that duplicate article files under their new names.

Reads pending rows (isflag = 0) from ``article_pathzsq`` and, for each row,
prints one command for the ``uri`` file and one for the ``www_uri`` file.
The commands are only printed (dry run); the ``os.system`` call is left
commented out on purpose.
"""
import MySQLdb
import os

dbip = "192.168.0.0"
dbuser = "mysql"
dbpwd = r"000000"
dbdata = "zt"


def copy_command(directory, old_name, new_name):
    """Build the shell command copying ``old_name`` to ``new_name`` in ``directory``.

    Returns None when any part is None (a NULL column in the database), because
    no valid path can be built from it.
    """
    if directory is None or old_name is None or new_name is None:
        return None
    return 'cp  ' + directory + old_name + '  ' + directory + new_name


# Open the database connection.
db = MySQLdb.connect(dbip, dbuser, dbpwd, dbdata)
try:
    # Obtain a cursor and fetch the pending rows.
    cursor = db.cursor()
    sqls = "SELECT a.zsid , a.uri_path , a.uri_htm , a.www_uri_path, a.www_uri_htm , a.inum  from article_pathzsq a where a.isflag=0 LIMIT 10;"
    cursor.execute(sqls)
    results = cursor.fetchall()
except Exception as err:
    # Without a result set there is nothing to process, so stop here instead
    # of failing later on an undefined ``results``.
    print("讀取失敗")
    print(err)
    raise SystemExit(1)
finally:
    # Close the connection whether or not the query succeeded.
    db.close()

for row in results:
    zsid, uri_path, uri_htm, www_uri_path, www_uri_htm, inum = row
    commands = (
        copy_command(uri_path, uri_htm, inum),
        copy_command(www_uri_path, www_uri_htm, inum),
    )
    for command in commands:
        if command is None:
            print("skip zsid=%s: missing path data" % zsid)
            continue
        print(command)
        # Dry run: uncomment to really perform the copy. The paths come from
        # the database and are not shell-quoted, so shutil.copy would be the
        # safer way to do the actual copy.
        #os.system(command)
相關文章
相關標籤/搜索