phpBB3導入帖子的Python腳本

關聯的數據表

在phpBB3中導入帖子時, 需要處理的有兩張表, 一個是 topics, 一個是 posts.
爲了方便與原數據關聯, 需要在這兩個表上新增一個字段並創建唯一索引:

-- Add an `ori_id` column (the id of the record in the source data) to both
-- tables, then put a unique index on it so a source id appears at most once.
-- NOTE(review): the Python import code in this article addresses these tables
-- as `phpbb_topics` / `phpbb_posts`; adjust the names to your table prefix.
-- NOTE(review): because `ori_id` defaults to '', creating the unique index
-- presumably fails if a table already holds more than one row -- confirm, and
-- empty the tables first if needed.
ALTER TABLE `topics` ADD COLUMN `ori_id` VARCHAR(32) NOT NULL DEFAULT '' AFTER `poll_vote_change`;
ALTER TABLE `posts` ADD COLUMN `ori_id` VARCHAR(32) NOT NULL DEFAULT '' AFTER `post_edit_locked`;
ALTER TABLE `topics` ADD UNIQUE INDEX `ori_id` USING BTREE (`ori_id`);
ALTER TABLE `posts` ADD UNIQUE INDEX `ori_id` USING BTREE (`ori_id`);

如果是新安裝的論壇, 在每次導入之前, 用以下語句初始化:

-- Reset both tables before an import run. This removes every existing topic
-- and post, so only use it on a forum whose content can be discarded.
-- NOTE(review): the Python import code uses the names `phpbb_topics` /
-- `phpbb_posts`; adjust the names to your table prefix.
TRUNCATE `topics`;
TRUNCATE `posts`;

需要的最小數據集

posts表, 需要的最小字段集爲 `topic_id`, `forum_id`, `poster_id`, `poster_ip`, `post_time`, `enable_sig`, `post_username`, `post_subject`, `post_text`, `post_checksum`, `post_attachment`, `ori_id`
topics表, 需要的最小字段集爲 `forum_id`, `topic_title`, `topic_time`, `ori_id`

導入topic和post時的處理邏輯
按創建時間將post正序排列後, 依次處理: 檢查是否是topic的第一篇, 如果是則創建topic, 如果不是則去查找(先查目標db, 再查源db; 如果找不到第一篇, 需要將自己設爲第一篇). 這樣topicId確定後就可以寫入post了, 然後再用寫入產生的postId去更新topic的信息.

Python代碼

子函數

import rbcommon
import re


def getTopic(cursor, articleId):
    """Look up the phpBB topic that was created for a source article.

    Args:
        cursor: Database cursor whose rows are mappings keyed by column
            name (the result is read as ``row['topic_id']``).
        articleId: Source article id, matched against ``phpbb_topics.ori_id``.

    Returns:
        The ``topic_id`` of the matching row, or 0 when no row matches.
    """
    sql = 'SELECT topic_id FROM phpbb_topics WHERE ori_id = %s'
    # Pass a real one-element tuple. ``(articleId)`` is just the bare value,
    # and a driver that iterates over the parameter sequence would split the
    # string into single characters.
    cursor.execute(sql, (articleId,))
    row = cursor.fetchone()
    return row['topic_id'] if row is not None else 0

def getAuthor(cursor, name):
    """Resolve a source author name to a phpBB user id.

    The name is lower-cased and compared with ``phpbb_users.username_clean``.

    Args:
        cursor: Database cursor whose rows are mappings keyed by column
            name (the result is read as ``row['user_id']``).
        name: Author name as it appears in the source data.

    Returns:
        The ``user_id`` of the matching user.

    Raises:
        SystemExit: With status 1 when no user matches; the name is printed
            first so the missing account can be created before a re-run.
    """
    sql = 'SELECT user_id FROM phpbb_users WHERE username_clean = %s'
    # One-element tuple: ``(name.lower())`` would be the bare string, which a
    # driver that iterates over the parameters would split into characters.
    cursor.execute(sql, (name.lower(),))
    row = cursor.fetchone()
    if row is None:
        print('Not exists:{}'.format(name))
        # Raise SystemExit directly instead of calling the site-provided
        # ``exit()`` helper, and report a non-zero status because the import
        # cannot continue.
        raise SystemExit(1)
    return row['user_id']

def insertTopic(cursor, forum_id, topic_title, topic_time, ori_id):
    """Insert a minimal row into ``phpbb_topics`` with ``INSERT IGNORE``.

    Args:
        cursor: Database cursor used to run the statement.
        forum_id: Value for the ``forum_id`` column.
        topic_title: Value for the ``topic_title`` column.
        topic_time: Value for the ``topic_time`` column.
        ori_id: Source record id stored in the ``ori_id`` column.

    Returns:
        ``cursor.lastrowid`` as reported after the statement ran.
    """
    statement = (
        'INSERT IGNORE INTO `phpbb_topics` (`forum_id`, `topic_title`, `topic_time`, `ori_id`) '
        'VALUES (%s, %s, %s, %s)'
    )
    values = (forum_id, topic_title, topic_time, ori_id)
    cursor.execute(statement, values)
    return cursor.lastrowid

def insertPost(cursor, topic_id, forum_id, poster_id, poster_ip, post_time, post_username, post_subject, post_text, post_attachment, ori_id):
    """Insert a minimal row into ``phpbb_posts`` with ``INSERT IGNORE``.

    ``enable_sig`` is written as 0 and ``post_checksum`` as an empty string;
    every other column takes the corresponding argument.

    Args:
        cursor: Database cursor used to run the statement.
        topic_id: Id of the topic the post belongs to.
        forum_id: Id of the forum the post belongs to.
        poster_id: phpBB user id of the author.
        poster_ip: Value for the ``poster_ip`` column.
        post_time: Value for the ``post_time`` column.
        post_username: Author name stored with the post.
        post_subject: Subject line of the post.
        post_text: Body of the post.
        post_attachment: Value for the ``post_attachment`` column.
        ori_id: Source record id stored in the ``ori_id`` column.

    Returns:
        ``cursor.lastrowid`` after the statement ran. A value of 0 is
        reported on stdout as a duplicate ``ori_id``.
    """
    statement = (
        'INSERT IGNORE INTO `phpbb_posts` (`topic_id`, `forum_id`, `poster_id`, `poster_ip`, `post_time`, `enable_sig`, '
        '`post_username`, `post_subject`, `post_text`, `post_checksum`, `post_attachment`, `ori_id`) '
        "VALUES (%s, %s, %s, %s, %s, 0, %s, %s, %s, '', %s, %s)"
    )
    values = (
        topic_id, forum_id, poster_id, poster_ip, post_time,
        post_username, post_subject, post_text, post_attachment, ori_id,
    )
    cursor.execute(statement, values)
    new_post_id = cursor.lastrowid
    if new_post_id == 0:
        print('Duplicate ID:>{}<'.format(ori_id))
    return new_post_id

def updateTopicFirst(cursor, authorId, postId, author, title, createdAt, topicId):
    """Record a post as both the first and the last post of a topic.

    Sets ``topic_poster``, ``topic_first_post_id`` and
    ``topic_first_poster_name``, plus all ``topic_last_*`` columns, on the
    ``phpbb_topics`` row identified by ``topicId``.

    Args:
        cursor: Database cursor used to run the statement.
        authorId: phpBB user id of the post's author.
        postId: Id of the post.
        author: Author name of the post.
        title: Subject of the post, stored as the last post subject.
        createdAt: Time of the post, stored as the last post time.
        topicId: Id of the topic row to update.
    """
    statement = (
        'UPDATE phpbb_topics SET '
        'topic_poster=%s, topic_first_post_id=%s, topic_first_poster_name=%s, '
        'topic_last_poster_id=%s, topic_last_post_id=%s, topic_last_poster_name=%s, topic_last_post_subject=%s, topic_last_post_time=%s WHERE `topic_id`=%s'
    )
    first_post_values = (authorId, postId, author)
    last_post_values = (authorId, postId, author, title, createdAt)
    cursor.execute(statement, first_post_values + last_post_values + (topicId,))

def updateTopic(cursor, authorId, postId, author, title, createdAt, topicId):
    """Register a reply on a topic.

    Increments ``topic_replies`` and ``topic_replies_real`` by one and
    overwrites the ``topic_last_*`` columns of the ``phpbb_topics`` row
    identified by ``topicId`` with the reply's details.

    Args:
        cursor: Database cursor used to run the statement.
        authorId: phpBB user id of the reply's author.
        postId: Id of the reply post.
        author: Author name of the reply.
        title: Subject of the reply, stored as the last post subject.
        createdAt: Time of the reply, stored as the last post time.
        topicId: Id of the topic row to update.
    """
    statement = (
        'UPDATE phpbb_topics SET topic_replies=topic_replies+1, topic_replies_real=topic_replies_real+1, '
        'topic_last_poster_id=%s, topic_last_post_id=%s, topic_last_poster_name=%s, topic_last_post_subject=%s, topic_last_post_time=%s WHERE `topic_id`=%s'
    )
    last_post_values = (authorId, postId, author, title, createdAt)
    cursor.execute(statement, last_post_values + (topicId,))

主方法

# Main import loop: walk the source articles in ascending ``createdAt`` order,
# one page of ``limit`` documents at a time, and write each one to
# phpbb_topics / phpbb_posts. Every article is committed on its own.
tb_article_all = rbcommon.db['article_all']
limit = 1000
total = tb_article_all.estimated_document_count()
for i in range(0, total, limit):
    print("\n" + '######## Start:' + str(i) + ', limit:' + str(limit) + ' ########')
    # NOTE(review): paging with skip/limit over a sort on ``createdAt`` alone
    # may repeat or miss documents that share a timestamp, and ``total`` is
    # only an estimate -- confirm this is acceptable for the data set.
    articles = tb_article_all.find().sort('createdAt', 1).limit(limit).skip(i)
    for article in articles:
        # The forum id is the part of the article id before the first '.'.
        pos = article['_id'].find('.')
        forumId = article['_id'][0:pos]
        author = article['author'].strip()
        posterIp = article.get('ip', '')
        # post_attachment is written as a 0/1 flag; a missing, None or empty
        # attachment list all mean "no attachments".
        attachments = 1 if article.get('attachments') else 0
        # The source text carries escapes literally: turn "\n" into real
        # newlines, drop "\r[...m" sequences (presumably terminal colour
        # codes), and unescape \/ \" \'.
        content = re.sub(r'\\n', '\n', article['content'])
        content = re.sub(r'\\r\[[;\d]{0,8}m', '', content)
        content = re.sub(r'\\(/|"|\')', r'\1', content)

        with rbcommon.mysqlclient.cursor() as cursor:
            # Aborts the whole run if the author has no phpBB account.
            authorId = getAuthor(cursor, author)

            # An article whose id equals its parentId opens a topic.
            firstPostFlag = False
            if article['_id'] == article['parentId']:
                firstPostFlag = True
                topicId = getTopic(cursor, article['_id'])
                if topicId == 0:
                    # Topic not created yet: insert it and keep the new id.
                    topicId = insertTopic(cursor, forumId, article['title'], article['createdAt'], article['_id'])
            else:
                # A reply: its topic is keyed by the parent's id.
                topicId = getTopic(cursor, article['parentId'])
                if topicId == 0:
                    # The topic row is missing, so build it from the parent
                    # article in the source collection.
                    dummy = tb_article_all.find_one({'_id': article['parentId']})
                    if dummy is None:
                        # The parent is missing from the source as well:
                        # promote this reply to first post of a topic keyed
                        # by the parent's id.
                        firstPostFlag = True
                        topicId = insertTopic(cursor, forumId, article['title'], article['createdAt'], article['parentId'])
                    else:
                        topicId = insertTopic(cursor, forumId, dummy['title'], dummy['createdAt'], dummy['_id'])

            # A topic id must be known by now; anything else is fatal.
            if topicId == 0:
                print('Failed to get topicId for {}'.format(article['_id']))
                raise SystemExit(1)

            # Write the post itself.
            postId = insertPost(cursor, topicId, forumId, authorId, posterIp, article['createdAt'], author, article['title'], content, attachments, article['_id'])
            if postId == 0:
                # Already imported: roll back whatever this iteration wrote
                # and move on to the next article.
                print('Post already exists: {}'.format(article['_id']))
                rbcommon.mysqlclient.rollback()
                continue

            # Point the topic's first/last-post columns at the new post.
            if firstPostFlag:
                updateTopicFirst(cursor, authorId, postId, author, article['title'], article['createdAt'], topicId)
            else:
                updateTopic(cursor, authorId, postId, author, article['title'], article['createdAt'], topicId)

        # Commit the changes made for this article.
        rbcommon.mysqlclient.commit()

 

同步版面數據

導入結束後, 即使同步了後臺數據並清空了緩存, 在前臺也是看不到版面文章的, 顯示都是空. 可以在後臺的版面管理中, 點擊版面右側的同步圖標, 對每個版面進行手動同步. 如果版面較多不適合手工處理, 則可以在論壇的根目錄下, 創建下面這個腳本 tmp.php:

<?php
// One-off maintenance script: load the phpBB environment, run sync() in
// 'forum' mode, then destroy the cached SQL results for the forums table.
// NOTE(review): the script performs no access check of its own; consider
// removing it from the forum root once it has been run.

// Defined before any phpBB file is included.
// NOTE(review): phpBB files presumably refuse to run without it -- confirm.
define('IN_PHPBB', true);
$phpbb_root_path = (defined('PHPBB_ROOT_PATH')) ? PHPBB_ROOT_PATH : './';
// File extension of this script (the text after the last '.'); it is reused
// to build the names of the include files below.
$phpEx = substr(strrchr(__FILE__, '.'), 1);
include($phpbb_root_path . 'common.' . $phpEx);
include($phpbb_root_path . 'includes/functions_display.' . $phpEx);
// Presumably the file that provides sync() -- TODO confirm.
require($phpbb_root_path . 'includes/functions_admin.' . $phpEx);
echo 'Start';
// NOTE(review): the meaning of the empty-string and boolean arguments is
// not visible here -- check the sync() signature in your phpBB version.
sync('forum', '', '', false, true);
echo 'Done';
// $cache is not created in this script; presumably common.php sets it up.
$cache->destroy('sql', FORUMS_TABLE);
?>

在命令行下執行命令 php tmp.php, 版面的數據就全部同步了.

.命令行

相關文章
相關標籤/搜索