搜点什么?

WP的数据导入到Jpress

发布时间:2021-01-17 22:53

本文作者:

WP的数据导入到Jpress

enter description here

我之前所有的文章都是在本地用小书匠写的,图片存放在COS上,所以我的数据迁移非常方便. JPress自带一个“WordPress文章导入”功能,但是不会保存分类、标签关系,所以自己写了一个Python脚本配合导入;导入文章的时候,把旧路径和新路径的对应关系存放到一张新表里.

每个网站情况都不一样,这个仅供参考,如果有需要,可以联系我.

1、先把数据整理一下,WP的各个表之间的关系就不多说了,可以百度

-- List the taxonomies in use (categories, tags, special topics, ...):
-- select  taxonomy,count(1) from swp_term_taxonomy group by taxonomy

-- Taxonomy values used below: category = category, post_tag = tag, special = special topic

-- Staging table: one row per (post, taxonomy) holding the comma-joined term names.
-- NOTE: this is a regular MEMORY table, not a TEMPORARY one. The final query joins
-- it three times, and MySQL cannot reference a TEMPORARY table more than once in a
-- single query. Drop it by hand once the migration is finished.
CREATE TABLE IF NOT EXISTS Wp_posts_temp (
	object_id bigint
    ,taxonomy VARCHAR(50)
    ,names VARCHAR(500)
) ENGINE = MEMORY;

-- The table survives between runs, so empty it first; otherwise re-running the
-- INSERT below would duplicate every row (and therefore every post in the export).
DELETE FROM Wp_posts_temp;

-- Fill the staging table.
-- To remove it afterwards: drop TABLE Wp_posts_temp
-- NOTE: names is VARCHAR(500); posts whose joined term names are longer than that
-- will not fit. Widen the column if the data requires it.
insert into  Wp_posts_temp(object_id,taxonomy,names)
SELECT
		object_id,
		taxonomy,
		group_concat( a.NAME SEPARATOR ',' ) AS NAMES
	FROM
		(
		SELECT DISTINCT
			b.object_id,
			c.taxonomy,
			d.NAME
		FROM
			swp_term_relationships b
			INNER JOIN swp_term_taxonomy c ON c.term_taxonomy_id = b.term_taxonomy_id
			INNER JOIN swp_terms d ON d.term_id = c.term_id
		WHERE
			1=1
			-- optional filters for a test run:
			-- and  c.taxonomy = 'special'
			-- and b.object_id in (3680,3677)
		) a
	GROUP BY
		object_id ,taxonomy;


-- Sanity check: inspect the staged rows
select a.* from Wp_posts_temp a;



-- Export: one row per published post with its tag / category / special-topic names.
-- status is exported as 0 for every row.
SELECT
	a.id,
	a.post_title,
	a.post_name,
	a.post_content,
	t1.NAMES AS tg_names,
	t2.NAMES AS ca_names,
	t3.NAMES AS sp_names ,
	0 as status
FROM
	swp_posts a
	LEFT JOIN  Wp_posts_temp t1 ON t1.taxonomy='post_tag' and  t1.object_id = a.id -- tags
	LEFT JOIN  Wp_posts_temp t2 ON t2.taxonomy='category' and  t2.object_id = a.id -- categories
	LEFT JOIN  Wp_posts_temp t3 ON t3.taxonomy='special'  and  t3.object_id = a.id -- special topics
WHERE
	a.post_type = 'post'
	AND a.post_status = 'publish' -- and a.id in (3680,3677)

ORDER BY
	a.post_date DESC
	 -- LIMIT 0,10
;

2、这样就得到了这样一份数据

enter description here

3、后面再用写好的python脚本导入

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   wp2halo
@Contact :   lau52y@163.com
@Modify Time      @Author    @Version    @Description
------------      -------    --------    -----------
2020/5/26 12:33 上午   lau52y     1.0         

'''
import pymongo
from ifuntoolsutils import *
import pymysql


class wp2halo(object):
    """Publish exported WordPress posts to the site running on localhost:8080.

    Posts are read one at a time from the MongoDB collection ``ifuntools.wp``
    (documents with ``status == "0"``), their categories are looked up or
    created, the article is posted over HTTP, and the old post name is mapped
    to the new article id in the MySQL table ``articlekv``.
    """

    def __init__(self):
        """Open the MongoDB and MySQL connections and prepare the HTTP headers."""
        # MongoDB: source documents, one per post to import.
        self.conn = pymongo.MongoClient("mongodb链接地址")
        self.db = self.conn.ifuntools
        self.collection = self.db.wp

        # MySQL: database holding `article`, `article_category` and `articlekv`.
        # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
        # positional arguments to connect().
        self.pyconn = pymysql.connect(
            host='ip',
            user='用户名',
            password='密码',
            database='数据库',
            autocommit=True,
        )
        # DictCursor so rows can be read by column name (row['id']).
        self.cursor = self.pyconn.cursor(cursor=pymysql.cursors.DictCursor)

        # NOTE(review): the CSRF token and the cookie below are hard-coded and
        # must belong to the same session (presumably copied from a logged-in
        # browser); replace both before running.
        self.csrf_token = "ac7f251eb9a04f6a8e1e25eb625231eb"

        self.headers = {
            'authority': 'www.iplaymac.cn',
            'accept': '*/*',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
            'x-requested-with': 'XMLHttpRequest',
            'sec-fetch-site': 'same-origin',
            'sec-fetch-mode': 'cors',
            'sec-fetch-dest': 'empty',
            'referer': 'http://localhost:8080/publishArticle',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cookie': '_jpanonym=NTBmZjhiMjZhODc3NTI2OGFkNjJmNWM4ZWE0NzA0NDIjMTU5NTgxNzI4MzAxNCMzMTUzNjAwMCNOVGRsTVRjek56QTJNelE0TkdJNE5EZzBaVEpqWWpOa05UbGpZakUyT0RBPQ==; _jpuid=YTg0MDgxOWI5MDVkZDkzYzE4MDhkNjEwYWQ2YTNmNmIjMTU5NTgxNzg4MTMxNCMxNzI4MDAjTVE9PQ==; Hm_lvt_bfe2407e37bbaa8dc195c5db42daf96a=1595817290,1595817882; csrf_token=ac7f251eb9a04f6a8e1e25eb625231eb; Hm_lpvt_bfe2407e37bbaa8dc195c5db42daf96a=1595839824',
        }

    def get_category(self, categoryName):
        """Return the id of the category titled ``categoryName``, or None.

        None is returned both when no such row exists and when the query
        fails (the error is printed, not raised).

        :param categoryName: category title to look up in ``article_category``
        :return: the category id, or None
        """
        categoryid = None
        # Parameterized query: titles containing quotes must neither break the
        # statement nor be interpreted as SQL.
        sql = "SELECT id,slug,title,flag FROM `article_category` WHERE `title` = %s LIMIT 1"
        try:
            print(sql, categoryName)
            self.cursor.execute(sql, (categoryName,))
            row = self.cursor.fetchone()
            if row is not None:
                categoryid = row['id']
        except Exception as e:
            # Best effort: callers treat None as "not found".
            print(e)
            print("Error: unable to fecth data")

        return categoryid

    def create_category(self, categoryName):
        """Create a top-level category through the site's HTTP endpoint.

        :param categoryName: title of the category to create
        :raises Exception: if the server does not answer with HTTP 200
        """
        slug = create_slug(categoryName)
        data = {
            'category.id': '',
            'category.type': 'category',
            'category.title': categoryName,
            'category.pid': '0',
            'category.slug': slug,
            'category.order_number': '',
            'category.content': '',
            'category.summary': '',
            'category.icon': '',
            'category.flag': slug,
            'displayInMenu': 'false',
            'category.meta_description': '',
            'category.meta_keywords': '',
            'csrf_token': self.csrf_token
        }
        response = requests.post(
            'http://localhost:8080/publishArticle/doCategorySave?csrf_token=' + self.csrf_token,
            headers=self.headers,
            data=data,
        )

        if response.status_code != 200:
            raise Exception("分类创建失败!!!")

    def category(self, categoryName):
        """Return the id of ``categoryName``, creating the category if missing.

        :param categoryName: category title
        :return: the category id
        :raises Exception: if the category still cannot be found after creation
        """
        categoryid = self.get_category(categoryName)
        if categoryid is None:
            self.create_category(categoryName)
            # Re-read to obtain the id assigned to the new row.
            categoryid = self.get_category(categoryName)

        if categoryid is None:
            raise Exception("分类创建失败")
        return categoryid

    def get_thumbnail(self, content):
        """Return the ``src`` of the first ``<img>`` tag in ``content``, or None.

        Both ``<img ...>`` and ``<img ... />`` are recognised, with single or
        double quoted attribute values.

        :param content: HTML text of the post
        :return: the image URL, or None when there is no usable image
        """
        match = None
        if isinstance(content, str):
            # \s before "src" keeps attributes such as data-src from matching.
            match = re.search(
                r'<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1',
                content,
                re.IGNORECASE,
            )
        if match is None:
            print("图片提取失败")
            return None
        return match.group(2)

    def gen_posts_data(self, result):
        """Publish one exported post and mark it as done in MongoDB.

        If the first attempt fails, ``post_name`` is replaced by
        ``post_title`` and the import is tried once more. The retry is
        bounded: a persistent failure (e.g. the server is down) returns False
        instead of recursing forever.

        :param result: MongoDB document of the post; reads ``_id``,
            ``ca_names``, ``tg_names``, ``post_name``, ``post_title`` and
            ``post_content``
        :return: True when the post was published, False when both attempts failed
        """
        for attempt in range(2):
            try:
                self._import_one(result)
            except Exception as e:
                print(e)
                print('发布失败')
                if attempt == 0:
                    # Fall back to the title as the post name for the retry.
                    result['post_name'] = result['post_title']
            else:
                print('发布成功')
                return True
        return False

    def _import_one(self, result):
        """Resolve categories, publish the post, record the mapping, mark it done."""
        category_ids = []
        ca_names = result['ca_names']
        if ca_names is not None:
            names = [name for name in ca_names.split(',') if name.strip()]
            # dict.fromkeys removes duplicate ids while keeping their order.
            category_ids = list(dict.fromkeys(self.category(name) for name in names))

        result['slug'] = create_slug(result['post_name'])
        result['categoryIds'] = category_ids

        self.publish_posts(result)
        self.insertarticleKv(result)

        # Mark as imported. replace_one is the replacement for the legacy
        # Collection.update(spec, document), which PyMongo 4 removed.
        result['status'] = 2
        self.collection.replace_one({"_id": result['_id']}, result, upsert=True)

    def insertarticleKv(self, result):
        """Record the mapping old post name -> newest article id in ``articlekv``.

        The newest article is taken to be ``max(id)`` of ``article``, which is
        only correct while nothing else inserts articles concurrently.
        Database errors are printed, not raised.

        :param result: post document; reads ``post_name``
        """
        try:
            self.cursor.execute("SELECT max(id) as id FROM `article`")
            row = self.cursor.fetchone()
            # max(id) is NULL when the article table is empty.
            if row is None or row['id'] is None:
                return
            # Parameterized so that quotes in post_name cannot break the statement.
            self.cursor.execute(
                "INSERT INTO `articlekv`(`oldkey`, `newkey`) VALUES (%s, %s)",
                (result['post_name'], str(row['id'])),
            )
        except Exception as e:
            print(e)
            print("Error: unable to fecth data")

    def publish_posts(self, result):
        """POST one article, with its categories and tags, to the site.

        :param result: post document; reads ``post_title``, ``post_content``,
            ``categoryIds`` and ``tg_names``
        :raises Exception: if the server does not answer with HTTP 200
        """
        params = (
            ('csrf_token', self.csrf_token),
        )

        data = [
            ('article.status', 'normal'),  # 'normal' publishes, 'draft' saves a draft
            ('article.id', ''),
            ('article.user_id', '1'),
            ('article.edit_mode', 'markdown'),
            ('article.title', result['post_title']),
            ('article.slug', ''),
            ('article.content', result['post_content']),
            ('article.summary', ''),
            ('article.meta_keywords', ''),
            ('article.meta_description', ''),
            ('article.order_number', ''),
            ('article.link_to', ''),
            ('article.created', ''),
            ('article.comment_status', 'true'),
            ('article.thumbnail', ''),
            ('article.flag', ''),
            ('csrf_token', self.csrf_token),
        ]
        # One repeated 'category' field per category id.
        data.extend(('category', category_id) for category_id in result['categoryIds'])

        # One repeated 'tag' field per distinct tag; "." and "_" are stripped
        # from tag names, and tags that end up empty are skipped.
        tag_names = result['tg_names']
        if tag_names is not None:
            cleaned = (
                tag.replace(".", "").replace("_", "")
                for tag in tag_names.split(',')
            )
            data.extend(('tag', tag) for tag in dict.fromkeys(cleaned) if tag)

        requests.packages.urllib3.disable_warnings()
        response = requests.post(
            'http://localhost:8080/publishArticle/doWriteSave',
            headers=self.headers,
            params=params,
            data=data,
            verify=False,
        )

        if response.status_code != 200:
            print(response.content)
            raise Exception("发布失败")

    def start_post(self):
        """Import pending posts one by one until none is left or one fails.

        NOTE(review): pending documents are selected with the string "0",
        while finished ones are written with the integer 2; confirm that the
        documents loaded into MongoDB store status as a string.
        """
        while True:
            result = self.collection.find_one({"status": "0"})
            if result is None:
                break
            published = self.gen_posts_data(result)
            print("发布一次")
            if not published:
                # The failed document is still pending; stop instead of
                # picking it up again forever.
                break
            # Pause between posts to avoid hammering the server.
            time.sleep(1)


if __name__ == '__main__':
    # Script entry point: connect, then import every pending post.
    importer = wp2halo()
    importer.start_post()

相关文章

加入我们!

如果你想系统学习JPress涉及到的知识点,或者希望有人解答你在深度使用JPress时遇到问题,
加入QQ群是个很不错的选择