在博客園記錄了一些文章,想把它們備份到github上。還好大部分博文都是markdown格式的,博客園也支持備份導出,可是導出的是單個的XML文件。
爲了把每一篇博文單獨提取出來,因此寫了一個小程序來提取。
c++
github中須要如下圖所示的格式,方能正確地分類:
文件名須要以日期開頭,文件內容中最前面一段是文章的一些描述信息。
程序是用Golang編寫的,代碼如下:
// cnblogs2githubpages project main.go
//
// Command cnblogs2githubpages splits a cnblogs backup (a single RSS-style XML
// file holding every post) into one Markdown file per post. Each file is
// named "<date>-<title>.md" and starts with a front-matter header describing
// the post.
//
// Usage:
//
//	cnblogs2githubpages <backup.xml>
package main

import (
	"bytes"
	"encoding/xml"
	"fmt"
	"io/ioutil"
	"os"
	"strings"
	"time"
)

// For encoding/xml to populate a struct, its field names must be exported
// (start with an upper-case letter).

// Post is a single blog post (<item>) of the backup.
type Post struct {
	XMLName xml.Name `xml:"item"`
	Title   string   `xml:"title"`
	Link    string   `xml:"link"`
	// encoding/xml matches tags against the local element name, so a
	// "dc:creator" tag never matches <dc:creator>; "creator" does.
	Creator string `xml:"creator"`
	Author  string `xml:"author"`
	PubDate string `xml:"pubDate"`
	Guid    string `xml:"guid"`
	// CDATA sections are decoded as ordinary character data; no flag is
	// needed on the tag.
	Description string `xml:"description"`
}

// Blogs is the <channel> element: blog-level metadata plus all posts.
type Blogs struct {
	XMLName       xml.Name `xml:"channel"`
	Title         string   `xml:"title"`
	Link          string   `xml:"link"`
	Description   string   `xml:"description"`
	Language      string   `xml:"language"`
	LastBuildDate string   `xml:"lastBuildDate"`
	PubDate       string   `xml:"pubDate"`
	Ttl           string   `xml:"ttl"`
	Items         []Post   `xml:"item"`
}

// RSS is the root <rss> element of the backup file.
type RSS struct {
	XMLName xml.Name `xml:"rss"`
	Blogs   Blogs    `xml:"channel"`
}

const (
	// tocMarker is the table-of-contents placeholder searched for in posts.
	tocMarker = "[TOC]"
	// dateLayout is the date format used in file names and front matter.
	dateLayout = "2006-01-02"
	// defaultCategory is used when no rule in categoryRules matches.
	defaultCategory = "其它"
)

// fileNameReplacer turns a post title into a file-name fragment in a single
// pass: spaces and colons become "-", slashes become ".", and the remaining
// listed characters are dropped.
var fileNameReplacer = strings.NewReplacer(
	" ", "-",
	"*", "",
	"/", ".",
	"\\", "",
	"$", "",
	"?", "",
	":", "-",
	"。", "",
)

// yamlQuoteEscaper escapes the characters that would terminate or corrupt a
// double-quoted YAML scalar.
var yamlQuoteEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`)

// categoryRules maps lower-case title keywords to a category. Rules are tried
// in order and the first match wins. The list is deliberately simple and only
// fits the blog it was written for.
var categoryRules = []struct {
	category string
	keywords []string
}{
	{category: "live555", keywords: []string{"live555"}},
	{category: "linux", keywords: []string{"linux", "ubuntu"}},
	{category: "編程", keywords: []string{"gcc", "git", "編程", "編譯", "vc", "c++", "visual", "程序"}},
	{category: "地理信息", keywords: []string{"gdal", "proj", "gis", "地理"}},
}

// main expects exactly one argument, the path of the backup XML file, and
// writes the exported posts into the current directory.
func main() {
	if len(os.Args) != 2 {
		fmt.Fprintln(os.Stderr, "usage: cnblogs2githubpages <backup.xml>")
		os.Exit(2)
	}
	if err := run(os.Args[1]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run parses the backup at backupPath and exports every post. A post that
// cannot be written is reported and skipped; run then returns an error
// summarising how many posts failed.
func run(backupPath string) error {
	backupXML, err := ioutil.ReadFile(backupPath)
	if err != nil {
		return fmt.Errorf("reading backup %q: %v", backupPath, err)
	}
	fmt.Println(len(backupXML))

	var feed RSS
	if err := xml.Unmarshal(backupXML, &feed); err != nil {
		return fmt.Errorf("parsing backup %q: %v", backupPath, err)
	}
	posts := feed.Blogs.Items
	fmt.Println(len(posts))

	// Export the posts one by one.
	failed := 0
	for i := range posts {
		if err := exportPost(&posts[i]); err != nil {
			fmt.Fprintln(os.Stderr, err)
			failed++
		}
	}
	if failed > 0 {
		return fmt.Errorf("%d of %d posts could not be exported", failed, len(posts))
	}
	return nil
}

// exportPost writes item to "./<date>-<title>.md" in the current directory.
func exportPost(item *Post) error {
	published, err := parsePubDate(item.PubDate)
	if err != nil {
		// Best effort: keep the post rather than lose it, but make the bad
		// date visible instead of silently using the zero time.
		published = time.Time{}
		fmt.Fprintf(os.Stderr, "post %q: %v; using %s as its date\n",
			item.Title, err, published.Format(dateLayout))
	}
	postDate := published.Format(dateLayout)

	filename := fmt.Sprintf("./%s-%s.md", postDate, sanitizeTitle(item.Title))
	fmt.Println(filename)

	content := renderPost(item, postDate, categorize(item.Title))
	// Posts are plain documents: 0644 rather than world-writable/executable.
	if err := ioutil.WriteFile(filename, content, 0644); err != nil {
		return fmt.Errorf("writing %s: %v", filename, err)
	}
	return nil
}

// parsePubDate parses an RSS pubDate, accepting both the named-zone
// (RFC 1123) and numeric-zone (RFC 1123Z) forms.
func parsePubDate(s string) (time.Time, error) {
	t, err := time.Parse(time.RFC1123, s)
	if err == nil {
		return t, nil
	}
	if tz, errZ := time.Parse(time.RFC1123Z, s); errZ == nil {
		return tz, nil
	}
	return time.Time{}, fmt.Errorf("parsing pubDate %q: %v", s, err)
}

// sanitizeTitle converts a post title into the title part of a file name.
func sanitizeTitle(title string) string {
	return fileNameReplacer.Replace(title)
}

// categorize picks a category for a post from keywords in its title
// (case-insensitive), falling back to defaultCategory.
func categorize(title string) string {
	lower := strings.ToLower(title)
	for _, rule := range categoryRules {
		for _, kw := range rule.keywords {
			if strings.Contains(lower, kw) {
				return rule.category
			}
		}
	}
	return defaultCategory
}

// renderPost builds the Markdown file for item: a front-matter header
// followed by the post body with a link back to the original article.
//
// If the body already contains a [TOC] marker, the link is inserted right
// after it; otherwise a [TOC] marker and the link are put before the body.
func renderPost(item *Post, postDate, category string) []byte {
	var out bytes.Buffer
	out.WriteString("---\r\n")
	out.WriteString("layout: post\r\n")
	out.WriteString("title: \"" + yamlQuoteEscaper.Replace(item.Title) + "\"\r\n")
	out.WriteString("date: " + postDate + "\r\n")
	out.WriteString("categories: " + category + "\r\n")
	out.WriteString("tags: " + category + "\r\n")
	out.WriteString("comments: 1\r\n")
	out.WriteString("---\r\n")

	body := item.Description
	// Search for the marker itself: searching for "" always reports index 0,
	// which would slice the body at an arbitrary byte offset.
	if i := strings.Index(body, tocMarker); i != -1 {
		split := i + len(tocMarker)
		out.WriteString(body[:split])
		out.WriteString("\r\n[博客園原文地址 " + item.Link + "](" + item.Link + ")\r\n\r\n")
		out.WriteString(body[split:])
	} else {
		out.WriteString("\r\n[TOC]\r\n[博客園文章地址 " + item.Link + "](" + item.Link + ")\r\n")
		out.WriteString(body)
	}
	return out.Bytes()
}