Golang 正則表達式（regexp）

時間 2019-12-07

標籤 golang 正則表達式 regexp 欄目 Go 简体版

原文鏈接

Go 內置了 regexp 包來支持正則表達式，下面是正則表達式常見用法的例子。

示例：

package main

import (
    "bytes"
    "fmt"
    "regexp"
)

// main walks through the commonly used entry points of the regexp package and
// prints each result to standard output. The expected output is noted next to
// every call. Pattern-compilation errors are checked instead of discarded; the
// patterns are literals, so a failure is a programming error and panics.
func main() {
    // Report whether a string contains a match.
    // "." matches any single character and "*" matches zero or more of the
    // preceding item, preferring more (greedy).
    match, err := regexp.MatchString("H(.*)d!", "Hello World!")
    if err != nil {
        panic(err) // only reachable if the pattern literal is malformed
    }
    fmt.Println(match) // true

    // The same check on a byte slice.
    match, err = regexp.Match("H(.*)d!", []byte("Hello World!"))
    if err != nil {
        panic(err)
    }
    fmt.Println(match) // true

    // Or compile the pattern once with Compile and reuse the Regexp object.
    r, err := regexp.Compile("H(.*)d!")
    if err != nil {
        panic(err)
    }
    fmt.Println(r.MatchString("Hello World!")) // true

    // FindString returns the text of the first match.
    fmt.Println(r.FindString("Hello World! world")) // Hello World!
    // Find is the []byte counterpart.
    fmt.Println(string(r.Find([]byte("Hello World!")))) // Hello World!

    // FindStringIndex returns the start and end index of the first match
    // rather than the matched text.
    fmt.Println(r.FindStringIndex("Hello World! world")) // [0 12]

    // FindStringSubmatch returns the whole match followed by the text of each
    // capture group, for the first match only. The result corresponds to the
    // first element of r.FindAllStringSubmatch("Hello World! world", 1).
    // Here that is the text matching `H(.*)d!` and the text matching `(.*)`.
    fmt.Println(r.FindStringSubmatch("Hello World! world")) // [Hello World! ello Worl]

    // Same as above, but returns the start/end indexes of the whole match and
    // of each capture group.
    fmt.Println(r.FindStringSubmatchIndex("Hello World! world")) // [0 12 1 10]

    // FindAllString returns every match, not just the first. Because (.*) is
    // greedy, the whole "Hello World! Held!" span is a single match.
    fmt.Println(r.FindAllString("Hello World! Held! world", -1)) // [Hello World! Held!]

    // FindAllStringSubmatchIndex returns, for every match, the start/end
    // indexes of the whole match and of each capture group.
    fmt.Println(r.FindAllStringSubmatchIndex("Hello World! world", -1))       // [[0 12 1 10]]
    fmt.Println(r.FindAllStringSubmatchIndex("Hello World! Held! world", -1)) // [[0 18 1 16]]

    // A non-negative second argument limits the number of matches returned.
    res, err := regexp.Compile("H([a-z]+)d!")
    if err != nil {
        panic(err)
    }
    fmt.Println(res.FindAllString("Hello World! Held! Hellowrld! world", 2)) // [Held! Hellowrld!]

    fmt.Println(r.FindAllString("Hello World! Held! world", 2)) // [Hello World! Held!]
    // Note the difference between the two calls above: the limit counts whole
    // matches, and the greedy pattern r yields one long match here.

    // The regexp package can also replace matched parts of a string.
    fmt.Println(r.ReplaceAllString("Hello World! Held! world", "html")) // html world

    // The Func variants pass every match through a function and substitute
    // the function's return value.
    in := []byte("Hello World! Held! world")
    out := r.ReplaceAllFunc(in, bytes.ToUpper)
    fmt.Println(string(out)) // HELLO WORLD! HELD! world

    // FindReaderIndex searches the text read from b for the compiled pattern
    // reg and returns the location of the first match as {start, end}.
    b := bytes.NewReader([]byte("Hello World!"))
    reg := regexp.MustCompile(`\w+`)
    fmt.Println(reg.FindReaderIndex(b)) // [0 5]

    // FindAllIndex returns the locations of all matches of r:
    // {{start, end}, {start, end}, ...}
    // Only the first n matches are returned; n < 0 means all of them.
    fmt.Println(r.FindAllIndex([]byte("Hello World!"), -1)) // [[0 12]]
    // FindAllStringIndex is the string counterpart.
    fmt.Println(r.FindAllStringIndex("Hello World!", -1)) // [[0 12]]

    // FindAllStringSubmatch returns the text of all matches together with the
    // text of their capture groups:
    // {
    //     {whole match, submatch, submatch, ...},
    //     {whole match, submatch, submatch, ...},
    //     ...
    // }
    // Only the first n matches are returned; n < 0 means all of them.
    reg = regexp.MustCompile(`(\w)(\w)+`)
    fmt.Println(reg.FindAllStringSubmatch("Hello World!", -1)) // [[Hello H o] [World W d]]

    // Expand processes template and appends the result to dst.
    // template may contain group references such as $1, $2, ${name1},
    // ${name2}. m is the result of FindSubmatchIndex and holds the location
    // of every group; each reference in template is replaced by the
    // corresponding substring of src.
    reg = regexp.MustCompile(`(\w+),(\w+)`)
    src := []byte("Golang,World!")           // source text
    dst := []byte("Say: ")                   // destination text
    template := []byte("Hello $1, Hello $2") // template
    m := reg.FindSubmatchIndex(src)          // locate the groups in src
    // Fill in the template and append it to the destination text. The
    // trailing newline keeps this result on its own output line.
    fmt.Printf("%q\n", reg.Expand(dst, template, src, m))
    // "Say: Hello Golang, Hello World"

    // LiteralPrefix returns the literal string every match must begin with.
    // prefix:   the common literal prefix
    // complete: true if prefix is the entire regular expression, else false
    reg = regexp.MustCompile(`Hello[\w\s]+`)
    fmt.Println(reg.LiteralPrefix())
    // Hello false
    reg = regexp.MustCompile(`Hello`)
    fmt.Println(reg.LiteralPrefix())
    // Hello true

    text := `Hello World! hello world`
    // The (?U) flag makes the pattern non-greedy.
    reg = regexp.MustCompile(`(?U)H[\w\s]+o`)
    fmt.Printf("%q\n", reg.FindString(text)) // "Hello"
    // Longest switches later searches to prefer the leftmost-longest match.
    reg.Longest()
    fmt.Printf("%q\n", reg.FindString(text)) // "Hello Wo"

    // NumSubexp counts the capture groups in the pattern (non-capturing
    // groups are not included).
    fmt.Println(r.NumSubexp()) // 1

    // String returns the source text of the pattern held by r.
    fmt.Printf("%s\n", r.String()) // H(.*)d!

    // Split uses the matches as separators and cuts the string into
    // substrings. At most n substrings are produced and the n-th one is not
    // split further; n < 0 splits everywhere. The list of substrings is
    // returned.
    fmt.Printf("%q\n", r.Split("Hello World! Helld! hello", -1)) // ["" " hello"]

    // ReplaceAllLiteralString replaces every match with rep. Group references
    // ($1, $name) inside rep are treated as plain text, not expanded.
    s := "Hello World, hello!"
    reg = regexp.MustCompile(`(Hell|h)o`)
    rep := "${1}"
    fmt.Printf("%q\n", reg.ReplaceAllLiteralString(s, rep)) // "${1} World, hello!"

    // ReplaceAll replaces every match with repb. Here group references ARE
    // expanded: $0 is the whole match and $1 the first capture group.
    ss := []byte("Hello World!")
    reg = regexp.MustCompile("(H)ello")
    repb := []byte("$0$1")
    fmt.Printf("%s\n", reg.ReplaceAll(ss, repb))
    // HelloH World!

    // ReplaceAllFunc substitutes the callback's return value verbatim; group
    // references in the returned bytes are treated as plain text.
    fmt.Printf("%s\n", reg.ReplaceAllFunc(ss,
        func(matched []byte) []byte {
            // Copy into a fresh buffer: matched aliases ss, so appending to
            // it directly could overwrite the bytes that follow the match.
            rst := make([]byte, 0, len(matched)+len("$1"))
            rst = append(rst, matched...)
            return append(rst, "$1"...)
        }))
    // Hello$1 World!
}

小結：

一、Compile 與 MustCompile

r, _ := regexp.Compile("H(.*)d!")

能夠用如下寫法代替：

r := regexp.MustCompile("H(.*)d!")

二者的區別：MustCompile 少一個返回值 err；正則表達式無法解析時，它會直接 panic，而不是返回錯誤。

看源碼：

// Compile parses a regular expression and returns, if successful,
// a Regexp object that can be used to match against text.
// ... (the remainder of the upstream doc comment is elided in this excerpt)
// For POSIX leftmost-longest matching, see CompilePOSIX.
func Compile(expr string) (*Regexp, error) {
    // Delegates to the unexported compile helper with the syntax.Perl flags and
    // returns its results unchanged.
    // NOTE(review): the final false presumably disables leftmost-longest
    // matching - confirm against the regexp package source.
    return compile(expr, syntax.Perl, false)
}

// MustCompile is like Compile but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled regular
// expressions.
//
// Unlike Compile it has a single result, so the caller has no error to check.
func MustCompile(str string) *Regexp {
   regexp, err := Compile(str)
   if err != nil {
      // The panic message carries the quoted expression and the parse error text.
      panic(`regexp: Compile(` + quote(str) + `): ` + err.Error())
   }
   return regexp
}

二、regexp 中處理 []byte 的方法都有一個對應的 String 版本方法，二者功能相同。

例如：