Golang學習 - regexp 包

時間 2019-12-10
標籤 golang 學習 regexp 欄目 Go 简体版
原文原文鏈接
------------------------------------------------------------

// 函數

// 判斷在 b（s、r）中可否找到 pattern 所匹配的字符串
func Match(pattern string, b []byte) (matched bool, err error)
func MatchString(pattern string, s string) (matched bool, err error)
func MatchReader(pattern string, r io.RuneReader) (matched bool, err error)

// 將 s 中的正則表達式元字符轉義成普通字符。
func QuoteMeta(s string) string

------------------------------

// Example: MatchString and QuoteMeta.
//
// Reports whether the pattern can be found in the sample text, then prints
// the pattern with its regular-expression metacharacters escaped.
func main() {
	const pattern = `(((abc.)def.)ghi)`
	const text = `abc-def-ghi abc+def+ghi`

	// MatchString returns both the match flag and a compile error.
	found, err := regexp.MatchString(pattern, text)
	fmt.Println(found, err)
	// true <nil>

	escaped := regexp.QuoteMeta(pattern)
	fmt.Println(escaped)
	// \(\(\(abc\.\)def\.\)ghi\)
}

------------------------------------------------------------

// Regexp 表示一個編譯好的正則表達式，我們這裏稱之爲正則對象。正則對象可以
// 在文本中查找匹配的內容。
//
// Regexp 可以安全地在多個例程（goroutine）中並行使用。
type Regexp struct { ... }

------------------------------

// 編譯

// 將正則表達式編譯成一個正則對象（使用 PERL 語法）。
// 該正則對象會採用「leftmost-first」模式。選擇第一個匹配結果。
// 若是正則表達式語法錯誤，則返回錯誤信息。
func Compile(expr string) (*Regexp, error)

// 將正則表達式編譯成一個正則對象（正則語法限制在 POSIX ERE 範圍內）。
// 該正則對象會採用「leftmost-longest」模式。選擇最長的匹配結果。
// POSIX 語法不支持 Perl 的語法格式：\d、\D、\s、\S、\w、\W
// 若是正則表達式語法錯誤，則返回錯誤信息。
func CompilePOSIX(expr string) (*Regexp, error)

// 功能同上，但會在解析失敗時 panic
func MustCompile(str string) *Regexp
func MustCompilePOSIX(str string) *Regexp

// 讓正則表達式在以後的搜索中都採用「leftmost-longest」模式。
func (re *Regexp) Longest()

// 返回編譯時使用的正則表達式字符串
func (re *Regexp) String() string

// 返回正則表達式中分組的數量
func (re *Regexp) NumSubexp() int

// 返回正則表達式中分組的名字
// 第 0 個元素表示整個正則表達式的名字，永遠是空字符串。
func (re *Regexp) SubexpNames() []string

// 返回正則表達式必須匹配到的字面前綴（不包含可變部分）。
// 若是整個正則表達式都是字面值，則 complete 返回 true。
func (re *Regexp) LiteralPrefix() (prefix string, complete bool)

------------------------------

// Example: first match versus longest match.
//
// Each pattern is compiled twice: MustCompile yields the first match,
// MustCompilePOSIX yields the longest match. Both are run against the same
// input so the difference is visible in the output.
func main() {
	input := []byte("abc1def1")
	patterns := []string{
		`abc1|abc1def1`,
		`(abc|abc1def)*1`,
	}

	for _, expr := range patterns {
		first := regexp.MustCompile(expr)        // first match
		longest := regexp.MustCompilePOSIX(expr) // longest match
		fmt.Printf("%s\n", first.Find(input))    // abc1
		fmt.Printf("%s\n", longest.Find(input))  // abc1def1
	}
}

------------------------------

// Example: inspecting a compiled regular expression.
//
// Prints the source text, the capture-group count, the capture-group names,
// and the literal prefix reported for several expressions.
func main() {
	re := regexp.MustCompile(`(abc)(def)(ghi)`)

	// Source text the expression was compiled from.
	fmt.Println(re.String()) // (abc)(def)(ghi)

	// Number of capture groups.
	fmt.Println(re.NumSubexp()) // 3

	fmt.Println()

	// Capture-group names; index 0 stands for the whole expression.
	re = regexp.MustCompile(`(?P<Name1>abc)(def)(?P<Name3>ghi)`)
	for idx, name := range re.SubexpNames() {
		fmt.Printf("%d: %q\n", idx, name)
	}
	// 0: ""
	// 1: "Name1"
	// 2: ""
	// 3: "Name3"

	fmt.Println()

	// Literal prefix of each expression, plus whether that prefix is the
	// entire expression.
	exprs := []string{
		`(abc1)(abc2)(abc3)`,
		`(abc1)|(abc2)|(abc3)`,
		`abc1|abc2|abc3`,
	}
	for _, expr := range exprs {
		prefix, complete := regexp.MustCompile(expr).LiteralPrefix()
		fmt.Println(prefix, complete)
	}
	// abc1abc2abc3 true
	//  false
	// abc false
}

------------------------------

// 判斷

// 判斷在 b（s、r）中可否找到匹配的字符串
func (re *Regexp) Match(b []byte) bool
func (re *Regexp) MatchString(s string) bool
func (re *Regexp) MatchReader(r io.RuneReader) bool

------------------------------

// 查找

// 返回第一個匹配到的結果（結果以 b 的切片形式返回）。
func (re *Regexp) Find(b []byte) []byte

// 返回第一個匹配到的結果及其分組內容（結果以 b 的切片形式返回）。
// 返回值中的第 0 個元素是整個正則表達式的匹配結果，後續元素是各個分組的
// 匹配內容，分組順序按照「(」的出現次序而定。
func (re *Regexp) FindSubmatch(b []byte) [][]byte

// 功能同 Find，只不過返回的是匹配結果的首尾下標，通過這些下標可以生成切片。
// loc[0] 是結果切片的起始下標，loc[1] 是結果切片的結束下標。
func (re *Regexp) FindIndex(b []byte) (loc []int)

// 功能同 FindSubmatch，只不過返回的是匹配結果的首尾下標，通過這些下標可以生成切片。
// loc[0] 是結果切片的起始下標，loc[1] 是結果切片的結束下標。
// loc[2] 是分組1切片的起始下標，loc[3] 是分組1切片的結束下標。
// loc[4] 是分組2切片的起始下標，loc[5] 是分組2切片的結束下標。
// 以此類推
func (re *Regexp) FindSubmatchIndex(b []byte) (loc []int)

------------------------------

// Example: Find and FindSubmatch.
//
// Prints the first match in the sample text, then the first match together
// with the text captured by each group.
func main() {
	re := regexp.MustCompile(`(((abc.)def.)ghi)`)
	text := []byte(`abc-def-ghi abc+def+ghi`)

	// First match only.
	fmt.Printf("%s\n", re.Find(text)) // abc-def-ghi

	fmt.Println()

	// First match plus its capture groups; element 0 is the whole match.
	for idx, group := range re.FindSubmatch(text) {
		fmt.Printf("%d: %s\n", idx, group)
	}
	// 0: abc-def-ghi
	// 1: abc-def-ghi
	// 2: abc-def-
	// 3: abc-
}

------------------------------

// Example: FindIndex and FindSubmatchIndex.
//
// Prints the index pairs reported for the first match and uses them to slice
// the matched text back out of the input.
func main() {
	re := regexp.MustCompile(`(((abc.)def.)ghi)`)
	text := []byte(`abc-def-ghi abc+def+ghi`)

	// First match: loc holds its start and end offsets.
	loc := re.FindIndex(text)
	fmt.Printf("%v\n", loc) // [0 11]
	start, end := loc[0], loc[1]
	fmt.Printf("%s\n\n", text[start:end]) // abc-def-ghi

	// First match plus capture groups: offsets come in (start, end) pairs,
	// the whole match first and then one pair per group.
	loc = re.FindSubmatchIndex(text)
	fmt.Printf("%v\n", loc) // [0 11 0 11 0 8 0 4]
	for at := 0; at+1 < len(loc); at += 2 {
		fmt.Printf("%s\n", text[loc[at]:loc[at+1]])
	}
	// abc-def-ghi
	// abc-def-ghi
	// abc-def-
	// abc-
}

------------------------------

// 功能同上，只不過返回多個匹配的結果，而不僅是第一個。
// n 是查找次數，負數表示不限次數。
func (re *Regexp) FindAll(b []byte, n int) [][]byte
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte

func (re *Regexp) FindAllIndex(b []byte, n int) [][]int
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int

------------------------------

// Example: FindAll and FindAllSubmatch.
//
// Prints every match in the sample text, then every match together with the
// text captured by each of its groups.
func main() {
	re := regexp.MustCompile(`(((abc.)def.)ghi)`)
	text := []byte(`abc-def-ghi abc+def+ghi`)

	// Every match; a negative count places no limit on the search.
	matches := re.FindAll(text, -1)
	for _, m := range matches {
		fmt.Printf("%s\n", m)
	}
	// abc-def-ghi
	// abc+def+ghi

	// Every match with its capture groups, each match preceded by a blank line.
	for _, groups := range re.FindAllSubmatch(text, -1) {
		fmt.Println()
		for idx, g := range groups {
			fmt.Printf("%d: %s\n", idx, g)
		}
	}
	// 0: abc-def-ghi
	// 1: abc-def-ghi
	// 2: abc-def-
	// 3: abc-

	// 0: abc+def+ghi
	// 1: abc+def+ghi
	// 2: abc+def+
	// 3: abc+
}

------------------------------

// 功能同上，只不過在字符串中查找
func (re *Regexp) FindString(s string) string
func (re *Regexp) FindStringSubmatch(s string) []string

func (re *Regexp) FindStringIndex(s string) (loc []int)
func (re *Regexp) FindStringSubmatchIndex(s string) []int

func (re *Regexp) FindAllString(s string, n int) []string
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string

func (re *Regexp) FindAllStringIndex(s string, n int) [][]int
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int

// 功能同上，只不過在 io.RuneReader 中查找。
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int)
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int

------------------------------

// 替換（不會修改參數，結果是參數的副本）

// 將 src 中匹配的內容替換爲 repl（repl 中可使用 $1 $name 等分組引用符）。
func (re *Regexp) ReplaceAll(src, repl []byte) []byte

// 將 src 中匹配的內容通過 repl 函數處理後替換回去。
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte

// 將 src 中匹配的內容替換爲 repl（repl 爲字面值，不解析其中的 $1 $name 等）。
func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte

// 功能同上，只不過在字符串中查找。
func (re *Regexp) ReplaceAllString(src, repl string) string
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string

// Expand 要配合 FindSubmatchIndex 一起使用。FindSubmatchIndex 在 src 中進行
// 查找，將結果存入 match 中。這樣就可以通過 src 和 match 得到匹配的字符串。
// template 是替換內容，可以使用分組引用符 $1、$2、$name 等。Expand 將其中的分
// 組引用符替換爲前面匹配到的字符串，然後追加到 dst 的尾部（dst 可以爲空）。
// 說白了 Expand 就是一次替換過程，只不過需要 FindSubmatchIndex 的配合。
func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte

// 功能同上，參數爲字符串。
func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte

------------------------------

// Example: Expand.
//
// Expands a template containing group references against the first match,
// and then against every match, printing the index slice and the expanded
// text each time.
func main() {
	re := regexp.MustCompile(`(((abc.)def.)ghi)`)

	text := []byte(`abc-def-ghi abc+def+ghi`)
	tmpl := []byte(`$0   $1   $2   $3`)

	// Expand against the first match only.
	firstLoc := re.FindSubmatchIndex(text)
	fmt.Printf("%v\n", firstLoc) // [0 11 0 11 0 8 0 4]
	fmt.Printf("%s\n\n", re.Expand(nil, tmpl, text, firstLoc))
	// abc-def-ghi   abc-def-ghi   abc-def-   abc-

	// Expand against every match.
	allLocs := re.FindAllSubmatchIndex(text, -1)
	for k := range allLocs {
		fmt.Printf("%v\n", allLocs[k])
		expanded := re.Expand(nil, tmpl, text, allLocs[k])
		fmt.Printf("%s\n", expanded)
	}
	// [0 11 0 11 0 8 0 4]
	// abc-def-ghi   abc-def-ghi   abc-def-   abc-
	// [12 23 12 23 12 20 12 16]
	// abc+def+ghi   abc+def+ghi   abc+def+   abc+
}

------------------------------

// 其它

// 以 s 中的匹配結果作爲分割符將 s 分割成字符串列表。
// n 是返回的子串數量上限：n > 0 時最多返回 n 個子串（最後一個子串是未分割的剩餘部分），
// n == 0 時返回 nil，n < 0 時返回全部子串。
func (re *Regexp) Split(s string, n int) []string

// 將當前正則對象複製一份。在多例程中使用同一正則對象時，給每一個例程分配一個
// 正則對象的副本，可以避免多例程對單個正則對象的爭奪鎖定。
// 注意：自 Go 1.12 起 Copy 已被標記爲棄用，不再需要通過複製來避免鎖爭用。
func (re *Regexp) Copy() *Regexp

------------------------------------------------------------
相關標籤/搜索
每日一句
每一个你不满意的现在，都有一个你没有努力的曾经。