// Copyright 2015 meisterluk. No rights reserved. Public Domain.
//
// This package illustrates the Go Regexp API.
//
// Related:
//   - http://golang.org/pkg/regexp/
//
//   - If you are not familiar with regex but go, use
//     https://github.com/StefanSchroeder/Golang-Regex-Tutorial
//   - If you are familiar with regex but not go, use
//     https://gobyexample.com/regular-expressions
//   - If you are familiar with both, continue
package main

import (
	"bytes"
	"fmt"
	"os"
	"regexp"
	"strings"
)

// regex is the regular expression to discuss.
var regex = ".*(ham|and ham and spam)"

// input is the text the regular expression is applied to.
var input = "eggs and ham and spam"

// INFINITY is passed as the match limit of the FindAll* methods to request
// every match.
const INFINITY = -1

// modes lists all modes that will be performed. main selects the compiled
// expression and the match function by looking for the substrings "ongest",
// "ntire" and " all" in each entry.
var modes = []string{
	"Leftmost first entire one",
	"Leftmost first anyone",
	"Leftmost longest entire one",
	"Leftmost longest anyone",
	"Leftmost first entire all successive non-overlapping",
	"Leftmost first any all successive non-overlapping",
	"Leftmost longest entire all successive non-overlapping",
	"Leftmost longest any all successive non-overlapping",
}

// regexpMatch is a match object containing all information.
// Written to simplify output of matches.
type regexpMatch struct {
	// original is the full input; sub is the matched substring.
	original, sub string
	// from and to are the byte indices of the matched substring in original.
	from, to int
	// groups holds flat (start, end) byte index pairs into original; pair 0 is
	// the match itself, pair k is capture group k.
	groups []int
	// names holds the group names, indexed by group number (index 0 is the
	// whole match and is always unnamed).
	names []string
}

// underbrace renders a marker line for the byte range [from, to): blanks up
// to from, then an opening glyph, a run of line glyphs and a closing glyph.
// A one-byte range is drawn with a single glyph. The result is empty when
// to <= 0.
//
// NOTE(review): the glyphs in this file look mis-encoded in this copy; they
// are kept exactly as found. Confirm against the upstream source.
func underbrace(from, to int) string {
	var buf bytes.Buffer
	for i := 0; i < to; i++ {
		switch {
		case i == from && to-1 <= from:
			buf.WriteString("ź")
		case i == from:
			buf.WriteString("ā°")
		case from < i && i < to-1:
			buf.WriteString("ā")
		case i == to-1:
			buf.WriteString("āÆ")
		default:
			buf.WriteString(" ")
		}
	}
	return buf.String()
}

// String renders the match as text: the original input with the matched range
// underbraced, followed by one underbraced copy of the input per capture
// group. A value without any match information renders as "-none-".
func (match regexpMatch) String() string {
	const prefix = "Match ā "

	var buf bytes.Buffer
	buf.WriteString(prefix)

	// An empty matched substring alone does not mean "no match": a pattern may
	// successfully match the empty string. Only a value that carries neither a
	// substring nor group indices is treated as "none".
	if match.sub == "" && len(match.groups) == 0 {
		buf.WriteString("-none-\n")
		return buf.String()
	}

	fmt.Fprintf(&buf, "%s\n", match.original)
	// Indent by the width of the prefix so the underbrace lines up with the
	// input printed on the line above.
	buf.WriteString(strings.Repeat(" ", len([]rune(prefix))))
	buf.WriteString(underbrace(match.from, match.to))
	fmt.Fprintf(&buf, " ā¦ '%s' [%dā%d]\n", match.sub, match.from, match.to)
	buf.WriteString("\n")

	// Start at pair 1: pair 0 is the match itself, which was printed above.
	for i := 2; i+1 < len(match.groups); i += 2 {
		from, to := match.groups[i], match.groups[i+1]
		// groups is a flat list of index pairs, names is indexed by group
		// number, so the pair offset has to be halved for the name lookup.
		group := i / 2

		buf.WriteString(strings.Repeat(" ", 20))
		buf.WriteString("ā ")
		if group < len(match.names) && match.names[group] != "" {
			fmt.Fprintf(&buf, "%s - group %s\n", match.original, match.names[group])
		} else {
			fmt.Fprintf(&buf, "%s - group %d\n", match.original, group)
		}
		buf.WriteString(strings.Repeat(" ", 22))
		buf.WriteString(underbrace(from, to))
		buf.WriteString("\n")
	}
	return buf.String()
}

// metaInformation returns a three-line summary: the regular expression, the
// input and whether (and how many) capture groups the expression has.
func metaInformation(input string, re *regexp.Regexp) string {
	grps := "Nope"
	if subexpr := re.NumSubexp(); subexpr != 0 {
		grps = fmt.Sprintf("Yes, %d", subexpr)
	}

	var buf bytes.Buffer
	fmt.Fprintf(&buf, "Regular expression ā %s\n", re)
	fmt.Fprintf(&buf, "Input ā %s\n", input)
	fmt.Fprintf(&buf, "Groups ā %s\n", grps)
	return buf.String()
}

// matchAny returns the first match of re in input, or the zero regexpMatch
// when there is none.
func matchAny(input string, re *regexp.Regexp) regexpMatch {
	// A single submatch search yields the match bounds (pair 0) and all
	// group bounds at once.
	groups := re.FindStringSubmatchIndex(input)
	if groups == nil {
		return regexpMatch{}
	}
	from, to := groups[0], groups[1]
	return regexpMatch{
		original: input,
		sub:      input[from:to],
		from:     from,
		to:       to,
		groups:   groups,
		names:    re.SubexpNames(),
	}
}

// matchAll returns every successive non-overlapping match of re in input.
// The result is empty (but non-nil) when nothing matches. All indices,
// including the group indices, are relative to input.
func matchAll(input string, re *regexp.Regexp) []regexpMatch {
	all := re.FindAllStringSubmatchIndex(input, INFINITY)
	names := re.SubexpNames()

	matches := make([]regexpMatch, 0, len(all))
	for _, groups := range all {
		from, to := groups[0], groups[1]
		matches = append(matches, regexpMatch{
			original: input,
			sub:      input[from:to],
			from:     from,
			to:       to,
			groups:   groups,
			names:    names,
		})
	}
	return matches
}

// main runs every entry of modes against input and prints the results.
func main() {
	// Index layout: +1 selects the anchored ("entire") variant, +2 selects
	// the POSIX (leftmost-longest) variant.
	regexes := []*regexp.Regexp{
		// any left-most first matching
		regexp.MustCompile(regex),
		// entire left-most first matching
		regexp.MustCompile("^(" + regex + ")$"),
		// any left-most longest matching
		regexp.MustCompilePOSIX(regex),
		// entire left-most longest matching
		regexp.MustCompilePOSIX("^(" + regex + ")$"),
	}

	for _, mode := range modes {
		index := 0
		if strings.Contains(mode, "ongest") {
			index += 2
		}
		if strings.Contains(mode, "ntire") {
			index++
		}
		re := regexes[index]

		fmt.Println(strings.Repeat("=", 80))
		fmt.Printf("Mode ā %s\n", mode)
		fmt.Println(metaInformation(input, re))

		if strings.Contains(mode, " all") {
			for _, elem := range matchAll(input, re) {
				fmt.Println(elem)
			}
		} else {
			fmt.Println(matchAny(input, re))
		}
	}

	os.Exit(0)
}