-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregexer.go
76 lines (67 loc) · 1.67 KB
/
regexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/*
Package regexer provides an io.Writer that outputs the matches of a given regex found in the data written to it.
*/
package regexer
import (
"errors"
"regexp"
"time"
)
// MAX_BUFFER_SIZE is the maximum number of bytes (4 KiB) that Write leaves in
// the in-memory buffer once matched data has been removed; older bytes beyond
// that limit are discarded.
const MAX_BUFFER_SIZE = 1024 * 4
// Regexer collects the bytes passed to Write in a buffer, applies a regular
// expression to that buffer, and publishes each match on the channel C.
type Regexer struct {
rxBuf []byte // bytes written so far that have not been consumed by a match
regex *regexp.Regexp // pattern applied to rxBuf on every Write
C chan [][]byte // receives one [][]byte per regex match, one match at a time
}
// BytesToString converts a single regex match, given as a [][]byte, into a
// []string of the same length and order. A nil or empty element becomes the
// empty string.
func BytesToString(m [][]byte) []string {
	out := make([]string, 0, len(m))
	for idx := range m {
		out = append(out, string(m[idx]))
	}
	return out
}
// NewRegexer builds a *Regexer that matches written data against regex.
// The new value starts with an empty buffer and a match channel C that can
// hold up to 10 pending matches.
func NewRegexer(regex *regexp.Regexp) *Regexer {
	r := new(Regexer)
	r.regex = regex
	r.rxBuf = []byte{}
	r.C = make(chan [][]byte, 10)
	return r
}
// Close closes the match channel r.C, so a receiver ranging over it stops
// once the pending matches have been read.
//
// Close must be called at most once, and Write must not be called after it:
// closing a closed channel panics, and Write sends on r.C, which panics on a
// closed channel as well.
func (r *Regexer) Close() {
close(r.C)
}
// Write appends b to the internal buffer, runs the regex over the whole
// buffer and sends every match on r.C, in order of appearance.
//
// Each value sent has the same shape as the result of regexp.FindSubmatch:
// element 0 is the whole match and element i is capture group i, left nil
// when that group did not take part in the match, so group numbers always
// line up with slice indexes. The bytes are copied out of the internal
// buffer, so a receiver may keep, modify or append to them without affecting
// later writes.
//
// Everything up to the end of the last delivered match is dropped from the
// buffer. The unmatched tail is kept for the next call, trimmed to its most
// recent MAX_BUFFER_SIZE bytes.
//
// If r.C stays full for 10 seconds, Write stops delivering and returns a
// "match channel blocked" error. Matches that were not delivered stay in the
// buffer (subject to the trimming above) and are tried again on the next
// call. Write always reports len(b) bytes written, because b has been
// buffered in full by the time it returns.
func (r *Regexer) Write(b []byte) (int, error) {
	const sendTimeout = 10 * time.Second

	r.rxBuf = append(r.rxBuf, b...)

	var err error
	consumed := 0 // end offset of the last match that was actually delivered
	for _, loc := range r.regex.FindAllSubmatchIndex(r.rxBuf, -1) {
		start, end := loc[0], loc[1]

		// One private copy per match: the receiver must never alias rxBuf,
		// otherwise appending to a match would overwrite buffered data.
		whole := make([]byte, end-start)
		copy(whole, r.rxBuf[start:end])

		groups := make([][]byte, len(loc)/2)
		for g := range groups {
			s, e := loc[2*g], loc[2*g+1]
			if s < 0 || e < 0 {
				continue // group did not participate; keep its slot nil
			}
			// Capture groups lie inside the whole match. The capacity is
			// capped so an append on one group cannot spill into another.
			groups[g] = whole[s-start : e-start : e-start]
		}

		// Try a plain non-blocking send first so the common case does not
		// allocate a timer; only wait when the channel is full.
		select {
		case r.C <- groups:
		default:
			timer := time.NewTimer(sendTimeout)
			select {
			case r.C <- groups:
				timer.Stop()
			case <-timer.C:
				err = errors.New("match channel blocked")
			}
		}
		if err != nil {
			// The consumer is stalled: do not wait again for every remaining
			// match, and do not mark this match as consumed.
			break
		}
		consumed = end
	}

	r.rxBuf = r.rxBuf[consumed:]

	// Purge old data so unmatched input cannot grow the buffer without bound.
	if excess := len(r.rxBuf) - MAX_BUFFER_SIZE; excess > 0 {
		r.rxBuf = r.rxBuf[excess:]
	}
	return len(b), err
}