Skip to content
This repository was archived by the owner on May 16, 2025. It is now read-only.

Commit d2aefe1

Browse files
authored
Move parser to its own module. (#16)
The purpose of this change is to prevent the parser structures generated by flexgo and goyacc from polluting gyp's API.
1 parent f676051 commit d2aefe1

File tree

8 files changed

+449
-341
lines changed

8 files changed

+449
-341
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
all: proto grammar y2j
22

33
grammar:
4-
flexgo -G -v -o lexer.go lexer.l && goyacc -p yr -o parser.go grammar.y
4+
flexgo -G -v -o parser/lexer.go parser/lexer.l && goyacc -p yr -o parser/parser.go parser/grammar.y
55

66
hexgrammar:
77
flexgo -G -v -o hex/hex_lexer.go hex/hex_lexer.l && goyacc -p xx -o hex/hex_parser.go hex/hex_grammar.y
@@ -21,4 +21,4 @@ release: parser lexer
2121
GOOS=windows go build -o y2j.exe github.com/VirusTotal/gyp/cmd/y2j
2222

2323
clean:
24-
rm lexer.go parser.go yara.pb.go y.output y2j j2y
24+
rm parser/lexer.go parser/parser.go pb/yara.pb.go y.output y2j j2y

adapter.go

Lines changed: 0 additions & 105 deletions
This file was deleted.

gyp.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
Package gyp provides a pure Go parser for YARA rules.
3+
4+
For example, you can parse YARA rules from a string:
5+
ruleset, err := gyp.ParseString("rule test { condition: true }")
6+
7+
Or from an io.Reader:
8+
ruleset, err := gyp.Parse(os.Stdin)
9+
10+
The rules can be written to source again:
11+
err := ruleset.WriteSource(os.Stdout)
12+
13+
Or you can iterate over the rules and inspect their attributes:
14+
for _, rule := range ruleset.Rules {
15+
fmt.Println(rule.Identifier)
16+
}
17+
*/
18+
package gyp
19+
20+
import (
21+
"bytes"
22+
"io"
23+
24+
"github.com/VirusTotal/gyp/ast"
25+
"github.com/VirusTotal/gyp/parser"
26+
)
27+
28+
// Parse parses YARA rules from the provided input source and returns the resulting rule set.
29+
func Parse(input io.Reader) (rs *ast.RuleSet, err error) {
30+
return parser.Parse(input)
31+
}
32+
33+
// ParseString parses YARA rules from the provided string and returns the resulting rule set.
34+
func ParseString(s string) (*ast.RuleSet, error) {
35+
return Parse(bytes.NewBufferString(s))
36+
}
File renamed without changes.

grammar.y renamed to parser/grammar.y

Lines changed: 97 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2828
*/
2929

3030
%{
31-
package gyp
31+
package parser
3232

3333
import (
34+
"fmt"
35+
"io"
36+
"io/ioutil"
3437
"strings"
3538
"github.com/VirusTotal/gyp/ast"
3639
gyperror "github.com/VirusTotal/gyp/error"
@@ -55,6 +58,93 @@ type stringModifiers struct {
5558
XorMax int32
5659
}
5760

61+
// lexer is an adapter that fits the flexgo lexer ("Scanner") into goyacc.
62+
type lexer struct {
63+
scanner Scanner
64+
err gyperror.Error
65+
ruleSet *ast.RuleSet
66+
}
67+
68+
// Lex provides the interface expected by the goyacc parser.
69+
// It sets the context's lval pointer (defined in the lexer file)
70+
// to the one passed as an argument so that the parser actions
71+
// can make use of it.
72+
func (l *lexer) Lex(lval *yrSymType) int {
73+
l.scanner.Context.lval = lval
74+
r := l.scanner.Lex()
75+
if r.Error.Code != 0 {
76+
r.Error.Line = l.scanner.Lineno
77+
panic(r.Error)
78+
}
79+
return r.Token
80+
}
81+
82+
// Error satisfies the interface expected of the goyacc parser.
83+
func (l *lexer) Error(msg string) {
84+
l.err = gyperror.Error{
85+
Code: gyperror.LexicalError,
86+
Line: l.scanner.Lineno,
87+
Message: msg,
88+
}
89+
}
90+
91+
// setError sets the lexer error. The error message can be built by passing
92+
// a format string and arguments as fmt.Sprintf. This function returns 1 as
93+
// it's intended to be used by Parse as:
94+
// return lexer.setError(...)
95+
// By returning 1 from Parse the parsing is aborted.
96+
func (l *lexer) setError(code gyperror.Code, format string, a ...interface{}) int {
97+
l.err = gyperror.Error{
98+
Code: code,
99+
Line: l.scanner.Lineno,
100+
Message: fmt.Sprintf(format, a...),
101+
}
102+
return 1
103+
}
104+
105+
106+
// Helper function that casts a yrLexer interface to a lexer struct.
107+
func asLexer(l yrLexer) *lexer {
108+
return l.(*lexer)
109+
}
110+
111+
112+
func Parse(input io.Reader) (rs *ast.RuleSet, err error) {
113+
defer func() {
114+
if r := recover(); r != nil {
115+
if yaraError, ok := r.(gyperror.Error); ok {
116+
err = yaraError
117+
} else {
118+
err = gyperror.Error{
119+
Code: gyperror.UnknownError,
120+
Message: fmt.Sprintf("%v", r),
121+
}
122+
}
123+
}
124+
}()
125+
126+
lexer := &lexer{
127+
scanner: *NewScanner(),
128+
ruleSet: &ast.RuleSet{
129+
Imports: make([]string, 0),
130+
Rules: make([]*ast.Rule, 0),
131+
},
132+
}
133+
lexer.scanner.In = input
134+
lexer.scanner.Out = ioutil.Discard
135+
136+
if result := yrParse(lexer); result != 0 {
137+
err = lexer.err
138+
}
139+
140+
return lexer.ruleSet, err
141+
}
142+
143+
144+
func init() {
145+
yrErrorVerbose = true
146+
}
147+
58148

59149
%}
60150

@@ -225,7 +315,7 @@ rule
225315
// Forbid duplicate rules
226316
for _, r := range lexer.ruleSet.Rules {
227317
if $3 == r.Identifier {
228-
return lexer.SetError(
318+
return lexer.setError(
229319
gyperror.DuplicateRuleError, `duplicate rule "%s"`, $3)
230320
}
231321
}
@@ -329,7 +419,7 @@ tag_list
329419

330420
for _, tag := range $1 {
331421
if tag == $2 {
332-
return lexer.SetError(
422+
return lexer.setError(
333423
gyperror.DuplicateTagError, `duplicate tag "%s"`, $2)
334424
}
335425
}
@@ -449,7 +539,7 @@ string_modifiers
449539
| string_modifiers string_modifier
450540
{
451541
if $1.modifiers & $2.modifiers != 0 {
452-
return asLexer(yrlex).SetError(
542+
return asLexer(yrlex).setError(
453543
gyperror.DuplicateModifierError, `duplicate modifier`)
454544
}
455545

@@ -492,19 +582,19 @@ string_modifier
492582
lexer := asLexer(yrlex)
493583

494584
if $3 < 0 {
495-
return lexer.SetError(
585+
return lexer.setError(
496586
gyperror.InvalidStringModifierError,
497587
"lower bound for xor range exceeded (min: 0)")
498588
}
499589

500590
if $5 > 255 {
501-
return lexer.SetError(
591+
return lexer.setError(
502592
gyperror.InvalidStringModifierError,
503593
"upper bound for xor range exceeded (max: 255)")
504594
}
505595

506596
if $3 > $5 {
507-
return lexer.SetError(
597+
return lexer.setError(
508598
gyperror.InvalidStringModifierError,
509599
"xor lower bound exceeds upper bound")
510600
}

0 commit comments

Comments
 (0)