Skip to content

Commit

Permalink
Move parser to its own module. (#16)
Browse files Browse the repository at this point in the history
The purpose of this change is to prevent the parser's structures generated by flexgo and goyacc from polluting gyp's API.
  • Loading branch information
plusvic authored Dec 3, 2019
1 parent f676051 commit d2aefe1
Show file tree
Hide file tree
Showing 8 changed files with 449 additions and 341 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
all: proto grammar y2j

grammar:
flexgo -G -v -o lexer.go lexer.l && goyacc -p yr -o parser.go grammar.y
flexgo -G -v -o parser/lexer.go parser/lexer.l && goyacc -p yr -o parser/parser.go parser/grammar.y

hexgrammar:
flexgo -G -v -o hex/hex_lexer.go hex/hex_lexer.l && goyacc -p xx -o hex/hex_parser.go hex/hex_grammar.y
Expand All @@ -21,4 +21,4 @@ release: parser lexer
GOOS=windows go build -o y2j.exe github.com/VirusTotal/gyp/cmd/y2j

clean:
rm lexer.go parser.go yara.pb.go y.output y2j j2y
rm parser/lexer.go parser/parser.go pb/yara.pb.go y.output y2j j2y
105 changes: 0 additions & 105 deletions adapter.go

This file was deleted.

36 changes: 36 additions & 0 deletions gyp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
Package gyp provides a pure Go parser for YARA rules.
For example, you can parse YARA rules from a string:
ruleset, err := gyp.ParseString("rule test { condition: true }")
Or from an io.Reader:
ruleset, err := gyp.Parse(os.Stdin)
The rules can be written to source again:
err := ruleset.WriteSource(os.Stdout)
Or you can iterate over the rules and inspect their attributes:
for _, rule := range ruleset.Rules {
fmt.Println(rule.Identifier)
}
*/
package gyp

import (
"bytes"
"io"

"github.com/VirusTotal/gyp/ast"
"github.com/VirusTotal/gyp/parser"
)

// Parse reads YARA source from input and returns the resulting rule set.
// It delegates to parser.Parse and hands its results back unchanged.
func Parse(input io.Reader) (rs *ast.RuleSet, err error) {
	rs, err = parser.Parse(input)
	return rs, err
}

// ParseString parses the YARA source held in s. The string is wrapped in an
// in-memory buffer which is then handed to Parse.
func ParseString(s string) (*ast.RuleSet, error) {
	src := bytes.NewBufferString(s)
	return Parse(src)
}
File renamed without changes.
104 changes: 97 additions & 7 deletions grammar.y → parser/grammar.y
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

%{
package gyp
package parser

import (
"fmt"
"io"
"io/ioutil"
"strings"
"github.com/VirusTotal/gyp/ast"
gyperror "github.com/VirusTotal/gyp/error"
Expand All @@ -55,6 +58,93 @@ type stringModifiers struct {
XorMax int32
}

// lexer is an adapter that fits the flexgo lexer ("Scanner") into goyacc.
// It also carries the state shared between Parse and the grammar actions.
type lexer struct {
// scanner is the flexgo scanner whose Lex method supplies the tokens.
scanner Scanner
// err is the error recorded by Error or setError; Parse returns it when
// yrParse reports a non-zero result.
err gyperror.Error
// ruleSet is the rule set that Parse returns; grammar actions access it
// through the lexer (e.g. for the duplicate-rule check).
ruleSet *ast.RuleSet
}

// Lex is the token source called by the goyacc-generated parser.
// Before scanning, it points the scanner context's lval (defined in the
// lexer file) at the value passed in, so that the scanner and the parser
// actions share the same semantic-value slot.
//
// When the scanner reports an error (non-zero error code), the error is
// stamped with the scanner's current line number and raised with panic;
// Parse recovers it and returns it to the caller.
func (l *lexer) Lex(lval *yrSymType) int {
	l.scanner.Context.lval = lval
	result := l.scanner.Lex()
	if result.Error.Code == 0 {
		return result.Token
	}
	result.Error.Line = l.scanner.Lineno
	panic(result.Error)
}

// Error is the error callback required by the goyacc parser interface.
// It records msg on the lexer as a gyperror.LexicalError located at the
// scanner's current line.
func (l *lexer) Error(msg string) {
	var e gyperror.Error
	e.Code = gyperror.LexicalError
	e.Line = l.scanner.Lineno
	e.Message = msg
	l.err = e
}

// setError records an error with the given code on the lexer, located at the
// scanner's current line. The message is built from format and a in the same
// way as fmt.Sprintf.
//
// It always returns 1 so that grammar actions can abort parsing in a single
// statement:
//   return lexer.setError(...)
// A non-zero result from the parser is what makes Parse report the error.
func (l *lexer) setError(code gyperror.Code, format string, a ...interface{}) int {
	line := l.scanner.Lineno
	msg := fmt.Sprintf(format, a...)
	l.err = gyperror.Error{Code: code, Line: line, Message: msg}
	return 1
}


// asLexer converts the yrLexer interface value handed to grammar actions
// back into the concrete *lexer. It panics if l holds any other type.
func asLexer(l yrLexer) *lexer {
	concrete := l.(*lexer)
	return concrete
}


// Parse reads YARA source from input and returns the rule set built by the
// generated parser.
//
// Errors raised with panic during parsing (as Lex does for scanner errors)
// are recovered here: a gyperror.Error is returned as is, any other panic
// value is wrapped in a gyperror.Error with code gyperror.UnknownError. In
// that case the returned rule set is nil. When the parser itself reports
// failure, the error recorded on the lexer is returned together with the
// rule set collected so far.
func Parse(input io.Reader) (rs *ast.RuleSet, err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		switch e := r.(type) {
		case gyperror.Error:
			err = e
		default:
			err = gyperror.Error{
				Code:    gyperror.UnknownError,
				Message: fmt.Sprintf("%v", r),
			}
		}
	}()

	// Start from empty, non-nil slices so the result never carries nil
	// Imports or Rules.
	lx := &lexer{
		scanner: *NewScanner(),
		ruleSet: &ast.RuleSet{
			Imports: []string{},
			Rules:   []*ast.Rule{},
		},
	}
	lx.scanner.In = input
	// Anything the scanner writes to its output is thrown away.
	lx.scanner.Out = ioutil.Discard

	if yrParse(lx) != 0 {
		err = lx.err
	}

	return lx.ruleSet, err
}


// init sets yrErrorVerbose to true when the package is loaded.
// NOTE(review): yrErrorVerbose is presumably the goyacc-generated verbose
// error flag (prefix "yr"), which makes syntax error messages more detailed;
// confirm against the generated parser.go.
func init() {
yrErrorVerbose = true
}


%}

Expand Down Expand Up @@ -225,7 +315,7 @@ rule
// Forbid duplicate rules
for _, r := range lexer.ruleSet.Rules {
if $3 == r.Identifier {
return lexer.SetError(
return lexer.setError(
gyperror.DuplicateRuleError, `duplicate rule "%s"`, $3)
}
}
Expand Down Expand Up @@ -329,7 +419,7 @@ tag_list

for _, tag := range $1 {
if tag == $2 {
return lexer.SetError(
return lexer.setError(
gyperror.DuplicateTagError, `duplicate tag "%s"`, $2)
}
}
Expand Down Expand Up @@ -449,7 +539,7 @@ string_modifiers
| string_modifiers string_modifier
{
if $1.modifiers & $2.modifiers != 0 {
return asLexer(yrlex).SetError(
return asLexer(yrlex).setError(
gyperror.DuplicateModifierError, `duplicate modifier`)
}

Expand Down Expand Up @@ -492,19 +582,19 @@ string_modifier
lexer := asLexer(yrlex)

if $3 < 0 {
return lexer.SetError(
return lexer.setError(
gyperror.InvalidStringModifierError,
"lower bound for xor range exceeded (min: 0)")
}

if $5 > 255 {
return lexer.SetError(
return lexer.setError(
gyperror.InvalidStringModifierError,
"upper bound for xor range exceeded (max: 255)")
}

if $3 > $5 {
return lexer.SetError(
return lexer.setError(
gyperror.InvalidStringModifierError,
"xor lower bound exceeds upper bound")
}
Expand Down
Loading

0 comments on commit d2aefe1

Please sign in to comment.