Skip to content

Commit 6faf1d1

Browse files
authored
Merge branch 'master' into dependency_upgrade
2 parents a904cc3 + a60dd5b commit 6faf1d1

File tree

8 files changed

+169
-61
lines changed

8 files changed

+169
-61
lines changed

Diff for: .github/workflows/ci.yml

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Continuous integration: build the module and run the uaparser tests on every
# push to master and on every pull request, across several Go versions.
name: CI

on:
  push:
    branches: [master]
  pull_request:

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        go-version: [ '1.22', '1.23', '1.24.x' ]

    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch git submodules as part of the checkout; this replaces a
          # separate `git submodule update --init --recursive` step.
          submodules: recursive

      - name: Setup Go ${{ matrix.go-version }}
        uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go-version }}

      - name: Build
        run: go build -v ./...

      - name: Run tests
        # Tests are run from the uaparser directory because they open files
        # through relative paths such as ../uap-core/regexes.yaml.
        run: |
          go version
          cd uaparser
          go test -cover ./...

Diff for: README.md

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ func main() {
5454
if err != nil {
5555
log.Fatal(err)
5656
}
57+
// Alternatively, use the regex definitions compiled into the package (generated from the YAML):
58+
// parser := uaparser.NewFromSaved()
5759

5860
client := parser.Parse(uagent)
5961

Diff for: test.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@ package main
33
import (
44
"bufio"
55
"fmt"
6-
"github.com/ua-parser/uap-go/uaparser"
76
"os"
87
"strconv"
98
"sync"
109
"time"
10+
11+
"github.com/ua-parser/uap-go/uaparser"
1112
)
1213

1314
func main() {
@@ -20,7 +21,7 @@ func main() {
2021
switch os.Args[1] {
2122
case "new":
2223
fmt.Println("Running new version of uap...")
23-
uaParser, _ := uaparser.NewWithOptions("./uap-core/regexes.yaml", (uaparser.EOsLookUpMode | uaparser.EUserAgentLookUpMode), 100, 20, true, true)
24+
uaParser, _ := uaparser.NewWithOptions("./uap-core/regexes.yaml", (uaparser.EOsLookUpMode | uaparser.EUserAgentLookUpMode), 100, 20, true, true, 1024)
2425
for i := 0; i < cLevel; i++ {
2526
wg.Add(1)
2627
go runTest(uaParser, i, &wg)
@@ -38,7 +39,7 @@ func main() {
3839
return
3940
case "both":
4041
fmt.Println("Running new version of uap...")
41-
uaParser, _ := uaparser.NewWithOptions("./uap-core/regexes.yaml", (uaparser.EOsLookUpMode | uaparser.EUserAgentLookUpMode), 100, 20, true, true)
42+
uaParser, _ := uaparser.NewWithOptions("./uap-core/regexes.yaml", (uaparser.EOsLookUpMode | uaparser.EUserAgentLookUpMode), 100, 20, true, true, 1024)
4243
for i := 0; i < cLevel; i++ {
4344
wg.Add(1)
4445
runTest(uaParser, i, &wg)

Diff for: uaparser/benchmark_test.go

+39-1
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,34 @@
11
package uaparser
22

33
import (
4+
"fmt"
45
"log"
6+
"math/rand"
7+
"os"
8+
"strings"
59
"testing"
610
)
711

812
var benchedParser *Parser
913
var benchedParserWithOptions *Parser
14+
var largeUasSample []string
1015

1116
func init() {
1217
var err error
1318
benchedParser, err = New("../uap-core/regexes.yaml")
1419
if err != nil {
1520
log.Fatal(err)
1621
}
17-
benchedParserWithOptions, err = NewWithOptions("../uap-core/regexes.yaml", (EOsLookUpMode | EUserAgentLookUpMode), 100, 20, true, true)
22+
benchedParserWithOptions, err = NewWithOptions("../uap-core/regexes.yaml", (EOsLookUpMode | EUserAgentLookUpMode), 100, 20, true, true, cDefaultCacheSize)
1823
if err != nil {
1924
log.Fatal(err)
2025
}
26+
27+
uasContent, err := os.ReadFile("../uas")
28+
if err != nil {
29+
log.Fatal(err)
30+
}
31+
largeUasSample = strings.Split(string(uasContent), "\n")
2132
}
2233

2334
func BenchmarkParser(b *testing.B) {
@@ -36,6 +47,33 @@ func BenchmarkParserWithOptions(b *testing.B) {
3647
}
3748
}
3849

50+
func BenchmarkParserWithDifferentCacheSize(b *testing.B) {
51+
sizes := []int{cDefaultCacheSize, cDefaultCacheSize*2, cDefaultCacheSize*3, cDefaultCacheSize*4}
52+
53+
for _, size := range sizes {
54+
parser, err := NewWithOptions(
55+
"../uap-core/regexes.yaml",
56+
EOsLookUpMode | EUserAgentLookUpMode | EDeviceLookUpMode,
57+
cDefaultMissesTreshold,
58+
cDefaultMatchIdxNotOk,
59+
false,
60+
false,
61+
size)
62+
if err != nil {
63+
log.Fatal(err)
64+
}
65+
66+
b.ResetTimer()
67+
b.Run(fmt.Sprintf("CacheSize=%d", size), func(b *testing.B) {
68+
for i := 0; i < b.N; i++ {
69+
index := rand.Intn(len(largeUasSample))
70+
parser.Parse(largeUasSample[index])
71+
}
72+
})
73+
}
74+
75+
}
76+
3977
var uas = []string{
4078
"Mozilla/5.0 (Windows NT 6.0; rv:2.0b6pre) Gecko/20100907 Firefox/4.0b6pre",
4179
"Mozilla/5.0 (Windows NT 5.2; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",

Diff for: uaparser/cache.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,11 @@ type cache struct {
1212
userAgent *lru.ARCCache
1313
}
1414

15-
func newCache() *cache {
15+
func newCache(cacheSize int) *cache {
1616
var (
1717
c cache
1818
err error
1919
)
20-
const cacheSize = 1024
2120
// NewARC only fails when cacheSize <= 0.
2221
// Also, returning an error up the stack would break the API.
2322
c.device, err = lru.NewARC(cacheSize)

Diff for: uaparser/parser.go

+63-35
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,15 @@ type Client struct {
128128
Device *Device
129129
}
130130

131+
// parserConfig groups the per-Parser settings that control which lookups run,
// how misses are counted, and how large the result caches are.
type parserConfig struct {
	// Mode is a bitmask of the E*LookUpMode flags; Parse only runs the
	// lookups whose flag is set.
	Mode int
	// UseSort makes Parse call checkAndSort after every parse.
	UseSort bool
	// DebugMode makes checkAndSort print a line whenever it sorts a slice.
	DebugMode bool
	// CacheSize is the size handed to newCache when the Parser is built.
	CacheSize int
	// MissesThreshold is the miss count at which checkAndSort re-sorts the
	// corresponding slice and resets its counter.
	MissesThreshold uint64
	// MatchIdxNotOk is the match index above which a successful lookup is
	// counted as a miss.
	MatchIdxNotOk int
}
139+
131140
type Parser struct {
132141
/* Atomic operations are performed on the following uint64 fields.
133142
* These must be 64bit aligned. On 32bit architectures
@@ -136,12 +145,10 @@ type Parser struct {
136145
OsMisses uint64
137146
DeviceMisses uint64
138147

148+
config *parserConfig
139149
cache *cache
140150

141151
RegexesDefinitions
142-
Mode int
143-
UseSort bool
144-
debugMode bool
145152
}
146153

147154
const (
@@ -152,11 +159,8 @@ const (
152159
cDefaultMissesTreshold = 500000
153160
cDefaultMatchIdxNotOk = 20
154161
cDefaultSortOption = false
155-
)
156-
157-
var (
158-
missesTreshold = uint64(500000)
159-
matchIdxNotOk = 20
162+
cDefaultDebugMode = false
163+
cDefaultCacheSize = 1024
160164
)
161165

162166
func (parser *Parser) mustCompile() { // until we can use yaml.UnmarshalYAML with embedded pointer struct
@@ -174,24 +178,46 @@ func (parser *Parser) mustCompile() { // until we can use yaml.UnmarshalYAML wit
174178
}
175179
}
176180

177-
func NewWithOptions(regexFile string, mode, treshold, topCnt int, useSort, debugMode bool) (*Parser, error) {
178-
data, err := os.ReadFile(regexFile)
181+
func defaultParserConfig() *parserConfig {
182+
return &parserConfig{
183+
Mode: EOsLookUpMode | EUserAgentLookUpMode | EDeviceLookUpMode,
184+
UseSort: cDefaultSortOption,
185+
DebugMode: cDefaultDebugMode,
186+
CacheSize: cDefaultCacheSize,
187+
MissesThreshold: cMinMissesTreshold,
188+
MatchIdxNotOk: cDefaultMatchIdxNotOk,
189+
}
190+
}
191+
192+
func NewWithOptions(regexFile string, mode, treshold, topCnt int, useSort, debugMode bool, cacheSize int) (*Parser, error) {
193+
data, err := ioutil.ReadFile(regexFile)
179194
if nil != err {
180195
return nil, err
181196
}
197+
198+
cfg := &parserConfig{
199+
Mode: mode,
200+
UseSort: useSort,
201+
DebugMode: debugMode,
202+
MatchIdxNotOk: cDefaultMatchIdxNotOk,
203+
MissesThreshold: cDefaultMissesTreshold,
204+
CacheSize: cDefaultCacheSize,
205+
}
206+
182207
if topCnt >= 0 {
183-
matchIdxNotOk = topCnt
208+
cfg.MatchIdxNotOk = topCnt
184209
}
185210
if treshold > cMinMissesTreshold {
186-
missesTreshold = uint64(treshold)
211+
cfg.MissesThreshold = uint64(treshold)
212+
}
213+
if cacheSize > 0 {
214+
cfg.CacheSize = cacheSize
187215
}
188-
parser, err := NewFromBytes(data)
216+
217+
parser, err := newFromBytes(data, cfg)
189218
if err != nil {
190219
return nil, err
191220
}
192-
parser.Mode = mode
193-
parser.UseSort = useSort
194-
parser.debugMode = debugMode
195221
return parser, nil
196222
}
197223

@@ -200,17 +226,15 @@ func New(regexFile string) (*Parser, error) {
200226
if nil != err {
201227
return nil, err
202228
}
203-
matchIdxNotOk = cDefaultMatchIdxNotOk
204-
missesTreshold = cDefaultMissesTreshold
205-
parser, err := NewFromBytes(data)
229+
parser, err := newFromBytes(data, defaultParserConfig())
206230
if err != nil {
207231
return nil, err
208232
}
209233
return parser, nil
210234
}
211235

212236
func NewFromSaved() *Parser {
213-
parser, err := NewFromBytes(DefinitionYaml)
237+
parser, err := newFromBytes(DefinitionYaml, defaultParserConfig())
214238
if err != nil {
215239
// if the YAML is malformed, it's a programmatic error inside what
216240
// we've statically-compiled in our binary. Panic!
@@ -220,9 +244,13 @@ func NewFromSaved() *Parser {
220244
}
221245

222246
func NewFromBytes(data []byte) (*Parser, error) {
247+
return newFromBytes(data, defaultParserConfig())
248+
}
249+
250+
func newFromBytes(data []byte, config *parserConfig) (*Parser, error) {
223251
parser := &Parser{
224-
Mode: EOsLookUpMode | EUserAgentLookUpMode | EDeviceLookUpMode,
225-
cache: newCache(),
252+
config: config,
253+
cache: newCache(config.CacheSize),
226254
}
227255
if err := yaml.Unmarshal(data, &parser.RegexesDefinitions); err != nil {
228256
return nil, err
@@ -236,7 +264,7 @@ func NewFromBytes(data []byte) (*Parser, error) {
236264
func (parser *Parser) Parse(line string) *Client {
237265
cli := new(Client)
238266
var wg sync.WaitGroup
239-
if EUserAgentLookUpMode&parser.Mode == EUserAgentLookUpMode {
267+
if EUserAgentLookUpMode&parser.config.Mode == EUserAgentLookUpMode {
240268
wg.Add(1)
241269
go func() {
242270
defer wg.Done()
@@ -245,7 +273,7 @@ func (parser *Parser) Parse(line string) *Client {
245273
parser.RUnlock()
246274
}()
247275
}
248-
if EOsLookUpMode&parser.Mode == EOsLookUpMode {
276+
if EOsLookUpMode&parser.config.Mode == EOsLookUpMode {
249277
wg.Add(1)
250278
go func() {
251279
defer wg.Done()
@@ -254,7 +282,7 @@ func (parser *Parser) Parse(line string) *Client {
254282
parser.RUnlock()
255283
}()
256284
}
257-
if EDeviceLookUpMode&parser.Mode == EDeviceLookUpMode {
285+
if EDeviceLookUpMode&parser.config.Mode == EDeviceLookUpMode {
258286
wg.Add(1)
259287
go func() {
260288
defer wg.Done()
@@ -264,7 +292,7 @@ func (parser *Parser) Parse(line string) *Client {
264292
}()
265293
}
266294
wg.Wait()
267-
if parser.UseSort {
295+
if parser.config.UseSort {
268296
checkAndSort(parser)
269297
}
270298
return cli
@@ -290,7 +318,7 @@ func (parser *Parser) ParseUserAgent(line string) *UserAgent {
290318
if !found {
291319
ua.Family = "Other"
292320
}
293-
if foundIdx > matchIdxNotOk {
321+
if foundIdx > parser.config.MatchIdxNotOk {
294322
atomic.AddUint64(&parser.UserAgentMisses, 1)
295323
}
296324
parser.cache.userAgent.Add(line, ua)
@@ -318,7 +346,7 @@ func (parser *Parser) ParseOs(line string) *Os {
318346
if !found {
319347
os.Family = "Other"
320348
}
321-
if foundIdx > matchIdxNotOk {
349+
if foundIdx > parser.config.MatchIdxNotOk {
322350
atomic.AddUint64(&parser.OsMisses, 1)
323351
}
324352

@@ -347,7 +375,7 @@ func (parser *Parser) ParseDevice(line string) *Device {
347375
if !found {
348376
dvc.Family = "Other"
349377
}
350-
if foundIdx > matchIdxNotOk {
378+
if foundIdx > parser.config.MatchIdxNotOk {
351379
atomic.AddUint64(&parser.DeviceMisses, 1)
352380
}
353381

@@ -357,26 +385,26 @@ func (parser *Parser) ParseDevice(line string) *Device {
357385

358386
func checkAndSort(parser *Parser) {
359387
parser.Lock()
360-
if atomic.LoadUint64(&parser.UserAgentMisses) >= missesTreshold {
361-
if parser.debugMode {
388+
if atomic.LoadUint64(&parser.UserAgentMisses) >= parser.config.MissesThreshold {
389+
if parser.config.DebugMode {
362390
fmt.Printf("%s\tSorting UserAgents slice\n", time.Now())
363391
}
364392
parser.UserAgentMisses = 0
365393
sort.Sort(UserAgentSorter(parser.UA))
366394
}
367395
parser.Unlock()
368396
parser.Lock()
369-
if atomic.LoadUint64(&parser.OsMisses) >= missesTreshold {
370-
if parser.debugMode {
397+
if atomic.LoadUint64(&parser.OsMisses) >= parser.config.MissesThreshold {
398+
if parser.config.DebugMode {
371399
fmt.Printf("%s\tSorting OS slice\n", time.Now())
372400
}
373401
parser.OsMisses = 0
374402
sort.Sort(OsSorter(parser.OS))
375403
}
376404
parser.Unlock()
377405
parser.Lock()
378-
if atomic.LoadUint64(&parser.DeviceMisses) >= missesTreshold {
379-
if parser.debugMode {
406+
if atomic.LoadUint64(&parser.DeviceMisses) >= parser.config.MissesThreshold {
407+
if parser.config.DebugMode {
380408
fmt.Printf("%s\tSorting Device slice\n", time.Now())
381409
}
382410
parser.DeviceMisses = 0

0 commit comments

Comments
 (0)