Skip to content

Commit eef18e3

Browse files
feat!: Rework bot detection rule with allow/deny configuration (#1437)
This reworks the bot detection rule to be configured like the sensitive info rule, which is to say you can either `allow` or `deny` a list of bots. I've also reworked the bot detection to look for almost 600 well-known bots. Closes #39 - we've changed the configuration format and have validation on `allow` and `deny`.
1 parent 4cb8098 commit eef18e3

31 files changed

+1950
-861
lines changed

.github/dependabot.yml

+57-17
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,63 @@ updates:
8484
# patterns:
8585
# - "*"
8686

87+
- package-ecosystem: npm
88+
directory: /examples/express-bots
89+
schedule:
90+
# Our dependencies should be checked daily
91+
interval: daily
92+
assignees:
93+
- blaine-arcjet
94+
- e-moran
95+
reviewers:
96+
- blaine-arcjet
97+
- e-moran
98+
commit-message:
99+
prefix: deps(example)
100+
prefix-development: deps(example)
101+
groups:
102+
dependencies:
103+
patterns:
104+
- "*"
105+
106+
- package-ecosystem: npm
107+
directory: /examples/express-newman
108+
schedule:
109+
# Our dependencies should be checked daily
110+
interval: daily
111+
assignees:
112+
- blaine-arcjet
113+
- e-moran
114+
reviewers:
115+
- blaine-arcjet
116+
- e-moran
117+
commit-message:
118+
prefix: deps(example)
119+
prefix-development: deps(example)
120+
groups:
121+
dependencies:
122+
patterns:
123+
- "*"
124+
125+
- package-ecosystem: npm
126+
directory: /examples/express-sensitive-info
127+
schedule:
128+
# Our dependencies should be checked daily
129+
interval: daily
130+
assignees:
131+
- blaine-arcjet
132+
- e-moran
133+
reviewers:
134+
- blaine-arcjet
135+
- e-moran
136+
commit-message:
137+
prefix: deps(example)
138+
prefix-development: deps(example)
139+
groups:
140+
dependencies:
141+
patterns:
142+
- "*"
143+
87144
- package-ecosystem: npm
88145
directory: /examples/nextjs-14-app-dir-rl
89146
schedule:
@@ -430,23 +487,6 @@ updates:
430487
patterns:
431488
- "*"
432489

433-
- package-ecosystem: npm
434-
directory: /examples/express-sensitive-info
435-
schedule:
436-
# Our dependencies should be checked daily
437-
interval: daily
438-
assignees:
439-
- blaine-arcjet
440-
reviewers:
441-
- blaine-arcjet
442-
commit-message:
443-
prefix: deps(example)
444-
prefix-development: deps(example)
445-
groups:
446-
dependencies:
447-
patterns:
448-
- "*"
449-
450490
- package-ecosystem: npm
451491
directory: /examples/nodejs-express-launchdarkly
452492
schedule:

analyze/edge-light.ts

+12-25
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ import { instantiate } from "./wasm/arcjet_analyze_js_req.component.js";
44
import type {
55
ImportObject,
66
EmailValidationConfig,
7-
BotDetectionResult,
8-
BotType,
97
EmailValidationResult,
108
DetectedSensitiveInfoEntity,
119
SensitiveInfoEntities,
1210
SensitiveInfoEntity,
1311
SensitiveInfoResult,
12+
BotConfig,
13+
BotResult,
1414
} from "./wasm/arcjet_analyze_js_req.component.js";
1515
import type { ArcjetJsReqSensitiveInformationIdentifier } from "./wasm/interfaces/arcjet-js-req-sensitive-information-identifier.js";
1616

@@ -115,20 +115,7 @@ async function init(
115115

116116
export {
117117
type EmailValidationConfig,
118-
type BotType,
119-
/**
120-
* Represents the result of the bot detection.
121-
*
122-
* @property `botType` - What type of bot this is. This will be one of `BotType`.
123-
* @property `botScore` - A score ranging from 0 to 99 representing the degree of
124-
* certainty. The higher the number within the type category, the greater the
125-
* degree of certainty. E.g. `BotType.Automated` with a score of 1 means we are
126-
* sure the request was made by an automated bot. `BotType.LikelyNotABot` with a
127-
* score of 30 means we don't think this request was a bot, but it's lowest
128-
* confidence level. `BotType.LikelyNotABot` with a score of 99 means we are
129-
* almost certain this request was not a bot.
130-
*/
131-
type BotDetectionResult,
118+
type BotConfig,
132119
type DetectedSensitiveInfoEntity,
133120
type SensitiveInfoEntity,
134121
type DetectSensitiveInfoFunction,
@@ -173,7 +160,7 @@ export async function isValidEmail(
173160
if (typeof analyze !== "undefined") {
174161
return analyze.isValidEmail(candidate, optionsOrDefault);
175162
} else {
176-
// Skip the local evaluation of the rule if WASM is not available
163+
// Skip the local evaluation of the rule if Wasm is not available
177164
return {
178165
validity: "valid",
179166
blocked: [],
@@ -183,22 +170,22 @@ export async function isValidEmail(
183170

184171
export async function detectBot(
185172
context: AnalyzeContext,
186-
headers: string,
187-
patterns_add: string,
188-
patterns_remove: string,
189-
): Promise<BotDetectionResult> {
173+
request: AnalyzeRequest,
174+
options: BotConfig,
175+
): Promise<BotResult> {
190176
const analyze = await init(context);
191177

192178
if (typeof analyze !== "undefined") {
193-
return analyze.detectBot(headers, patterns_add, patterns_remove);
179+
return analyze.detectBot(JSON.stringify(request), options);
194180
} else {
195-
// TODO: Fallback to JS if we don't have WASM?
181+
// Skip the local evaluation of the rule if Wasm is not available
196182
return {
197-
botType: "not-analyzed",
198-
botScore: 0,
183+
allowed: [],
184+
denied: [],
199185
};
200186
}
201187
}
188+
202189
export async function detectSensitiveInfo(
203190
context: AnalyzeContext,
204191
candidate: string,

analyze/index.ts

+10-24
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ import { instantiate } from "./wasm/arcjet_analyze_js_req.component.js";
44
import type {
55
ImportObject,
66
EmailValidationConfig,
7-
BotDetectionResult,
8-
BotType,
97
EmailValidationResult,
108
DetectedSensitiveInfoEntity,
119
SensitiveInfoEntities,
1210
SensitiveInfoEntity,
1311
SensitiveInfoResult,
12+
BotConfig,
13+
BotResult,
1414
} from "./wasm/arcjet_analyze_js_req.component.js";
1515
import type { ArcjetJsReqSensitiveInformationIdentifier } from "./wasm/interfaces/arcjet-js-req-sensitive-information-identifier.js";
1616

@@ -129,20 +129,7 @@ async function init(
129129

130130
export {
131131
type EmailValidationConfig,
132-
type BotType,
133-
/**
134-
* Represents the result of the bot detection.
135-
*
136-
* @property `botType` - What type of bot this is. This will be one of `BotType`.
137-
* @property `botScore` - A score ranging from 0 to 99 representing the degree of
138-
* certainty. The higher the number within the type category, the greater the
139-
* degree of certainty. E.g. `BotType.Automated` with a score of 1 means we are
140-
* sure the request was made by an automated bot. `BotType.LikelyNotABot` with a
141-
* score of 30 means we don't think this request was a bot, but it's lowest
142-
* confidence level. `BotType.LikelyNotABot` with a score of 99 means we are
143-
* almost certain this request was not a bot.
144-
*/
145-
type BotDetectionResult,
132+
type BotConfig,
146133
type DetectedSensitiveInfoEntity,
147134
type SensitiveInfoEntity,
148135
type DetectSensitiveInfoFunction,
@@ -197,19 +184,18 @@ export async function isValidEmail(
197184

198185
export async function detectBot(
199186
context: AnalyzeContext,
200-
headers: string,
201-
patterns_add: string,
202-
patterns_remove: string,
203-
): Promise<BotDetectionResult> {
187+
request: AnalyzeRequest,
188+
options: BotConfig,
189+
): Promise<BotResult> {
204190
const analyze = await init(context);
205191

206192
if (typeof analyze !== "undefined") {
207-
return analyze.detectBot(headers, patterns_add, patterns_remove);
193+
return analyze.detectBot(JSON.stringify(request), options);
208194
} else {
209-
// TODO: Fallback to JS if we don't have WASM?
195+
// Skip the local evaluation of the rule if Wasm is not available
210196
return {
211-
botType: "not-analyzed",
212-
botScore: 0,
197+
allowed: [],
198+
denied: [],
213199
};
214200
}
215201
}
Binary file not shown.
Binary file not shown.
Binary file not shown.

analyze/wasm/arcjet_analyze_js_req.component.d.ts

+23-21
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,6 @@ export { SensitiveInfoEntity };
33
/**
44
* # Variants
55
*
6-
* ## `"unspecified"`
7-
*
8-
* ## `"not-analyzed"`
9-
*
10-
* ## `"automated"`
11-
*
12-
* ## `"likely-automated"`
13-
*
14-
* ## `"likely-not-a-bot"`
15-
*
16-
* ## `"verified-bot"`
17-
*/
18-
export type BotType = 'unspecified' | 'not-analyzed' | 'automated' | 'likely-automated' | 'likely-not-a-bot' | 'verified-bot';
19-
export interface BotDetectionResult {
20-
botType: BotType,
21-
botScore: number,
22-
}
23-
/**
24-
* # Variants
25-
*
266
* ## `"valid"`
277
*
288
* ## `"invalid"`
@@ -60,6 +40,28 @@ export interface SensitiveInfoResult {
6040
allowed: Array<DetectedSensitiveInfoEntity>,
6141
denied: Array<DetectedSensitiveInfoEntity>,
6242
}
43+
export type BotEntity = string;
44+
export interface AllowedBotConfig {
45+
entities: Array<BotEntity>,
46+
skipCustomDetect: boolean,
47+
}
48+
export interface DeniedBotConfig {
49+
entities: Array<BotEntity>,
50+
skipCustomDetect: boolean,
51+
}
52+
export type BotConfig = BotConfigAllowedBotConfig | BotConfigDeniedBotConfig;
53+
export interface BotConfigAllowedBotConfig {
54+
tag: 'allowed-bot-config',
55+
val: AllowedBotConfig,
56+
}
57+
export interface BotConfigDeniedBotConfig {
58+
tag: 'denied-bot-config',
59+
val: DeniedBotConfig,
60+
}
61+
export interface BotResult {
62+
allowed: Array<BotEntity>,
63+
denied: Array<BotEntity>,
64+
}
6365
import { ArcjetJsReqEmailValidatorOverrides } from './interfaces/arcjet-js-req-email-validator-overrides.js';
6466
import { ArcjetJsReqLogger } from './interfaces/arcjet-js-req-logger.js';
6567
import { ArcjetJsReqSensitiveInformationIdentifier } from './interfaces/arcjet-js-req-sensitive-information-identifier.js';
@@ -69,7 +71,7 @@ export interface ImportObject {
6971
'arcjet:js-req/sensitive-information-identifier': typeof ArcjetJsReqSensitiveInformationIdentifier,
7072
}
7173
export interface Root {
72-
detectBot(headers: string, patternsAdd: string, patternsRemove: string): BotDetectionResult,
74+
detectBot(request: string, options: BotConfig): BotResult,
7375
generateFingerprint(request: string, characteristics: Array<string>): string,
7476
isValidEmail(candidate: string, options: EmailValidationConfig): EmailValidationResult,
7577
detectSensitiveInfo(content: string, options: SensitiveInfoConfig): SensitiveInfoResult,

0 commit comments

Comments
 (0)