Skip to content

Commit f84f6f5

Browse files
tw4l and ikreymer authored
Add basic backend validation for selectLinks (#2510)
Follow-up to #2152. Related to #2487.

This PR provides very basic validation of the `config.selectLinks` argument on workflow creation and update. Namely, it checks that:

- `config.selectLinks` is not an empty array
- Each entry consists of two non-empty text sequences separated by `->`

At this point we're not validating the actual CSS selector on the backend, though we could add that down the road.

Tests have been added accordingly.

Co-authored-by: Ilya Kreymer <[email protected]>
1 parent 23f9e08 commit f84f6f5

File tree

3 files changed

+64
-1
lines changed

3 files changed

+64
-1
lines changed

backend/btrixcloud/crawlconfigs.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ async def add_crawl_config(
233233
exclude = [exclude]
234234
validate_regexes(exclude)
235235

236+
self._validate_link_selectors(config_in.config.selectLinks)
237+
236238
if config_in.config.customBehaviors:
237239
for url in config_in.config.customBehaviors:
238240
self._validate_custom_behavior_url_syntax(url)
@@ -297,6 +299,24 @@ async def add_crawl_config(
297299
execMinutesQuotaReached=exec_mins_quota_reached,
298300
)
299301

302+
def _validate_link_selectors(self, link_selectors: List[str]):
303+
"""Validate link selectors
304+
305+
Ensure at least one link selector is set and that all the link selectors passed
306+
follow expected syntax: selector->attribute/property.
307+
308+
We don't yet check the validity of the CSS selector itself.
309+
"""
310+
if not link_selectors:
311+
raise HTTPException(status_code=400, detail="invalid_link_selector")
312+
313+
for link_selector in link_selectors:
314+
parts = link_selector.split("->")
315+
if not len(parts) == 2:
316+
raise HTTPException(status_code=400, detail="invalid_link_selector")
317+
if not parts[0] or not parts[1]:
318+
raise HTTPException(status_code=400, detail="invalid_link_selector")
319+
300320
def _validate_custom_behavior_url_syntax(self, url: str) -> Tuple[bool, List[str]]:
301321
"""Validate custom behaviors are valid URLs after removing custom git syntax"""
302322
git_prefix = "git+"
@@ -379,6 +399,9 @@ async def update_crawl_config(
379399
exclude = [exclude]
380400
validate_regexes(exclude)
381401

402+
if update.config and update.config.selectLinks is not None:
403+
self._validate_link_selectors(update.config.selectLinks)
404+
382405
if update.config and update.config.customBehaviors:
383406
for url in update.config.customBehaviors:
384407
self._validate_custom_behavior_url_syntax(url)

backend/test/test_crawlconfigs.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,24 @@ def test_update_config_invalid_exclude_regex(
172172
assert r.status_code == 400
173173
assert r.json()["detail"] == "invalid_regex"
174174

175+
def test_update_config_invalid_link_selector(
176+
crawler_auth_headers, default_org_id, sample_crawl_data
177+
):
178+
r = requests.patch(
179+
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{cid}/",
180+
headers=crawler_auth_headers,
181+
json={"config": {"selectLinks": []}},
182+
)
183+
assert r.status_code == 400
184+
assert r.json()["detail"] == "invalid_link_selector"
185+
186+
r = requests.patch(
187+
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{cid}/",
188+
headers=crawler_auth_headers,
189+
json={"config": {"selectLinks": ["a[href]->href", "->href"]}},
190+
)
191+
assert r.status_code == 400
192+
assert r.json()["detail"] == "invalid_link_selector"
175193

176194
def test_verify_default_select_links(
177195
crawler_auth_headers, default_org_id, sample_crawl_data
@@ -545,6 +563,28 @@ def test_add_crawl_config_invalid_exclude_regex(
545563
assert r.json()["detail"] == "invalid_regex"
546564

547565

566+
def test_add_crawl_config_invalid_link_selectors(
567+
crawler_auth_headers, default_org_id, sample_crawl_data
568+
):
569+
sample_crawl_data["config"]["selectLinks"] = []
570+
r = requests.post(
571+
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
572+
headers=crawler_auth_headers,
573+
json=sample_crawl_data,
574+
)
575+
assert r.status_code == 400
576+
assert r.json()["detail"] == "invalid_link_selector"
577+
578+
sample_crawl_data["config"]["selectLinks"] = ["a[href]->href", "->href"]
579+
r = requests.post(
580+
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
581+
headers=crawler_auth_headers,
582+
json=sample_crawl_data,
583+
)
584+
assert r.status_code == 400
585+
assert r.json()["detail"] == "invalid_link_selector"
586+
587+
548588
def test_add_crawl_config_custom_behaviors_invalid_url(
549589
crawler_auth_headers, default_org_id, sample_crawl_data
550590
):

frontend/src/features/crawl-workflows/custom-behaviors-table-row.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ const errorFor: Record<ValidationErrorCode, string> = {
7777
};
7878

7979
const inputStyle = [
80-
tw`[--sl-input-background-color-hover:transparent] [--sl-input-background-color:transparent] [--sl-input-border-color-hover:transparent] [--sl-input-border-radius-medium:0] [--sl-input-spacing-medium:var(--sl-spacing-small)]`,
80+
tw`[--sl-input-border-radius-medium:0] [--sl-input-spacing-medium:var(--sl-spacing-small)] [--sl-input-background-color-hover:transparent] [--sl-input-background-color:transparent] [--sl-input-border-color-hover:transparent]`,
8181
tw`data-[valid]:[--sl-input-border-color:transparent]`,
8282
tw`part-[form-control-help-text]:mx-1 part-[form-control-help-text]:mb-1`,
8383
];

0 commit comments

Comments
 (0)