
Commit 1fa4333

SuaYoo, ikreymer, and tw4l authored
feat: Apply saved workflow settings to current crawl (#2514)
Resolves #2366

## Changes

Allows users to update the current crawl with newly saved workflow settings.

## Manual testing

1. Log in as crawler
2. Start a crawl
3. Go to edit workflow. Verify the "Update Crawl" button is shown
4. Click "Update Crawl". Verify the crawl is updated with the new settings

---------

Co-authored-by: Ilya Kreymer <[email protected]>
Co-authored-by: Tessa Walsh <[email protected]>
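For orientation, the request flow this enables from the API side looks roughly like the sketch below. This is not part of the commit: the base URL, IDs, token, and the exact endpoint path are assumptions for illustration; only `updateRunning` is the new field introduced by this change.

```python
# Hedged sketch: base URL, org/workflow IDs, and the auth token are placeholders,
# and the endpoint path is assumed to be the standard crawlconfig PATCH route.
import requests

API = "https://btrix.example.com/api"          # hypothetical deployment
ORG_ID = "<org-uuid>"                          # placeholder
CONFIG_ID = "<workflow-uuid>"                  # placeholder
HEADERS = {"Authorization": "Bearer <token>"}  # placeholder token

# Save new settings and ask the backend to apply them to the crawl that is
# already running, instead of starting a new crawl (runNow stays False).
resp = requests.patch(
    f"{API}/orgs/{ORG_ID}/crawlconfigs/{CONFIG_ID}/",
    headers=HEADERS,
    json={
        "crawlTimeout": 7200,    # example settings change
        "runNow": False,
        "updateRunning": True,   # new flag added by this change
    },
)
data = resp.json()
print(data.get("settings_changed"), data.get("updatedRunning"))
```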
1 parent c4a7ebc commit 1fa4333

6 files changed: +70 -43 lines changed


backend/btrixcloud/crawlconfigs.py

Lines changed: 22 additions & 15 deletions
@@ -387,7 +387,7 @@ def check_attr_changed(

     async def update_crawl_config(
         self, cid: UUID, org: Organization, user: User, update: UpdateCrawlConfig
-    ) -> dict[str, bool | str]:
+    ) -> CrawlConfigUpdateResponse:
         # pylint: disable=too-many-locals, too-many-branches, too-many-statements
         """Update name, scale, schedule, and/or tags for an existing crawl config"""

@@ -455,11 +455,9 @@ async def update_crawl_config(
         run_now = update.runNow

         if not changed and not metadata_changed and not run_now:
-            return {
-                "updated": True,
-                "settings_changed": changed,
-                "metadata_changed": metadata_changed,
-            }
+            return CrawlConfigUpdateResponse(
+                settings_changed=changed, metadata_changed=metadata_changed
+            )

         if changed:
             orig_dict = orig_crawl_config.dict(exclude_unset=True, exclude_none=True)
@@ -498,10 +496,11 @@ async def update_crawl_config(
                 status_code=404, detail=f"Crawl Config '{cid}' not found"
             )

+        crawlconfig = CrawlConfig.from_dict(result)
+
         # update in crawl manager to change schedule
         if schedule_changed:
             try:
-                crawlconfig = CrawlConfig.from_dict(result)
                 await self.crawl_manager.update_scheduled_job(crawlconfig, str(user.id))

             except Exception as exc:
@@ -511,16 +510,24 @@ async def update_crawl_config(
                 status_code=404, detail=f"Crawl Config '{cid}' not found"
             )

-        ret: dict[str, bool | str] = {
-            "updated": True,
-            "settings_changed": changed,
-            "metadata_changed": metadata_changed,
-            "storageQuotaReached": self.org_ops.storage_quota_reached(org),
-            "execMinutesQuotaReached": self.org_ops.exec_mins_quota_reached(org),
-        }
+        ret = CrawlConfigUpdateResponse(
+            settings_changed=changed,
+            metadata_changed=metadata_changed,
+            storageQuotaReached=self.org_ops.storage_quota_reached(org),
+            execMinutesQuotaReached=self.org_ops.exec_mins_quota_reached(org),
+        )
+
         if run_now:
             crawl_id = await self.run_now(cid, org, user)
-            ret["started"] = crawl_id
+            ret.started = crawl_id
+        elif update.updateRunning and changed:
+            running_crawl = await self.get_running_crawl(cid)
+            if running_crawl:
+                await self.crawl_manager.update_running_crawl_config(
+                    running_crawl.id, crawlconfig
+                )
+                ret.updatedRunning = True
+
         return ret

     async def update_usernames(self, userid: UUID, updated_name: str) -> None:

backend/btrixcloud/crawlmanager.py

Lines changed: 19 additions & 0 deletions
@@ -249,6 +249,25 @@ async def reload_running_crawl_config(self, crawl_id: str):
             crawl_id, {"lastConfigUpdate": date_to_str(dt_now())}
         )

+    async def update_running_crawl_config(
+        self, crawl_id: str, crawlconfig: CrawlConfig
+    ):
+        """force update of config for running crawl"""
+        time_now = date_to_str(dt_now())
+
+        # pylint: disable=use-dict-literal
+        patch = dict(
+            crawlerChannel=crawlconfig.crawlerChannel,
+            scale=crawlconfig.scale,
+            timeout=crawlconfig.crawlTimeout,
+            maxCrawlSize=crawlconfig.maxCrawlSize,
+            proxyId=crawlconfig.proxyId or DEFAULT_PROXY_ID,
+            lastConfigUpdate=time_now,
+            restartTime=time_now,
+        )
+
+        return await self._patch_job(crawl_id, patch)
+
     async def create_qa_crawl_job(
         self,
         crawlconfig: CrawlConfig,
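To make the new method's effect concrete, here is a small standalone sketch of the patch it builds, using the field names from the diff above. The `SimpleNamespace` stand-in and its values are illustrative only, not the real `CrawlConfig` model; only these settings are pushed to the running crawl job, and setting `restartTime` alongside `lastConfigUpdate` presumably signals the operator to restart crawler pods so the new values take effect (an inference, not stated in the diff).

```python
# Illustrative sketch only: SimpleNamespace stands in for the real CrawlConfig
# model, and DEFAULT_PROXY_ID is a placeholder for the configured default.
from datetime import datetime, timezone
from types import SimpleNamespace

DEFAULT_PROXY_ID = ""

cfg = SimpleNamespace(
    crawlerChannel="default",
    scale=2,
    crawlTimeout=7200,
    maxCrawlSize=10_000_000_000,
    proxyId=None,
)

time_now = datetime.now(timezone.utc).isoformat()

# Same shape as the patch built in update_running_crawl_config() above.
patch = dict(
    crawlerChannel=cfg.crawlerChannel,
    scale=cfg.scale,
    timeout=cfg.crawlTimeout,
    maxCrawlSize=cfg.maxCrawlSize,
    proxyId=cfg.proxyId or DEFAULT_PROXY_ID,
    lastConfigUpdate=time_now,
    restartTime=time_now,
)
print(patch)
```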

backend/btrixcloud/models.py

Lines changed: 3 additions & 1 deletion
@@ -506,6 +506,7 @@ class UpdateCrawlConfig(BaseModel):
     description: Optional[str] = None
     autoAddCollections: Optional[List[UUID]] = None
     runNow: bool = False
+    updateRunning: bool = False

     # crawl data: revision tracked
     schedule: Optional[str] = None
@@ -578,9 +579,10 @@ class CrawlConfigSearchValues(BaseModel):
 class CrawlConfigUpdateResponse(BaseModel):
     """Response model for updating crawlconfigs"""

-    updated: bool
+    updated: bool = True
     settings_changed: bool
     metadata_changed: bool
+    updatedRunning: bool = False

     storageQuotaReached: Optional[bool] = False
     execMinutesQuotaReached: Optional[bool] = False
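A self-contained mirror of the updated response model (condensed from the diff above, plus a `started` field inferred from `ret.started = crawl_id` in crawlconfigs.py) shows what API consumers now receive: `updated` defaults to True, and `updatedRunning` reports whether a running crawl was patched in place.

```python
# Condensed sketch for illustration; btrixcloud/models.py holds the real model.
from typing import Optional

from pydantic import BaseModel


class CrawlConfigUpdateResponse(BaseModel):
    """Response model for updating crawlconfigs"""

    updated: bool = True
    settings_changed: bool
    metadata_changed: bool
    updatedRunning: bool = False

    storageQuotaReached: Optional[bool] = False
    execMinutesQuotaReached: Optional[bool] = False
    # Inferred from `ret.started = crawl_id` in crawlconfigs.py above.
    started: Optional[str] = None


# Settings were changed and applied to the already-running crawl:
resp = CrawlConfigUpdateResponse(
    settings_changed=True,
    metadata_changed=False,
    updatedRunning=True,
)
print(resp.dict())
# -> {'updated': True, 'settings_changed': True, 'metadata_changed': False,
#     'updatedRunning': True, 'storageQuotaReached': False,
#     'execMinutesQuotaReached': False, 'started': None}
```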

backend/test/test_run_crawl.py

Lines changed: 2 additions & 3 deletions
@@ -1305,16 +1305,15 @@ def test_custom_behavior_logs(
         if log["context"] == "behaviorScriptCustom":
             assert log["message"] in (
                 "test-stat",
-                "done!",
-                "Using Site-Specific Behavior: TestBehavior",
+                "In Test Behavior!",
             )
             if log["message"] in ("test-stat", "done!"):
                 assert log["details"]["behavior"] == "TestBehavior"
                 assert log["details"]["page"] == "https://specs.webrecorder.net/"

             custom_log_line_count += 1

-    assert custom_log_line_count == 3
+    assert custom_log_line_count == 2


 def test_crawls_exclude_behavior_logs(

frontend/src/features/crawl-workflows/workflow-editor.ts

Lines changed: 24 additions & 18 deletions
@@ -110,12 +110,12 @@ import {
   type WorkflowDefaults,
 } from "@/utils/workflow";

-type NewCrawlConfigParams = WorkflowParams & {
-  runNow: boolean;
+type CrawlConfigParams = WorkflowParams & {
   config: WorkflowParams["config"] & {
     seeds: Seed[];
   };
 };
+type WorkflowRunParams = { runNow: boolean; updateRunning?: boolean };

 const STEPS = SECTIONS;
 type StepName = (typeof STEPS)[number];
@@ -626,28 +626,28 @@
                 type="button"
                 ?disabled=${this.isSubmitting}
                 ?loading=${this.isSubmitting}
-                @click=${this.save}
+                @click=${() => void this.save()}
               >
                 ${msg("Save")}
               </sl-button>
             </sl-tooltip>
             <sl-tooltip
               content=${this.isCrawlRunning
-                ? msg("Crawl is already running")
+                ? msg("Save and apply settings to current crawl")
                 : msg("Save and run with new settings")}
               ?disabled=${this.isCrawlRunning === null}
             >
               <sl-button
                 size="small"
                 variant="primary"
                 type="submit"
-                ?disabled=${isArchivingDisabled(this.org, true) ||
+                ?disabled=${(!this.isCrawlRunning &&
+                  isArchivingDisabled(this.org, true)) ||
                 this.isSubmitting ||
-                this.isCrawlRunning ||
                 this.isCrawlRunning === null}
                 ?loading=${this.isSubmitting || this.isCrawlRunning === null}
               >
-                ${msg(html`Run Crawl`)}
+                ${msg(this.isCrawlRunning ? "Update Crawl" : "Run Crawl")}
               </sl-button>
             </sl-tooltip>
           </footer>
@@ -2191,14 +2191,13 @@ https://archiveweb.page/images/${"logo.svg"}`}
   private async onSubmit(event: SubmitEvent) {
     event.preventDefault();

-    this.updateFormState({
-      runNow: true,
+    void this.save({
+      runNow: !this.isCrawlRunning,
+      updateRunning: Boolean(this.isCrawlRunning),
     });
-
-    void this.save();
   }

-  private async save() {
+  private async save(opts?: WorkflowRunParams) {
     if (!this.formElem) return;

     // TODO Move away from manual validation check
@@ -2235,7 +2234,15 @@ https://archiveweb.page/images/${"logo.svg"}`}
       return;
     }

-    const config = this.parseConfig();
+    const config: CrawlConfigParams & WorkflowRunParams = {
+      ...this.parseConfig(),
+      runNow: Boolean(opts?.runNow),
+    };
+
+    if (this.configId) {
+      config.updateRunning = Boolean(opts?.updateRunning);
+    }
+
     this.isSubmitting = true;

     try {
@@ -2412,15 +2419,14 @@ https://archiveweb.page/images/${"logo.svg"}`}
     }
   }

-  private parseConfig(): NewCrawlConfigParams {
-    const config: NewCrawlConfigParams = {
+  private parseConfig(): CrawlConfigParams {
+    const config: CrawlConfigParams = {
       // Job types are now merged into a single type
       jobType: "custom",
       name: this.formState.jobName || "",
       description: this.formState.description,
       scale: this.formState.scale,
       profileid: this.formState.browserProfile?.id || "",
-      runNow: this.formState.runNow,
       schedule: this.formState.scheduleType === "cron" ? this.utcSchedule : "",
       crawlTimeout: this.formState.crawlTimeoutMinutes * 60,
       maxCrawlSize: this.formState.maxCrawlSizeGB * BYTES_PER_GB,
@@ -2471,7 +2477,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
   }

   private parseUrlListConfig(): Pick<
-    NewCrawlConfigParams["config"],
+    CrawlConfigParams["config"],
     "seeds" | "scopeType" | "extraHops" | "useSitemap" | "failOnFailedSeed"
   > {
     const config = {
@@ -2489,7 +2495,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
   }

   private parseSeededConfig(): Pick<
-    NewCrawlConfigParams["config"],
+    CrawlConfigParams["config"],
     "seeds" | "scopeType" | "useSitemap" | "failOnFailedSeed"
   > {
     const primarySeedUrl = this.formState.primarySeedUrl;

frontend/src/utils/workflow.ts

Lines changed: 0 additions & 6 deletions
@@ -81,7 +81,6 @@ export type FormState = {
     minute: number;
     period: "AM" | "PM";
   };
-  runNow: boolean;
   jobName: WorkflowParams["name"];
   browserProfile: Profile | null;
   tags: Tags;
@@ -139,7 +138,6 @@ export const getDefaultFormState = (): FormState => ({
     minute: 0,
     period: "AM",
   },
-  runNow: false,
   jobName: "",
   browserProfile: null,
   tags: [],
@@ -275,10 +273,6 @@ export function getInitialFormState(params: {
     lang: params.initialWorkflow.config.lang ?? defaultFormState.lang,
     scheduleType: defaultFormState.scheduleType,
     scheduleFrequency: defaultFormState.scheduleFrequency,
-    runNow:
-      params.org?.storageQuotaReached || params.org?.execMinutesQuotaReached
-        ? false
-        : defaultFormState.runNow,
     tags: params.initialWorkflow.tags,
     autoAddCollections: params.initialWorkflow.autoAddCollections,
     jobName: params.initialWorkflow.name || defaultFormState.jobName,
