diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index d87e24771..a133068b5 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -7,7 +7,8 @@ import path from 'path'; import { EventEmitter } from 'events'; import { Where, What, PageState, Workflow, WorkflowFile, - ParamType, SelectorArray, CustomFunctions, + ParamType, SelectorArray, CustomFunctions, SessionData, + RegexableString, } from './types/workflow'; import { operators, meta } from './types/logic'; @@ -68,8 +69,13 @@ export default class Interpreter extends EventEmitter { private cumulativeResults: Record[] = []; - constructor(workflow: WorkflowFile, options?: Partial) { + private session: SessionData | null = null; + + private loginSuccessful: boolean = false; + + constructor(workflow: WorkflowFile, options?: Partial, session?: SessionData | null) { super(); + this.session = session || null; this.workflow = workflow.workflow; this.initializedWorkflow = null; this.options = { @@ -129,6 +135,41 @@ export default class Interpreter extends EventEmitter { } } + private isLoginUrl(url: string): boolean { + const loginKeywords = ['login', 'signin', 'sign-in', 'auth']; + const lowercaseUrl = url.toLowerCase(); + return loginKeywords.some(keyword => lowercaseUrl.includes(keyword)); + } + + private getUrlString(url: RegexableString | undefined): string { + if (!url) return ''; + + if (typeof url === 'string') return url; + + if ('$regex' in url) { + let normalUrl = url['$regex']; + return normalUrl + .replace(/^\^/, '') + .replace(/\$$/, '') + .replace(/\\([?])/g, '$1'); + } + + return ''; +} + + private findFirstPostLoginAction(workflow: Workflow): number { + for (let i = workflow.length - 1; i >= 0; i--) { + const action = workflow[i]; + if (action.where.url && action.where.url !== "about:blank") { + const urlString = this.getUrlString(action.where.url); + if (!this.isLoginUrl(urlString)) { + return i; + } + } + } + return -1; + } + // private getSelectors(workflow: Workflow, actionId: number): string[] { // const selectors: string[] = []; @@ -244,6 +285,14 @@ export default class Interpreter extends EventEmitter { url = action.where.url; } + if (this.loginSuccessful) { + const sessionState = await page.context().storageState(); + this.session = sessionState; + + this.loginSuccessful = false; + this.log('Stored authentication cookies after successful login', Level.LOG); + } + return { url, cookies: (await page.context().cookies([page.url()])) @@ -829,6 +878,28 @@ export default class Interpreter extends EventEmitter { workflowCopy = this.removeSpecialSelectors(workflowCopy); + if (this.session){ + const postLoginActionId = this.findFirstPostLoginAction(workflowCopy); + if (postLoginActionId !== -1) { + const targetUrl = this.getUrlString(workflowCopy[postLoginActionId].where.url); + if (targetUrl) { + try { + await p.goto(targetUrl); + await p.waitForLoadState('networkidle'); + + if (!this.isLoginUrl(targetUrl)) { + workflowCopy.splice(postLoginActionId + 1); + this.log('Successfully skipped login using stored cookies', Level.LOG); + } else { + this.log('Cookie authentication failed, proceeding with manual login', Level.LOG); + } + } catch (error) { + this.log(`Failed to navigate with stored cookies: ${error}`, Level.ERROR); + } + } + } + } + // apply ad-blocker to the current page try { await this.applyAdBlocker(p); @@ -916,6 +987,12 @@ export default class Interpreter extends EventEmitter { await this.carryOutSteps(p, action.what); usedActions.push(action.id ?? 'undefined'); + const url = this.getUrlString(action.where.url); + + if (this.isLoginUrl(url)) { + this.loginSuccessful = true; + } + workflowCopy.splice(actionId, 1); console.log(`Action with ID ${action.id} removed from the workflow copy.`); @@ -951,9 +1028,35 @@ export default class Interpreter extends EventEmitter { * @param {ParamType} params Workflow specific, set of parameters * for the `{$param: nameofparam}` fields. */ - public async run(page: Page, params?: ParamType): Promise { + public async run(page: Page, params?: ParamType): Promise { this.log('Starting the workflow.', Level.LOG); - const context = page.context(); + let context = page.context(); + + if (this.session) { + try { + this.log('Found existing session, creating new context with stored state...', Level.LOG); + + const newContext = await context.browser().newContext({ + storageState: { + cookies: this.session.cookies, + origins: this.session.origins.map(origin => ({ + origin: origin.origin, + localStorage: origin.localStorage.map(storage => ({ + name: storage.name, + value: storage.value + })) + })) + } + }); + + const newPage = await newContext.newPage(); + page = newPage; + + this.log('Successfully created new page with session state', Level.LOG); + } catch (error) { + this.log(`Failed to create page with session state: ${error.message}. Falling back to original page...`, Level.ERROR); + } + } page.setDefaultNavigationTimeout(100000); @@ -987,6 +1090,8 @@ export default class Interpreter extends EventEmitter { await this.concurrency.waitForCompletion(); this.stopper = null; + + return this.session; } public async stop(): Promise { diff --git a/maxun-core/src/types/workflow.ts b/maxun-core/src/types/workflow.ts index f7cf180d7..bd8d05c98 100644 --- a/maxun-core/src/types/workflow.ts +++ b/maxun-core/src/types/workflow.ts @@ -11,7 +11,7 @@ export type Meta = typeof meta[number]; export type SelectorArray = string[]; -type RegexableString = string | { '$regex': string }; +export type RegexableString = string | { '$regex': string }; type BaseConditions = { 'url': RegexableString, @@ -55,4 +55,26 @@ export type Workflow = WhereWhatPair[]; export type WorkflowFile = { meta?: MetaData, workflow: Workflow -}; \ No newline at end of file +}; + +interface Cookie { + name: string; + value: string; + domain: string; + path: string; + expires: number; + httpOnly: boolean; + secure: boolean; + sameSite: 'Lax' | 'Strict' | 'None'; +} + +interface Origin { + origin: string; + localStorage: Record[]; +} + +export interface SessionData { + cookies: Cookie[]; + origins: Origin[]; +} + diff --git a/server/src/api/record.ts b/server/src/api/record.ts index 05560487d..d174a52a5 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -580,7 +580,7 @@ async function executeRun(id: string) { const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( - workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings + workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings, plainRun.robotMetaId ); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); diff --git a/server/src/models/Robot.ts b/server/src/models/Robot.ts index 3b2717d64..ee88b8cca 100644 --- a/server/src/models/Robot.ts +++ b/server/src/models/Robot.ts @@ -9,12 +9,34 @@ interface RobotMeta { pairs: number; updatedAt: string; params: any[]; + isLogin?: boolean; } interface RobotWorkflow { workflow: WhereWhatPair[]; } +interface Cookie { + name: string; + value: string; + domain: string; + path: string; + expires: number; + httpOnly: boolean; + secure: boolean; + sameSite: 'Lax' | 'Strict' | 'None'; +} + +interface Origin { + origin: string; + localStorage: Record[]; +} + +interface SessionData { + cookies: Cookie[]; + origins: Origin[]; +} + interface RobotAttributes { id: string; userId?: number; @@ -26,6 +48,7 @@ interface RobotAttributes { google_access_token?: string | null; google_refresh_token?: string | null; schedule?: ScheduleConfig | null; + session?: SessionData | null; } interface ScheduleConfig { @@ -54,6 +77,7 @@ class Robot extends Model implements R public google_access_token!: string | null; public google_refresh_token!: string | null; public schedule!: ScheduleConfig | null; + public session!: SessionData | null; } Robot.init( @@ -99,6 +123,10 @@ Robot.init( type: DataTypes.JSONB, allowNull: true, }, + session: { + type: DataTypes.JSONB, + allowNull: true, + }, }, { sequelize, diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index 01f1ca6bf..393b78f9e 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -562,7 +562,7 @@ router.post('/runs/run/:id', requireSignIn, async (req: AuthenticatedRequest, re if (browser && currentPage) { const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( - workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings); + workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings, plainRun.robotMetaId); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); await destroyRemoteBrowser(plainRun.browserId); diff --git a/server/src/types/index.ts b/server/src/types/index.ts index 75aac8029..c365b9801 100644 --- a/server/src/types/index.ts +++ b/server/src/types/index.ts @@ -286,3 +286,25 @@ export type Action = | WheelAction | FullScreenshotAction | AwaitTextAction; + +interface Cookie { + name: string; + value: string; + domain: string; + path: string; + expires: number; + httpOnly: boolean; + secure: boolean; + sameSite: 'Lax' | 'Strict' | 'None'; +} + +interface Origin { + origin: string; + localStorage: Record[]; +} + +export interface SessionData { + cookies: Cookie[]; + origins: Origin[]; +} + diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index c8aec13c4..dc0bd2041 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -2,8 +2,9 @@ import Interpreter, { WorkflowFile } from "maxun-core"; import logger from "../../logger"; import { Socket } from "socket.io"; import { Page } from "playwright"; -import { InterpreterSettings } from "../../types"; +import { InterpreterSettings, SessionData } from "../../types"; import { decrypt } from "../../utils/auth"; +import Robot from "../../models/Robot"; /** * Decrypts any encrypted inputs in the workflow. If checkLimit is true, it will also handle the limit validation for scrapeList action. @@ -260,7 +261,8 @@ export class WorkflowInterpreter { workflow: WorkflowFile, page: Page, updatePageOnPause: (page: Page) => void, - settings: InterpreterSettings + settings: InterpreterSettings, + robotMetaId?: string, ) => { const params = settings.params ? settings.params : null; delete settings.params; @@ -289,7 +291,21 @@ export class WorkflowInterpreter { } } - const interpreter = new Interpreter(processedWorkflow, options); + let session: SessionData | null = null; + if (robotMetaId) { + const robot = await Robot.findOne({ + where: { + 'recording_meta.id': robotMetaId + }, + raw: true + }); + + if (robot && robot.recording_meta?.isLogin) { + session = robot.session; + } + } + + const interpreter = new Interpreter(processedWorkflow, options, session); this.interpreter = interpreter; interpreter.on('flag', async (page, resume) => { @@ -309,7 +325,17 @@ export class WorkflowInterpreter { } }); - const status = await interpreter.run(page, params); + session = await interpreter.run(page, params); + if (session && robotMetaId) { + await Robot.update( + { session: session }, + { + where: { + 'recording_meta.id': robotMetaId + } + } + ); + } const lastArray = this.serializableData.length > 1 ? [this.serializableData[this.serializableData.length - 1]] @@ -317,7 +343,7 @@ export class WorkflowInterpreter { const result = { log: this.debugMessages, - result: status, + result: session, serializableOutput: lastArray.reduce((reducedObject, item, index) => { return { [`item-${index}`]: item, diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index ade7d9699..621b9e459 100644 --- a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -126,7 +126,7 @@ async function executeRun(id: string) { const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( - workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings + workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings, plainRun.robotMetaId ); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); diff --git a/src/components/robot/RobotDuplicate.tsx b/src/components/robot/RobotDuplicate.tsx index efde9b2ab..2643fc8e0 100644 --- a/src/components/robot/RobotDuplicate.tsx +++ b/src/components/robot/RobotDuplicate.tsx @@ -14,6 +14,7 @@ interface RobotMeta { pairs: number; updatedAt: string; params: any[]; + isLogin?: boolean; } interface RobotWorkflow { diff --git a/src/components/robot/RobotEdit.tsx b/src/components/robot/RobotEdit.tsx index f1f79b779..c56f878a9 100644 --- a/src/components/robot/RobotEdit.tsx +++ b/src/components/robot/RobotEdit.tsx @@ -15,6 +15,7 @@ interface RobotMeta { pairs: number; updatedAt: string; params: any[]; + isLogin: boolean; } interface RobotWorkflow { diff --git a/src/components/robot/RobotSettings.tsx b/src/components/robot/RobotSettings.tsx index fdbf90e23..56fd9daf6 100644 --- a/src/components/robot/RobotSettings.tsx +++ b/src/components/robot/RobotSettings.tsx @@ -14,6 +14,7 @@ interface RobotMeta { pairs: number; updatedAt: string; params: any[]; + isLogin?: boolean; } interface RobotWorkflow {