Skip to content

feat: support for session state login #410

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: develop
Choose a base branch
from
113 changes: 109 additions & 4 deletions maxun-core/src/interpret.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import path from 'path';
import { EventEmitter } from 'events';
import {
Where, What, PageState, Workflow, WorkflowFile,
ParamType, SelectorArray, CustomFunctions,
ParamType, SelectorArray, CustomFunctions, SessionData,
RegexableString,
} from './types/workflow';

import { operators, meta } from './types/logic';
Expand Down Expand Up @@ -68,8 +69,13 @@ export default class Interpreter extends EventEmitter {

private cumulativeResults: Record<string, any>[] = [];

constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
private session: SessionData | null = null;

private loginSuccessful: boolean = false;

constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>, session?: SessionData | null) {
super();
this.session = session || null;
this.workflow = workflow.workflow;
this.initializedWorkflow = null;
this.options = {
Expand Down Expand Up @@ -129,6 +135,41 @@ export default class Interpreter extends EventEmitter {
}
}

/**
 * Heuristically decides whether a URL looks like a login page.
 *
 * The check is a case-insensitive substring match, so any URL that merely
 * contains one of the markers (for example "auth" inside "author") is also
 * reported as a login URL.
 *
 * @param url URL to inspect.
 * @returns `true` when the lower-cased URL contains `login`, `signin`,
 * `sign-in` or `auth`; `false` otherwise.
 */
private isLoginUrl(url: string): boolean {
  const haystack = url.toLowerCase();
  for (const marker of ['login', 'signin', 'sign-in', 'auth']) {
    if (haystack.includes(marker)) {
      return true;
    }
  }
  return false;
}

/**
 * Turns a `where.url` condition into a plain string.
 *
 * - A string is returned unchanged.
 * - A `{ $regex }` object has one leading `^` and one trailing `$` removed,
 *   and every escaped question mark (`\?`) replaced by `?`. Other regex
 *   escapes are left as they are.
 * - Anything else (including `undefined`) yields the empty string.
 *
 * @param url URL condition taken from a workflow pair, if any.
 * @returns The URL as a string, or `''` when none can be derived.
 */
private getUrlString(url: RegexableString | undefined): string {
  // Strings (the empty string included) pass straight through.
  if (typeof url === 'string') return url;

  // Missing value or an object without a `$regex` key: nothing to extract.
  if (!url || !('$regex' in url)) return '';

  const pattern = url['$regex'];
  const withoutStartAnchor = pattern.replace(/^\^/, '');
  const withoutAnchors = withoutStartAnchor.replace(/\$$/, '');
  return withoutAnchors.replace(/\\([?])/g, '$1');
}

/**
 * Scans the workflow from its last entry towards its first and returns the
 * index of the first entry met whose `where.url` is set, is not
 * `"about:blank"`, and is not recognised as a login URL.
 *
 * NOTE(review): the name implies the array is stored last-step-first, so the
 * highest matching index would be the earliest post-login step — confirm
 * against how workflows are recorded.
 *
 * @param workflow Workflow pairs to search.
 * @returns Index of the matching pair, or `-1` when no pair qualifies.
 */
private findFirstPostLoginAction(workflow: Workflow): number {
  let index = workflow.length;
  while (index > 0) {
    index -= 1;

    const { url } = workflow[index].where;
    // Pairs without a URL condition or bound to the blank page are skipped.
    if (!url || url === "about:blank") continue;

    // Pairs that target a login-looking URL are skipped as well.
    if (this.isLoginUrl(this.getUrlString(url))) continue;

    return index;
  }
  return -1;
}

// private getSelectors(workflow: Workflow, actionId: number): string[] {
// const selectors: string[] = [];

Expand Down Expand Up @@ -244,6 +285,14 @@ export default class Interpreter extends EventEmitter {
url = action.where.url;
}

if (this.loginSuccessful) {
const sessionState = await page.context().storageState();
this.session = sessionState;

this.loginSuccessful = false;
this.log('Stored authentication cookies after successful login', Level.LOG);
}

return {
url,
cookies: (await page.context().cookies([page.url()]))
Expand Down Expand Up @@ -829,6 +878,28 @@ export default class Interpreter extends EventEmitter {

workflowCopy = this.removeSpecialSelectors(workflowCopy);

if (this.session){
const postLoginActionId = this.findFirstPostLoginAction(workflowCopy);
if (postLoginActionId !== -1) {
const targetUrl = this.getUrlString(workflowCopy[postLoginActionId].where.url);
if (targetUrl) {
try {
await p.goto(targetUrl);
await p.waitForLoadState('networkidle');

if (!this.isLoginUrl(targetUrl)) {
workflowCopy.splice(postLoginActionId + 1);
this.log('Successfully skipped login using stored cookies', Level.LOG);
} else {
this.log('Cookie authentication failed, proceeding with manual login', Level.LOG);
}
} catch (error) {
this.log(`Failed to navigate with stored cookies: ${error}`, Level.ERROR);
}
}
}
}

// apply ad-blocker to the current page
try {
await this.applyAdBlocker(p);
Expand Down Expand Up @@ -916,6 +987,12 @@ export default class Interpreter extends EventEmitter {
await this.carryOutSteps(p, action.what);
usedActions.push(action.id ?? 'undefined');

const url = this.getUrlString(action.where.url);

if (this.isLoginUrl(url)) {
this.loginSuccessful = true;
}

workflowCopy.splice(actionId, 1);
console.log(`Action with ID ${action.id} removed from the workflow copy.`);

Expand Down Expand Up @@ -951,9 +1028,35 @@ export default class Interpreter extends EventEmitter {
* @param {ParamType} params Workflow-specific set of parameters
* for the `{$param: nameofparam}` fields.
*/
public async run(page: Page, params?: ParamType): Promise<void> {
public async run(page: Page, params?: ParamType): Promise<SessionData> {
this.log('Starting the workflow.', Level.LOG);
const context = page.context();
let context = page.context();

if (this.session) {
try {
this.log('Found existing session, creating new context with stored state...', Level.LOG);

const newContext = await context.browser().newContext({
storageState: {
cookies: this.session.cookies,
origins: this.session.origins.map(origin => ({
origin: origin.origin,
localStorage: origin.localStorage.map(storage => ({
name: storage.name,
value: storage.value
}))
}))
}
});

const newPage = await newContext.newPage();
page = newPage;

this.log('Successfully created new page with session state', Level.LOG);
} catch (error) {
this.log(`Failed to create page with session state: ${error.message}. Falling back to original page...`, Level.ERROR);
}
}

page.setDefaultNavigationTimeout(100000);

Expand Down Expand Up @@ -987,6 +1090,8 @@ export default class Interpreter extends EventEmitter {
await this.concurrency.waitForCompletion();

this.stopper = null;

return this.session;
}

public async stop(): Promise<void> {
Expand Down
26 changes: 24 additions & 2 deletions maxun-core/src/types/workflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export type Meta = typeof meta[number];

export type SelectorArray = string[];

type RegexableString = string | { '$regex': string };
export type RegexableString = string | { '$regex': string };

type BaseConditions = {
'url': RegexableString,
Expand Down Expand Up @@ -55,4 +55,26 @@ export type Workflow = WhereWhatPair[];
export type WorkflowFile = {
meta?: MetaData,
workflow: Workflow
};
};

/**
 * A single browser cookie as kept inside {@link SessionData}.
 * NOTE(review): the field set appears to mirror Playwright's cookie objects — confirm.
 */
interface Cookie {
  name: string;
  value: string;
  domain: string;
  path: string;
  // NOTE(review): presumably a Unix timestamp as in Playwright — confirm the unit.
  expires: number;
  httpOnly: boolean;
  secure: boolean;
  sameSite: 'Strict' | 'Lax' | 'None';
}

/** Local-storage entries captured for one origin. */
interface Origin {
  origin: string;
  // The interpreter reads `name` and `value` from each entry when restoring a session.
  localStorage: Array<Record<string, any>>;
}

/**
 * Stored browser session: cookies plus per-origin local storage.
 * The interpreter assigns it from `page.context().storageState()` and hands it
 * back to `browser.newContext({ storageState })` on a later run.
 */
export interface SessionData {
  cookies: Array<Cookie>;
  origins: Array<Origin>;
}

2 changes: 1 addition & 1 deletion server/src/api/record.ts
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ async function executeRun(id: string) {

const workflow = AddGeneratedFlags(recording.recording);
const interpretationInfo = await browser.interpreter.InterpretRecording(
workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings
workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings, plainRun.robotMetaId
);

const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
Expand Down
28 changes: 28 additions & 0 deletions server/src/models/Robot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,34 @@ interface RobotMeta {
pairs: number;
updatedAt: string;
params: any[];
isLogin?: boolean;
}

interface RobotWorkflow {
workflow: WhereWhatPair[];
}

/**
 * A single browser cookie as kept inside {@link SessionData}.
 * NOTE(review): the field set appears to mirror Playwright's cookie objects — confirm.
 */
interface Cookie {
  name: string;
  value: string;
  domain: string;
  path: string;
  // NOTE(review): presumably a Unix timestamp as in Playwright — confirm the unit.
  expires: number;
  httpOnly: boolean;
  secure: boolean;
  sameSite: 'Strict' | 'Lax' | 'None';
}

/** Local-storage entries captured for one origin. */
interface Origin {
  origin: string;
  localStorage: Array<Record<string, any>>;
}

/**
 * Browser session (cookies plus per-origin local storage) persisted on a robot
 * through the nullable `session` attribute.
 */
interface SessionData {
  cookies: Array<Cookie>;
  origins: Array<Origin>;
}

interface RobotAttributes {
id: string;
userId?: number;
Expand All @@ -26,6 +48,7 @@ interface RobotAttributes {
google_access_token?: string | null;
google_refresh_token?: string | null;
schedule?: ScheduleConfig | null;
session?: SessionData | null;
}

interface ScheduleConfig {
Expand Down Expand Up @@ -54,6 +77,7 @@ class Robot extends Model<RobotAttributes, RobotCreationAttributes> implements R
public google_access_token!: string | null;
public google_refresh_token!: string | null;
public schedule!: ScheduleConfig | null;
public session!: SessionData | null;
}

Robot.init(
Expand Down Expand Up @@ -99,6 +123,10 @@ Robot.init(
type: DataTypes.JSONB,
allowNull: true,
},
session: {
type: DataTypes.JSONB,
allowNull: true,
},
},
{
sequelize,
Expand Down
2 changes: 1 addition & 1 deletion server/src/routes/storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ router.post('/runs/run/:id', requireSignIn, async (req: AuthenticatedRequest, re
if (browser && currentPage) {
const workflow = AddGeneratedFlags(recording.recording);
const interpretationInfo = await browser.interpreter.InterpretRecording(
workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings);
workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings, plainRun.robotMetaId);
const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);
await destroyRemoteBrowser(plainRun.browserId);
Expand Down
22 changes: 22 additions & 0 deletions server/src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,25 @@ export type Action =
| WheelAction
| FullScreenshotAction
| AwaitTextAction;

/**
 * A single browser cookie as kept inside {@link SessionData}.
 * NOTE(review): the field set appears to mirror Playwright's cookie objects — confirm.
 */
interface Cookie {
  name: string;
  value: string;
  domain: string;
  path: string;
  // NOTE(review): presumably a Unix timestamp as in Playwright — confirm the unit.
  expires: number;
  httpOnly: boolean;
  secure: boolean;
  sameSite: 'Strict' | 'Lax' | 'None';
}

/** Local-storage entries captured for one origin. */
interface Origin {
  origin: string;
  localStorage: Array<Record<string, any>>;
}

/**
 * Browser session (cookies plus per-origin local storage) that the server
 * passes to the interpreter and receives back after a run.
 */
export interface SessionData {
  cookies: Array<Cookie>;
  origins: Array<Origin>;
}

36 changes: 31 additions & 5 deletions server/src/workflow-management/classes/Interpreter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ import Interpreter, { WorkflowFile } from "maxun-core";
import logger from "../../logger";
import { Socket } from "socket.io";
import { Page } from "playwright";
import { InterpreterSettings } from "../../types";
import { InterpreterSettings, SessionData } from "../../types";
import { decrypt } from "../../utils/auth";
import Robot from "../../models/Robot";

/**
* Decrypts any encrypted inputs in the workflow. If checkLimit is true, it will also handle the limit validation for scrapeList action.
Expand Down Expand Up @@ -260,7 +261,8 @@ export class WorkflowInterpreter {
workflow: WorkflowFile,
page: Page,
updatePageOnPause: (page: Page) => void,
settings: InterpreterSettings
settings: InterpreterSettings,
robotMetaId?: string,
) => {
const params = settings.params ? settings.params : null;
delete settings.params;
Expand Down Expand Up @@ -289,7 +291,21 @@ export class WorkflowInterpreter {
}
}

const interpreter = new Interpreter(processedWorkflow, options);
let session: SessionData | null = null;
if (robotMetaId) {
const robot = await Robot.findOne({
where: {
'recording_meta.id': robotMetaId
},
raw: true
});

if (robot && robot.recording_meta?.isLogin) {
session = robot.session;
}
}

const interpreter = new Interpreter(processedWorkflow, options, session);
this.interpreter = interpreter;

interpreter.on('flag', async (page, resume) => {
Expand All @@ -309,15 +325,25 @@ export class WorkflowInterpreter {
}
});

const status = await interpreter.run(page, params);
session = await interpreter.run(page, params);
if (session && robotMetaId) {
await Robot.update(
{ session: session },
{
where: {
'recording_meta.id': robotMetaId
}
}
);
}

const lastArray = this.serializableData.length > 1
? [this.serializableData[this.serializableData.length - 1]]
: this.serializableData;

const result = {
log: this.debugMessages,
result: status,
result: session,
serializableOutput: lastArray.reduce((reducedObject, item, index) => {
return {
[`item-${index}`]: item,
Expand Down
2 changes: 1 addition & 1 deletion server/src/workflow-management/scheduler/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ async function executeRun(id: string) {

const workflow = AddGeneratedFlags(recording.recording);
const interpretationInfo = await browser.interpreter.InterpretRecording(
workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings
workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings, plainRun.robotMetaId
);

const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
Expand Down
Loading