diff --git a/_search-plugins/ubi/data-structures.md b/_search-plugins/ubi/data-structures.md deleted file mode 100644 index 0c64c3254b..0000000000 --- a/_search-plugins/ubi/data-structures.md +++ /dev/null @@ -1,204 +0,0 @@ ---- -layout: default -title: UBI client data structures -parent: User Behavior Insights -has_children: false -nav_order: 10 ---- - -# UBI client data structures - -Data structures are used to create events that follow the [User Behavior Insights (UBI) event schema specification](https://github.com/o19s/ubi). -For more information about the schema, see [UBI index schemas]({{site.url}}{{site.baseurl}}/search-plugins/ubi/schemas/). - - -You must provide an implementation for the following functions: -- `getClientId()` -- `getQueryId()` - -You can also optionally provide an implementation for the following functions: -- `getSessionId()` -- `getPageId()` - - -The following JavaScript structures can be used as a starter implementation to serialize UBI events into schema-compatible JSON: -```js -/********************************************************************************************* - * Ubi Event data structures - * The following structures help ensure adherence to the UBI event schema - *********************************************************************************************/ - - - -export class UbiEventData { - constructor(object_type, id=null, description=null, details=null) { - this.object_id_field = object_type; - this.object_id = id; - this.description = description; - this.object_detail = details; - } -} -export class UbiPosition{ - constructor({ordinal=null, x=null, y=null, trail=null}={}) { - this.ordinal = ordinal; - this.x = x; - this.y = y; - if(trail) - this.trail = trail; - else { - const trail = getTrail(); - if(trail && trail.length > 0) - this.trail = trail; - } - } -} - - -export class UbiEventAttributes { - /** - * Tries to prepopulate common event attributes - * The developer can add an `object` that the user interacted 
with and - * the site `position` information relevant to the event - * - * Attributes, other than `object` or `position` can be added in the form: - * attributes['item1'] = 1 - * attributes['item2'] = '2' - * - * @param {*} attributes: object with general event attributes - * @param {*} object: the data object the user interacted with - * @param {*} position: the site position information - */ - constructor({attributes={}, object=null, position=null}={}) { - if(attributes != null){ - Object.assign(this, attributes); - } - if(object != null && Object.keys(object).length > 0){ - this.object = object; - } - if(position != null && Object.keys(position).length > 0){ - this.position = position; - } - this.setDefaultValues(); - } - - setDefaultValues(){ - try{ - if(!this.hasOwnProperty('dwell_time') && typeof TimeMe !== 'undefined'){ - this.dwell_time = TimeMe.getTimeOnPageInSeconds(window.location.pathname); - } - - if(!this.hasOwnProperty('browser')){ - this.browser = window.navigator.userAgent; - } - - if(!this.hasOwnProperty('page_id')){ - this.page_id = window.location.pathname; - } - if(!this.hasOwnProperty('session_id')){ - this.session_id = getSessionId(); - } - - if(!this.hasOwnProperty('page_id')){ - this.page_id = getPageId(); - } - - if(!this.hasOwnProperty('position') || this.position == null){ - const trail = getTrail(); - if(trail.length > 0){ - this.position = new UbiPosition({trail:trail}); - } - } - // ToDo: set IP - } - catch(error){ - console.log(error); - } - } -} - - - -export class UbiEvent { - constructor(action_name, {message_type='INFO', message=null, event_attributes={}, data_object={}}={}) { - this.action_name = action_name; - this.client_id = getClientId(); - this.query_id = getQueryId(); - this.timestamp = Date.now(); - - this.message_type = message_type; - if( message ) - this.message = message; - - this.event_attributes = new UbiEventAttributes({attributes:event_attributes, object:data_object}); - } - - /** - * Use to suppress null objects 
in the json output - * @param key - * @param value - * @returns - */ - static replacer(key, value){ - if(value == null || - (value.constructor == Object && Object.keys(value).length === 0)) { - return undefined; - } - return value; - } - - /** - * - * @returns json string - */ - toJson() { - return JSON.stringify(this, UbiEvent.replacer); - } -} -``` -{% include copy.html %} - -# Sample usage - -```js -export function logUbiMessage(event_type, message_type, message){ - let e = new UbiEvent(event_type, { - message_type:message_type, - message:message - }); - logEvent(e); -} - -export function logDwellTime(action_name, page, seconds){ - console.log(`${page} => ${seconds}`); - let e = new UbiEvent(action_name, { - message:`On page ${page} for ${seconds} seconds`, - event_attributes:{ - session_id: getSessionId()}, - dwell_seconds:seconds - }, - data_object:TimeMe - }); - logEvent(e); -} - -/** - * ordinal is the number within a list of results - * for the item that was clicked - */ -export function logItemClick(item, ordinal){ - let e = new UbiEvent('item_click', { - message:`Item ${item['object_id']} was clicked`, - event_attributes:{session_id: getSessionId()}, - data_object:item, - }); - e.event_attributes.position.ordinal = ordinal; - logEvent(e); -} - -export function logEvent( event ){ - // some configured http client - return client.index( index = 'ubi_events', body = event.toJson()); -} - -``` -{% include copy.html %} diff --git a/_search-plugins/ubi/index.md b/_search-plugins/ubi/index.md index bdf09a632b..f49ce76c12 100644 --- a/_search-plugins/ubi/index.md +++ b/_search-plugins/ubi/index.md @@ -11,39 +11,66 @@ redirect_from: **Introduced 2.15** {: .label .label-purple } -**References UBI Specification 1.0.0** +**References UBI Specification 1.2.0** {: .label .label-purple } -User Behavior Insights (UBI) is a plugin that captures client-side events and queries for the purposes of improving search relevance and the user experience. 
-It is a causal system, linking a user's query to all of their subsequent interactions with your application until they perform another search. +User Behavior Insights (UBI) is a standard for capturing client-side events and queries for the purposes of improving search relevance and the user experience. +It is a *causal* system, linking a user's query to all of their subsequent interactions with your application until they perform another search. +This differs from many systems that infer the linking of search to events through *chronological* sequence. -UBI includes the following elements: -* A machine-readable [schema](https://github.com/o19s/ubi) that faciliates interoperablity of the UBI specification. -* An OpenSearch [plugin](https://github.com/opensearch-project/user-behavior-insights) that facilitates the storage of client-side events and queries. -* A client-side JavaScript [example reference implementation]({{site.url}}{{site.baseurl}}/search-plugins/ubi/data-structures/) that shows how to capture events and send them to the OpenSearch UBI plugin. - - -The UBI documentation is organized into two categories: *Explanation and reference* and *Tutorials and how-to guides*: +> “how our users are using our product, whether search results were useful for them and whether they clicked on top-n results we gave and all related stuff” - Data Scientist -*Explanation and reference* - -| Link | Description | -| :--------- | :------- | -| [UBI Request/Response Specification](https://github.com/o19s/ubi/) | The industry-standard schema for UBI requests and responses. The current version references UBI Specification 1.0.0. | -| [UBI index schema]({{site.url}}{{site.baseurl}}/search-plugins/ubi/schemas/) | Documentation on the individual OpenSearch query and event stores. 
| +UBI includes the following elements: +* [ubi.js](https://github.com/opensearch-project/user-behavior-insights/tree/main/ubi-javascript-collector/ubi.js): a client-side JavaScript library that captures searches and events. +* A machine-readable [schema](https://github.com/o19s/ubi) that facilitates interoperability of the UBI specification. +* An (optional!) OpenSearch [plugin](https://github.com/opensearch-project/user-behavior-insights) that streamlines the recording of query data. +Advanced features in OpenSearch, such as the Search Quality Evaluation Framework and the Hybrid Search Optimizer, all build on the UBI specification. -*Tutorials and how-to guides* + -| Link | Description | -| :--------- | :------- | -| [UBI plugin](https://github.com/opensearch-project/user-behavior-insights) | How to install and use the UBI plugin. | -| [UBI client data structures]({{site.url}}{{site.baseurl}}/search-plugins/ubi/data-structures/) | Sample JavaScript structures for populating the event store. | -| [Example UBI query DSL queries]({{site.url}}{{site.baseurl}}/search-plugins/ubi/dsl-queries/) | How to write queries for UBI data in OpenSearch query DSL. | -| [Example UBI SQL queries]({{site.url}}{{site.baseurl}}/search-plugins/ubi/sql-queries/) | How to write analytic queries for UBI data in SQL. | -| [UBI dashboard tutorial]({{site.url}}{{site.baseurl}}/search-plugins/ubi/ubi-dashboard-tutorial/) | How to build a dashboard containing UBI data. | -| [Chorus Opensearch Edition](https://github.com/o19s/chorus-opensearch-edition/?tab=readme-ov-file#structured-learning-using-chorus-opensearch-edition) katas | A series of structured tutorials that teach you how to use UBI with OpenSearch through a demo e-commerce store. | + + + + + + + + + +
+

Tutorials

+ +
+

How To Guides

+ +
+

Explanation

+ +
+

Reference

+ +
The documentation categories were adapted using concepts based on [Diátaxis](https://diataxis.fr/). diff --git a/_search-plugins/ubi/schemas.md b/_search-plugins/ubi/schemas.md index d8398e43bc..5487a46773 100644 --- a/_search-plugins/ubi/schemas.md +++ b/_search-plugins/ubi/schemas.md @@ -16,8 +16,8 @@ The User Behavior Insights (UBI) data collection process involves tracking and r For UBI to function properly, the connections between the following fields must be consistently maintained within an application that has UBI enabled: -- [`object_id`](#object_id) represents an ID for whatever object the user receives in response to a query. For example, if you search for books, it might be an ISBN code of a book, such as `978-3-16-148410-0`. -- [`query_id`](#query_id) is a unique ID for the raw query language executed and the `object_id` values of the _hits_ returned by the user's query. +- [`object_id`](#object_id) represents an ID for whatever object the user receives in response to a query. For example, if you search for books, it might be an ISBN number for a book, such as `978-3-16-148410-0`. +- [`query_id`](#query_id) is a unique ID for the raw query language executed and the `object_id` maps to the primary identifier of the _hits_ returned by the user's query. - [`client_id`](#client_id) represents a unique query source. This is typically a web browser used by a unique user. - [`object_id_field`](#object_id_field) specifies the name of the field in your index that provides the `object_id`. For example, if you search for books, the value might be `isbn_code`. - [`action_name`](#action_name), though not technically an ID, specifies the exact user action (such as `click`, `add_to_cart`, `watch`, `view`, or `purchase`) that was taken (or not taken) for an object with a given `object_id`. 
@@ -138,11 +138,11 @@ All underlying query information and results (`object_ids`) are stored in the `u The `ubi_queries` index [schema](https://github.com/OpenSearch-project/user-behavior-insights/tree/main/src/main/resources/queries-mapping.json) includes the following fields: -- `timestamp` (events and queries): A UNIX timestamp indicating when the query was received. +- `timestamp` (events and queries): An ISO 8601-formatted timestamp indicating when the query was received. -- `query_id` (events and queries): The unique ID of the query provided by the client or generated automatically. Different queries with the same text generate different `query_id` values. - -- `client_id` (events and queries): A user/client ID provided by the client application. +- `query_id` (events and queries): The unique ID of the query provided by the client or generated by the search engine. Different queries with the same text generate different `query_id` values. + +- `client_id` (events and queries): A client ID provided by the client application. - `query_response_objects_ids` (queries): An array of object IDs. An ID can have the same value as the `_id`, but it is meant to be the externally valid ID of a document, item, or product. @@ -169,14 +169,14 @@ The following are the predefined, minimal fields in the `ubi_events` index:

- `query_id` (size 100): The unique identifier of a query, which is typically a UUID but can be any string. - The `query_id` is either provided by the client or generated at index time by the UBI plugin. The `query_id` values in both the **UBI queries** and **UBI events** indexes must be consistent. + The `query_id` is either provided by the client or generated at query time by the UBI plugin. The `query_id` values in both the **UBI queries** and **UBI events** indexes must be consistent.

- `client_id`: The client that issues the query. This is typically a web browser used by a unique user. The `client_id` in both the **UBI queries** and **UBI events** indexes must be consistent. -- `timestamp`: When the event occurred, either in UNIX format or formatted as `2018-11-13T20:20:39+00:00`. +- `timestamp`: When the event occurred, in ISO 8601 format, such as `2018-11-13T20:20:39+00:00`. - `message_type` (size 100): A logical bin for grouping actions (each with an `action_name`). For example, `QUERY` or `CONVERSION`. @@ -193,18 +193,12 @@ The following are the predefined, minimal fields in the `ubi_events` index: - `event_attributes.position.ordinal`: Tracks the list position that a user can select (for example, selecting the third element can be described as `event{onClick, results[4]}`). - - `event_attributes.position.{x,y}`: Tracks x and y values defined by the client. - - - `event_attributes.position.page_depth`: Tracks the page depth of the results. - - - `event_attributes.position.scroll_depth`: Tracks the scroll depth of the page results. - - - `event_attributes.position.trail`: A text field that tracks the path/trail that a user took to get to this location. - + - `event_attributes.position.xy.{x,y}`: Tracks x and y values defined by the client. + - `event_attributes.object`: Contains identifying information about the object returned by the query (for example, a book, product, or post). The `object` structure can refer to the object by internal ID or object ID. The `object_id` is the ID that links prior queries to this object. This field comprises the following subfields: - - `event_attributes.object.internal_id`: A unique ID that OpenSearch can use to internally index the object, for example, the `_id` field in the indexes. + - `event_attributes.object.internal_id`: The unique ID that OpenSearch uses to internally index the object, for example, the `_id` field in the indexes.

@@ -214,7 +208,7 @@ The following are the predefined, minimal fields in the `ubi_events` index:

- `event_attributes.object.object_id_field`: Indicates the type/class of the object and the name of the search index field that contains the `object_id`. + - `event_attributes.object.object_id_field`: Indicates the type/class of the object and the name of the search index field that contains the `object_id`, such as `ssn`, `isbn`, or `ean`. - `event_attributes.object.description`: An optional description of the object. diff --git a/_search-plugins/ubi/ubi-javascript-collector.md b/_search-plugins/ubi/ubi-javascript-collector.md new file mode 100644 index 0000000000..4bc5e0e839 --- /dev/null +++ b/_search-plugins/ubi/ubi-javascript-collector.md @@ -0,0 +1,118 @@ +--- +layout: default +title: UBI JavaScript Collector +parent: User Behavior Insights +has_children: false +nav_order: 10 +--- + +# UBI JavaScript collector + +UBI comes with a very basic JavaScript client that manages the life cycle of the `query_id` for a specific search and can create UBI Event data structures and store them for specific actions. + +For more information about the schema, see [UBI index schemas]({{site.url}}{{site.baseurl}}/search-plugins/ubi/schemas/). + +We recommend that you refer to the client as a starting point for your own specific needs. + +## Installation + +The client comes as a single file `ubi.js` and only has a dependency on the `axios` library. +Download it from https://github.com/opensearch-project/user-behavior-insights/tree/main/ubi-javascript-collector. + +Reference the events and create the client via: + +```js +import { UbiEvent } from './ubi'; +import { UbiEventAttributes } from './ubi' +import { UbiClient } from './ubi' + +const ubiClient = new UbiClient('http://localhost:9200'); +``` + + +## Creating an event + +The following code snippet tracks adding an item to a shopping cart in an e-commerce application. It uses the `UbiEvent` and `UbiEventAttributes` classes to encapsulate event details, which can then be sent to the tracking system. 
+```js +var event = new UbiEvent( + 'add_to_cart', + client_id, + session_id, + getQueryId(), + new UbiEventAttributes('product', item.primary_ean, item.title, item), + item.title + ' (' + item.id + ')' +); +``` + +### Parameters + +1. **Event Name**: + - `'add_to_cart'` - This string indicates the type of event being tracked. + +2. **Client ID**: + - `client_id` - A variable that holds the unique identifier for the client. This helps in distinguishing between different users or sessions. + +3. **Session ID**: + - `session_id` - A variable that contains the unique identifier for the user session. This is used to track user interactions within a specific session. + +4. **Query ID**: + - `getQueryId()` - A function call that retrieves the current query ID, which may represent a specific search or interaction context. + +5. **UbiEventAttributes**: + - This is an instance of the `UbiEventAttributes` class, which encapsulates additional details about the event: + - **Type**: + - `'product'` - Specifies that the attribute type is related to a product. + - **Primary EAN**: + - `item.primary_ean` - This is the product's unique identifier in EAN format. + - **Title**: + - `item.title` - The name or description of the product. + - **Item**: + - `item` - The complete product object containing all relevant details. + +6. **Event Label**: + - `item.title + ' (' + item.id + ')'` - This creates a descriptive label for the event that includes the product title and its unique identifier (ID). + +The `getQueryId()` method is a helper that returns the current query ID. The query ID itself must be unique and can be generated (and stored in the session) when a search is issued. +Here is a sample method that generates and stores a query ID: + +```js +function generateQueryId(){ + const query_id = generateGuid(); + sessionStorage.setItem('query_id', query_id); + return query_id; +} + +function generateGuid() { + let id = ''; + try{ + id = crypto.randomUUID(); + } + catch(error){ + // crypto.randomUUID only works in https, not http context, so fallback. 
+ id ='10000000-1000-4000-8000-100000000000'.replace(/[018]/g, c => + (c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16) + ); + } + return id; +}; +``` + +## Tracking the event + +Sending the event to the backend is as simple as: + +```js +ubiClient.trackEvent(event); +``` + + +## Tracking queries + +You have the option of tracking queries using the client (instead of using the UBI plugin for OpenSearch). + +This looks very similar to tracking events: + +```js +const query = new UbiQuery(APPLICATION, client_id, query_id, value, "_id", {}); +ubiClient.trackQuery(query) +```