Skip to content

Commit e639027

Browse files
authored
[Rule-based Auto Tagging] Add rule schema for auto tagging (#17238) (#17653)
Signed-off-by: Ruirui Zhang <[email protected]> * add rule schema for workload management Signed-off-by: Ruirui Zhang <[email protected]> * modify rule structure based on comment Signed-off-by: Ruirui Zhang <[email protected]> * update based on comments Signed-off-by: Ruirui Zhang <[email protected]> * add javadoc for autotagging directory Signed-off-by: Ruirui Zhang <[email protected]> * change field name from 'name' to 'description' Signed-off-by: Ruirui Zhang <[email protected]> * update the rule schema Signed-off-by: Ruirui Zhang <[email protected]> * simplify autotagging registry Signed-off-by: Ruirui Zhang <[email protected]> * remove generic type parameter from Rule class Signed-off-by: Ruirui Zhang <[email protected]> * modify based on comments Signed-off-by: Ruirui Zhang <[email protected]> (cherry picked from commit b24c72b) Signed-off-by: Ruirui Zhang <[email protected]>
1 parent c332506 commit e639027

11 files changed

+1013
-0
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
## [Unreleased 2.x]
77
### Added
88
- Improve performance of NumericTermAggregation by avoiding unnecessary sorting ([#17252](https://github.com/opensearch-project/OpenSearch/pull/17252))
9+
- [Rule Based Auto-tagging] Add rule schema for auto tagging ([#17238](https://github.com/opensearch-project/OpenSearch/pull/17238))
910
- Add execution_hint to cardinality aggregator request ([#17419](https://github.com/opensearch-project/OpenSearch/pull/17419))
1011
- [Rule Based Auto-tagging] Add in-memory attribute value store ([#17342](https://github.com/opensearch-project/OpenSearch/pull/17342))
1112

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.autotagging;
10+
11+
import org.opensearch.core.common.io.stream.StreamInput;
12+
import org.opensearch.core.common.io.stream.StreamOutput;
13+
import org.opensearch.core.common.io.stream.Writeable;
14+
15+
import java.io.IOException;
16+
17+
/**
18+
* Represents an attribute within the auto-tagging feature. Attributes define characteristics that can
19+
* be used for tagging and classification. Implementations must ensure that attributes
20+
* are uniquely identifiable by their name. Attributes should be singletons and managed centrally to
21+
* avoid duplicates.
22+
*
23+
* @opensearch.experimental
24+
*/
25+
public interface Attribute extends Writeable {
26+
String getName();
27+
28+
/**
29+
* Ensure that `validateAttribute` is called in the constructor of attribute implementations
30+
* to prevent potential serialization issues.
31+
*/
32+
default void validateAttribute() {
33+
String name = getName();
34+
if (name == null || name.isEmpty()) {
35+
throw new IllegalArgumentException("Attribute name cannot be null or empty");
36+
}
37+
}
38+
39+
@Override
40+
default void writeTo(StreamOutput out) throws IOException {
41+
out.writeString(getName());
42+
}
43+
44+
/**
45+
* Retrieves an attribute from the given feature type based on its name.
46+
* Implementations of `FeatureType.getAttributeFromName` must be thread-safe as this method
47+
* may be called concurrently.
48+
* @param in - the {@link StreamInput} from which the attribute name is read
49+
* @param featureType - the FeatureType used to look up the attribute
50+
*/
51+
static Attribute from(StreamInput in, FeatureType featureType) throws IOException {
52+
String attributeName = in.readString();
53+
Attribute attribute = featureType.getAttributeFromName(attributeName);
54+
if (attribute == null) {
55+
throw new IllegalStateException(attributeName + " is not a valid attribute under feature type " + featureType.getName());
56+
}
57+
return attribute;
58+
}
59+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.autotagging;
10+
11+
import org.opensearch.ResourceNotFoundException;
12+
13+
import java.util.HashMap;
14+
import java.util.Map;
15+
16+
/**
17+
* Registry for managing auto-tagging attributes and feature types.
18+
* This class provides functionality to register and retrieve {@link Attribute} and {@link FeatureType} instances
19+
* used for auto-tagging.
20+
*
21+
* @opensearch.experimental
22+
*/
23+
public class AutoTaggingRegistry {
24+
/**
25+
* featureTypesRegistryMap should be concurrently readable but not concurrently writable.
26+
* The registration of FeatureType should only be done during boot-up.
27+
*/
28+
public static final Map<String, FeatureType> featureTypesRegistryMap = new HashMap<>();
29+
public static final int MAX_FEATURE_TYPE_NAME_LENGTH = 30;
30+
31+
public static void registerFeatureType(FeatureType featureType) {
32+
validateFeatureType(featureType);
33+
String name = featureType.getName();
34+
if (featureTypesRegistryMap.containsKey(name) && featureTypesRegistryMap.get(name) != featureType) {
35+
throw new IllegalStateException("Feature type " + name + " is already registered. Duplicate feature type is not allowed.");
36+
}
37+
featureTypesRegistryMap.put(name, featureType);
38+
}
39+
40+
private static void validateFeatureType(FeatureType featureType) {
41+
if (featureType == null) {
42+
throw new IllegalStateException("Feature type can't be null. Unable to register.");
43+
}
44+
String name = featureType.getName();
45+
if (name == null || name.isEmpty() || name.length() > MAX_FEATURE_TYPE_NAME_LENGTH) {
46+
throw new IllegalStateException(
47+
"Feature type name " + name + " should not be null, empty or have more than " + MAX_FEATURE_TYPE_NAME_LENGTH + "characters"
48+
);
49+
}
50+
}
51+
52+
/**
53+
* Retrieves the registered {@link FeatureType} instance based on class name and feature type name.
54+
* This method assumes that FeatureTypes are singletons, meaning that each unique
55+
* (className, featureTypeName) pair corresponds to a single, globally shared instance.
56+
*
57+
* @param featureTypeName The name of the feature type.
58+
*/
59+
public static FeatureType getFeatureType(String featureTypeName) {
60+
FeatureType featureType = featureTypesRegistryMap.get(featureTypeName);
61+
if (featureType == null) {
62+
throw new ResourceNotFoundException(
63+
"Couldn't find a feature type with name: " + featureTypeName + ". Make sure you have registered it."
64+
);
65+
}
66+
return featureType;
67+
}
68+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.autotagging;
10+
11+
import org.opensearch.core.common.io.stream.StreamInput;
12+
import org.opensearch.core.common.io.stream.StreamOutput;
13+
import org.opensearch.core.common.io.stream.Writeable;
14+
15+
import java.io.IOException;
16+
import java.util.Map;
17+
18+
/**
19+
* Represents a feature type within the auto-tagging feature. Feature types define different categories of
20+
* characteristics that can be used for tagging and classification. Implementations of this interface are
21+
* responsible for registering feature types in {@link AutoTaggingRegistry}. Implementations must ensure that
22+
* feature types are uniquely identifiable by their class and name.
23+
*
24+
* Implementers should follow these guidelines:
25+
* Feature types should be singletons and managed centrally to avoid duplicates.
26+
* {@link #registerFeatureType()} must be called during initialization to ensure the feature type is available.
27+
*
28+
* @opensearch.experimental
29+
*/
30+
public interface FeatureType extends Writeable {
31+
int DEFAULT_MAX_ATTRIBUTE_VALUES = 10;
32+
int DEFAULT_MAX_ATTRIBUTE_VALUE_LENGTH = 100;
33+
34+
String getName();
35+
36+
/**
37+
* Returns the registry of allowed attributes for this feature type.
38+
* Implementations must ensure that access to this registry is thread-safe.
39+
*/
40+
Map<String, Attribute> getAllowedAttributesRegistry();
41+
42+
default int getMaxNumberOfValuesPerAttribute() {
43+
return DEFAULT_MAX_ATTRIBUTE_VALUES;
44+
}
45+
46+
default int getMaxCharLengthPerAttributeValue() {
47+
return DEFAULT_MAX_ATTRIBUTE_VALUE_LENGTH;
48+
}
49+
50+
void registerFeatureType();
51+
52+
default boolean isValidAttribute(Attribute attribute) {
53+
return getAllowedAttributesRegistry().containsValue(attribute);
54+
}
55+
56+
/**
57+
* Retrieves an attribute by its name from the allowed attributes' registry.
58+
* Implementations must ensure that this method is thread-safe.
59+
* @param name The name of the attribute.
60+
*/
61+
default Attribute getAttributeFromName(String name) {
62+
return getAllowedAttributesRegistry().get(name);
63+
}
64+
65+
@Override
66+
default void writeTo(StreamOutput out) throws IOException {
67+
out.writeString(getName());
68+
}
69+
70+
static FeatureType from(StreamInput in) throws IOException {
71+
return AutoTaggingRegistry.getFeatureType(in.readString());
72+
}
73+
}

0 commit comments

Comments
 (0)