1111import com .parquet .parquetdataformat .fields .ArrowSchemaBuilder ;
1212import com .parquet .parquetdataformat .engine .read .ParquetDataSourceCodec ;
1313import com .parquet .parquetdataformat .writer .ParquetWriter ;
14+ import org .opensearch .cluster .metadata .IndexNameExpressionResolver ;
15+ import org .opensearch .cluster .service .ClusterService ;
16+ import org .opensearch .common .settings .Setting ;
17+ import org .opensearch .common .settings .Settings ;
18+ import org .opensearch .core .common .io .stream .NamedWriteableRegistry ;
19+ import org .opensearch .core .xcontent .NamedXContentRegistry ;
20+ import org .opensearch .env .Environment ;
21+ import org .opensearch .env .NodeEnvironment ;
1422import org .opensearch .index .engine .DataFormatPlugin ;
1523import org .opensearch .index .engine .exec .DataFormat ;
1624import org .opensearch .index .engine .exec .IndexingExecutionEngine ;
2028import org .opensearch .plugins .DataSourcePlugin ;
2129import org .opensearch .index .mapper .MapperService ;
2230import org .opensearch .plugins .Plugin ;
31+ import org .opensearch .repositories .RepositoriesService ;
32+ import org .opensearch .script .ScriptService ;
33+ import org .opensearch .threadpool .ThreadPool ;
34+ import org .opensearch .transport .client .Client ;
2335import org .opensearch .vectorized .execution .search .spi .DataSourceCodec ;
36+ import org .opensearch .watcher .ResourceWatcherService ;
2437
2538import java .io .IOException ;
26- import java .util .HashMap ;
27- import java .util .Map ;
28- import java .util .Optional ;
39+ import java .util .*;
40+ import java .util .function .Supplier ;
2941
3042/**
3143 * OpenSearch plugin that provides Parquet data format support for indexing operations.
5870 */
5971public class ParquetDataFormatPlugin extends Plugin implements DataFormatPlugin , DataSourcePlugin {
6072
73+ private Settings settings ;
74+
75+ public static String DEFAULT_MAX_NATIVE_ALLOCATION = "10%" ;
76+
77+ public static final Setting <String > INDEX_MAX_NATIVE_ALLOCATION = Setting .simpleString (
78+ "index.parquet.max_native_allocation" ,
79+ DEFAULT_MAX_NATIVE_ALLOCATION ,
80+ Setting .Property .NodeScope ,
81+ Setting .Property .Dynamic
82+ );
83+
6184 @ Override
6285 @ SuppressWarnings ("unchecked" )
6386 public <T extends DataFormat > IndexingExecutionEngine <T > indexingEngine (MapperService mapperService , ShardPath shardPath ) {
64- return (IndexingExecutionEngine <T >) new ParquetExecutionEngine (() -> ArrowSchemaBuilder .getSchema (mapperService ), shardPath );
87+ return (IndexingExecutionEngine <T >) new ParquetExecutionEngine (settings , () -> ArrowSchemaBuilder .getSchema (mapperService ), shardPath );
88+ }
89+
90+ @ Override
91+ public Collection <Object > createComponents (
92+ Client client ,
93+ ClusterService clusterService ,
94+ ThreadPool threadPool ,
95+ ResourceWatcherService resourceWatcherService ,
96+ ScriptService scriptService ,
97+ NamedXContentRegistry xContentRegistry ,
98+ Environment environment ,
99+ NodeEnvironment nodeEnvironment ,
100+ NamedWriteableRegistry namedWriteableRegistry ,
101+ IndexNameExpressionResolver indexNameExpressionResolver ,
102+ Supplier <RepositoriesService > repositoriesServiceSupplier
103+ ) {
104+ this .settings = clusterService .getSettings ();
105+ return super .createComponents (client , clusterService , threadPool , resourceWatcherService , scriptService , xContentRegistry , environment , nodeEnvironment , namedWriteableRegistry , indexNameExpressionResolver , repositoriesServiceSupplier );
65106 }
66107
67108 private Class <? extends DataFormat > getDataFormatType () {
@@ -83,6 +124,11 @@ public Optional<Map<org.opensearch.vectorized.execution.search.DataFormat, DataS
83124 // return Optional.empty();
84125 }
85126
127+ @ Override
128+ public List <Setting <?>> getSettings () {
129+ return List .of (INDEX_MAX_NATIVE_ALLOCATION );
130+ }
131+
86132 // for testing locally only
87133 public void indexDataToParquetEngine () throws IOException {
88134 //Create Engine (take Schema as Input)
0 commit comments