-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Add npu_mlir_runtime.hpp for developer build #32522
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
XinWangIntel
wants to merge
1
commit into
openvinotoolkit:master
Choose a base branch
from
XinWangIntel:174100-mlir-deps
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+162
−0
Draft
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
156 changes: 156 additions & 0 deletions
156
src/plugins/intel_npu/src/al/include/intel_npu/npu_mlir_runtime.hpp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,156 @@ | ||
| // | ||
| // Copyright (C) 2023-2025 Intel Corporation. | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
| // | ||
|
|
||
| #ifndef NPU_MLIR_RUNTIME_H | ||
| #define NPU_MLIR_RUNTIME_H | ||
|
|
||
| #if defined(__cplusplus) | ||
| # pragma once | ||
| #endif | ||
|
|
||
| #include "ze_api.h" | ||
| #include "ze_graph_ext.h" | ||
|
|
||
| #if defined(__cplusplus) | ||
| # include <cstdint> | ||
| # include <cstdlib> | ||
| #else | ||
| # include <stdint.h> | ||
| # include <stdlib.h> | ||
| #endif | ||
|
|
||
| #if defined(__cplusplus) | ||
| extern "C" { | ||
| #endif | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Supported versions of the NPU MLIR runtime API | ||
| /// | ||
| /// @details | ||
| /// - Each version enumerator is built with ZE_MAKE_VERSION(major, minor). | ||
| /// - NOTE(review): this comment previously pointed readers at | ||
| /// ::NPU_MLIR_RUNTIME_MAJOR_VERSION and ::NPU_MLIR_RUNTIME_MINOR_VERSION, but no such | ||
| /// macros are defined in the visible part of this header — confirm whether they are | ||
| /// still to be added or are provided elsewhere. | ||
| typedef enum _npu_mlir_runtime_version_t { | ||
| NPU_MLIR_RUNTIME_VERSION_1_0 = ZE_MAKE_VERSION(1, 0), ///< version 1.0 | ||
| NPU_MLIR_RUNTIME_VERSION_CURRENT = NPU_MLIR_RUNTIME_VERSION_1_0, ///< latest known version | ||
| NPU_MLIR_RUNTIME_VERSION_FORCE_UINT32 = 0x7fffffff, ///< not a version; presumably pins the enum to 32 bits — confirm | ||
| } npu_mlir_runtime_version_t; | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// The definition below is skipped when NPU_MLIR_RUNTIME_APICALL is already defined, | ||
| /// so a build can supply its own calling convention before including this header. | ||
| #ifndef NPU_MLIR_RUNTIME_APICALL | ||
| # if defined(_WIN32) | ||
| /// @brief Calling convention for all API functions | ||
| # define NPU_MLIR_RUNTIME_APICALL __cdecl | ||
| # else | ||
| /// @brief Expands to nothing when _WIN32 is not defined | ||
| # define NPU_MLIR_RUNTIME_APICALL | ||
| # endif // defined(_WIN32) | ||
| #endif // NPU_MLIR_RUNTIME_APICALL | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// The definition below is skipped when NPU_MLIR_RUNTIME_APIEXPORT is already defined, | ||
| /// so a build can supply its own export attribute before including this header. | ||
| /// | ||
| /// NOTE(review): on Windows every includer gets __declspec(dllexport); there is no | ||
| /// dllimport branch for code that only consumes the API. On other platforms the macro is | ||
| /// empty, so no symbol-visibility attribute is applied — confirm both are intended. | ||
| #ifndef NPU_MLIR_RUNTIME_APIEXPORT | ||
| # if defined(_WIN32) | ||
| /// @brief Windows-specific dllexport storage-class attribute | ||
| # define NPU_MLIR_RUNTIME_APIEXPORT __declspec(dllexport) | ||
| # else | ||
| /// @brief Expands to nothing when _WIN32 is not defined | ||
| # define NPU_MLIR_RUNTIME_APIEXPORT | ||
| # endif // defined(_WIN32) | ||
| #endif // NPU_MLIR_RUNTIME_APIEXPORT | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief NPU MLIR runtime handle | ||
| /// | ||
| /// @details | ||
| /// - Pointer to a struct that is only forward-declared here, so its contents are opaque | ||
| /// to code that includes this header. | ||
| /// - Written by npuMLIRRuntimeCreate and passed to the other npuMLIRRuntime* functions. | ||
| typedef struct _npu_mlir_runtime_handle_t* npu_mlir_runtime_handle_t; | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Defined Return/Error codes | ||
| /// | ||
| /// @details | ||
| /// - Returned by every npuMLIRRuntime* function declared in this header. | ||
| /// - NOTE(review): the error values are above 0x7fffffff and therefore do not fit in a | ||
| /// 32-bit signed int. ISO C before C23 requires enumerator constants to be representable | ||
| /// as int, so a strict C compiler may warn or reject them; C++ accepts them. | ||
| /// - NOTE(review): the FORCE_UINT32 value here is 0x8fffffff, whereas | ||
| /// NPU_MLIR_RUNTIME_VERSION_FORCE_UINT32 is 0x7fffffff — confirm the difference is intended. | ||
| typedef enum _npu_mlir_runtime_result_t { | ||
| NPU_MLIR_RUNTIME_RESULT_SUCCESS = 0, ///< call completed without error | ||
| NPU_MLIR_RUNTIME_RESULT_ERROR_INVALID_NULL_POINTER = 0x80000001, ///< presumably a required pointer argument was null — confirm | ||
| NPU_MLIR_RUNTIME_RESULT_ERROR_UNKNOWN = 0x8ffffffe, ///< unspecified failure | ||
| NPU_MLIR_RUNTIME_RESULT_FORCE_UINT32 = 0x8fffffff, ///< not a result code; presumably pins the enum to 32 bits — confirm | ||
| } npu_mlir_runtime_result_t; | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Blob descriptor | ||
| /// | ||
| /// @details | ||
| /// - Describes a read-only byte buffer by address and length; passed to | ||
| /// npuMLIRRuntimeCreate as its descriptor argument. | ||
| /// - NOTE(review): the header does not state how long the buffer must stay valid after | ||
| /// npuMLIRRuntimeCreate returns — confirm and document the lifetime requirement. | ||
| typedef struct _npu_mlir_runtime_blob_desc_t { | ||
| size_t inputSize; ///< [in] Size of input buffer in bytes | ||
| const uint8_t* pInput; ///< [in] Pointer to input buffer | ||
| } npu_mlir_runtime_blob_desc_t; | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Runtime properties | ||
| /// | ||
| /// @details | ||
| /// - Passed by pointer to npuMLIRRuntimeCreate. | ||
| typedef struct _npu_mlir_runtime_properties_t { | ||
| uint32_t numOfSubGraphs; ///< presumably the number of sub-graphs in the blob — TODO confirm | ||
| uint32_t numOfGraphArgs; ///< presumably the number of graph arguments (inputs and outputs) — TODO confirm | ||
| } npu_mlir_runtime_properties_t; | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Memory reference descriptor for one input or output argument | ||
| /// | ||
| /// @details | ||
| /// - Arrays of pointers to this struct are used for the inputs and outputs of | ||
| /// npuMLIRRuntimeExecute and npuMLIRRuntimePredictOutputShape. | ||
| /// - The sizes and strides arrays have a fixed length of 4, so at most four dimensions | ||
| /// can be described. | ||
| /// - NOTE(review): the field set resembles an MLIR strided memref descriptor; the units of | ||
| /// offset/strides (elements vs. bytes) are not stated in this header — confirm. | ||
| typedef struct _npu_mlir_runtime_mem_ref_t { | ||
| const void* basePtr; ///< presumably the start of the underlying allocation — TODO confirm | ||
| const void* data; ///< presumably the pointer the offset is applied to — TODO confirm | ||
| int64_t offset; ///< offset into the buffer; unit not stated here | ||
| int64_t sizes[4]; ///< one extent per dimension | ||
| int64_t strides[4]; ///< one stride per dimension | ||
| } npu_mlir_runtime_mem_ref_t; | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Execute params | ||
| /// | ||
| /// @details | ||
| /// - Passed by pointer to npuMLIRRuntimeExecute. | ||
| /// - NOTE(review): numCommandLists is uint64_t while numOfInputs/numOfOutputs are | ||
| /// uint32_t — confirm the width difference is intended. | ||
| /// - NOTE(review): which of the Level Zero handles may be null is not stated in this | ||
| /// header — confirm and document. | ||
| typedef struct _npu_mlir_runtime_execute_params_t { | ||
| npu_mlir_runtime_mem_ref_t** pInputs; ///< array of pointers to input descriptors | ||
| uint32_t numOfInputs; ///< presumably the number of entries in pInputs — confirm | ||
| npu_mlir_runtime_mem_ref_t** pOutputs; ///< array of pointers to output descriptors | ||
| uint32_t numOfOutputs; ///< presumably the number of entries in pOutputs — confirm | ||
| ze_context_handle_t ctx; ///< Level Zero context handle | ||
| ze_device_handle_t device; ///< Level Zero device handle | ||
| ze_graph_dditable_ext_t* graphDdiTableExt; ///< pointer to the graph extension DDI table | ||
| ze_command_list_handle_t* commandLists; ///< array of Level Zero command list handles | ||
| uint64_t numCommandLists; ///< presumably the number of entries in commandLists — confirm | ||
| ze_command_queue_handle_t commandQueue; ///< Level Zero command queue handle | ||
| ze_fence_handle_t inferenceFence; ///< Level Zero fence handle | ||
| ze_event_handle_t event; ///< Level Zero event handle | ||
| } npu_mlir_runtime_execute_params_t; | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Init MLIR runtime instance and return handle | ||
| /// | ||
| /// @details | ||
| /// - The handle written to phRuntime is the type accepted by the other npuMLIRRuntime* | ||
| /// functions, including npuMLIRRuntimeDestroy. | ||
| /// - NOTE(review): pProperties is tagged [in] but is a pointer to non-const; presumably | ||
| /// the runtime fills it in (i.e. it is an [out] parameter) — confirm. | ||
| /// | ||
| /// @returns an ::npu_mlir_runtime_result_t code | ||
| NPU_MLIR_RUNTIME_APIEXPORT npu_mlir_runtime_result_t NPU_MLIR_RUNTIME_APICALL npuMLIRRuntimeCreate( | ||
| const npu_mlir_runtime_blob_desc_t* desc, ///< [in] pointer to graph descriptor | ||
| npu_mlir_runtime_handle_t* phRuntime, ///< [out] pointer to handle of mlir runtime object created | ||
| npu_mlir_runtime_properties_t* pProperties ///< [in] pointer to properties of the runtime | ||
| ); | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Destroy MLIR runtime instance | ||
| /// | ||
| /// @details | ||
| /// - Takes a handle of the type produced by npuMLIRRuntimeCreate. | ||
| /// | ||
| /// @returns an ::npu_mlir_runtime_result_t code | ||
| NPU_MLIR_RUNTIME_APIEXPORT npu_mlir_runtime_result_t NPU_MLIR_RUNTIME_APICALL npuMLIRRuntimeDestroy( | ||
| npu_mlir_runtime_handle_t hRuntime ///< [in][release] handle of mlir runtime object to destroy | ||
| ); | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Get metadata from MLIR runtime instance | ||
| /// | ||
| /// @details | ||
| /// - pGraphArgumentMetadata is declared with the ze_graph_argument_metadata_t typedef | ||
| /// rather than the bare struct tag, so the prototype is also valid when this header is | ||
| /// compiled as C (a tag name without the `struct` keyword is only accepted by C++). | ||
| /// - argIndex presumably selects which graph argument to query — confirm its valid range. | ||
| /// - NOTE(review): hRuntime was previously tagged [in][release]; that tag looks | ||
| /// copy-pasted from npuMLIRRuntimeDestroy and has been reduced to [in] — confirm. | ||
| /// | ||
| /// @returns an ::npu_mlir_runtime_result_t code | ||
| NPU_MLIR_RUNTIME_APIEXPORT npu_mlir_runtime_result_t NPU_MLIR_RUNTIME_APICALL | ||
| npuMLIRRuntimeGetMetadata(npu_mlir_runtime_handle_t hRuntime, ///< [in] handle of mlir runtime object | ||
| uint32_t argIndex, ///< [in] index of the argument to query | ||
| ze_graph_argument_properties_3_t* | ||
| pGraphArgumentProperties, ///< [in,out] query result for graph argument properties. | ||
| ze_graph_argument_metadata_t* pGraphArgumentMetadata); | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Execute MLIR runtime with params | ||
| /// | ||
| /// @details | ||
| /// - NOTE(review): hRuntime was previously tagged [in][release]; that tag looks | ||
| /// copy-pasted from npuMLIRRuntimeDestroy and has been reduced to [in] — confirm. | ||
| /// | ||
| /// @returns an ::npu_mlir_runtime_result_t code | ||
| NPU_MLIR_RUNTIME_APIEXPORT npu_mlir_runtime_result_t NPU_MLIR_RUNTIME_APICALL npuMLIRRuntimeExecute( | ||
| npu_mlir_runtime_handle_t hRuntime, ///< [in] handle of mlir runtime object | ||
| npu_mlir_runtime_execute_params_t* pParams ///< [in] pointer to execution parameters | ||
| ); | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////////// | ||
| /// @brief Predict output shape based on input shape | ||
| /// | ||
| /// @details | ||
| /// - NOTE(review): hRuntime was previously tagged [in][release]; that tag looks | ||
| /// copy-pasted from npuMLIRRuntimeDestroy and has been reduced to [in] — confirm. | ||
| /// - NOTE(review): which fields of the output descriptors are written (presumably sizes | ||
| /// and strides) is not stated in this header — confirm. | ||
| /// | ||
| /// @returns an ::npu_mlir_runtime_result_t code | ||
| NPU_MLIR_RUNTIME_APIEXPORT npu_mlir_runtime_result_t NPU_MLIR_RUNTIME_APICALL npuMLIRRuntimePredictOutputShape( | ||
| npu_mlir_runtime_handle_t hRuntime, ///< [in] handle of mlir runtime object | ||
| npu_mlir_runtime_mem_ref_t** pInputArgs, ///< [in] pointer to input argument mem descriptor pointer array | ||
| uint32_t numOfInputArgs, ///< [in] number of input arguments | ||
| npu_mlir_runtime_mem_ref_t** pOutputArgs, ///< [out] pointer to output argument mem descriptor pointer array | ||
| uint32_t numOfOutputArgs ///< [in] number of output arguments | ||
| ); | ||
|
|
||
| #if defined(__cplusplus) | ||
| } // extern "C" | ||
| #endif | ||
|
|
||
| #endif // NPU_MLIR_RUNTIME_H | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a reminder for me to file a ticket to extend this to 5D and to support arbitrary rank in the MLIR runtime.