

[QNN EP] Dump QNN json graph #22843

Draft · wants to merge 9 commits into main
4 changes: 3 additions & 1 deletion cmake/onnxruntime_providers_qnn.cmake
@@ -30,7 +30,9 @@

source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_qnn_cc_srcs})
onnxruntime_add_static_library(onnxruntime_providers_qnn ${onnxruntime_providers_qnn_cc_srcs})
-onnxruntime_add_include_to_target(onnxruntime_providers_qnn onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers::flatbuffers Boost::mp11)
+onnxruntime_add_include_to_target(onnxruntime_providers_qnn onnxruntime_common onnxruntime_framework onnx onnx_proto
+                                  protobuf::libprotobuf-lite flatbuffers::flatbuffers Boost::mp11
+                                  nlohmann_json::nlohmann_json)
target_link_libraries(onnxruntime_providers_qnn)
add_dependencies(onnxruntime_providers_qnn onnx ${onnxruntime_EXTERNAL_DEPENDENCIES})
set_target_properties(onnxruntime_providers_qnn PROPERTIES CXX_STANDARD_REQUIRED ON)
4 changes: 4 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -3662,6 +3662,10 @@ struct OrtApi {
* execution provider (typically CPU EP).
* - "0": Default. Disabled. QNN EP will handle quantization and dequantization of graph I/O.
* - "1": Enabled.
* "enable_qnn_graph_dump": Set to "1" to enable dumping of QNN graphs as JSON files. Each graph partition
* assigned to QNN EP is dumped to a separate file.
* "qnn_graph_dump_dir": Directory in which to dump QNN JSON graphs. If not specified, QNN graphs are dumped in the
* program's current working directory. Ignored if "enable_json_graphs_dump" is not set.
*
* SNPE supported keys:
* "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
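For context, a minimal sketch of how these new options could be enabled through the C++ API. The option keys match the documentation above; the backend library, model file, and dump directory are placeholder values, not part of this PR:

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "qnn_json_dump");
  Ort::SessionOptions session_options;

  // Provider options for QNN EP. "backend_path" is the usual QNN EP option;
  // the two dump options are the ones introduced in this PR.
  session_options.AppendExecutionProvider("QNN",
      {{"backend_path", "QnnHtp.dll"},
       {"enable_qnn_graph_dump", "1"},
       {"qnn_graph_dump_dir", "qnn_json_graphs"}});

  // Each graph partition assigned to QNN EP is dumped as a separate
  // JSON file when the session compiles the model.
  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
  return 0;
}
```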
16 changes: 10 additions & 6 deletions onnxruntime/core/providers/qnn/builder/qnn_def.cc
@@ -388,6 +388,15 @@ Status CompareQnnQuantParams(...)
return Status::OK();
}

+uint32_t CalcQnnTensorNumElems(const Qnn_Tensor_t& qnn_tensor) {
+  uint32_t* qnn_tensor_dims = GetQnnTensorDims(qnn_tensor);
+  uint32_t qnn_tensor_rank = GetQnnTensorRank(qnn_tensor);
+  return std::accumulate(qnn_tensor_dims,
+                         qnn_tensor_dims + qnn_tensor_rank,
+                         1u,
+                         std::multiplies<uint32_t>());
+}

bool CreateTensorInQnnGraph(const QNN_INTERFACE_VER_TYPE& qnn_interface,
const Qnn_GraphHandle_t& graph,
const std::string& node_name,
@@ -422,12 +431,7 @@ bool CreateTensorInQnnGraph(...)
return false;
}
// verify size expressed by the dims matches the raw tensor size
-  auto qnn_tensor_dims = GetQnnTensorDims(qnn_tensor);
-  auto qnn_tensor_rank = GetQnnTensorRank(qnn_tensor);
-  uint32_t qnn_tensor_size = std::accumulate(qnn_tensor_dims,
-                                             qnn_tensor_dims + qnn_tensor_rank,
-                                             static_cast<uint32_t>(data_size),
-                                             std::multiplies<uint32_t>());
+  uint32_t qnn_tensor_size = CalcQnnTensorNumElems(qnn_tensor) * gsl::narrow_cast<uint32_t>(data_size);
auto qnn_tensor_buf_size = GetQnnTensorClientBuf(qnn_tensor).dataSize;
if (qnn_tensor_size != qnn_tensor_buf_size) {
ss << "Data length mismatch for static tensor. node_name: " << node_name
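The refactor keeps the same check as before: the byte size implied by the dims must equal the client buffer size. A standalone sketch of that computation (the dims, element size, and buffer size below are made up for illustration):

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  // Hypothetical tensor metadata: a rank-3 tensor of float32 values.
  std::vector<uint32_t> dims{1, 128, 4};
  uint32_t elem_size = 4;           // sizeof(float)
  uint32_t client_buf_size = 2048;  // bytes reported by the client buffer

  // Number of elements is the product of the dims, as in CalcQnnTensorNumElems.
  uint32_t num_elems = std::accumulate(dims.begin(), dims.end(), 1u,
                                       std::multiplies<uint32_t>());

  // The expected byte size must match the raw buffer size exactly.
  if (num_elems * elem_size != client_buf_size) {
    std::cerr << "Data length mismatch for static tensor\n";
    return 1;
  }
  return 0;
}
```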
1 change: 1 addition & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_def.h
@@ -122,6 +122,7 @@ Qnn_DataType_t GetQnnTensorDataType(const Qnn_Tensor_t& qnn_tensor);
Qnn_TensorMemType_t GetQnnTensorMemType(const Qnn_Tensor_t& qnn_tensor);
uint32_t GetQnnTensorRank(const Qnn_Tensor_t& qnn_tensor);
uint32_t* GetQnnTensorDims(const Qnn_Tensor_t& qnn_tensor);
+uint32_t CalcQnnTensorNumElems(const Qnn_Tensor_t& qnn_tensor);
const Qnn_ClientBuffer_t& GetQnnTensorClientBuf(const Qnn_Tensor_t& qnn_tensor);
const Qnn_QuantizeParams_t& GetQnnTensorQParams(const Qnn_Tensor_t& qnn_tensor);

18 changes: 16 additions & 2 deletions onnxruntime/core/providers/qnn/builder/qnn_model.cc
@@ -97,7 +97,8 @@ Status QnnModel::ComposeGraph(const GraphViewer& graph_viewer,
const onnxruntime::Node& fused_node,
const qnn::ModelSettings& model_settings,
const logging::Logger& logger,
-                          const QnnGraph_Config_t** graph_configs) {
+                          const QnnGraph_Config_t** graph_configs,
+                          const std::string& debug_json_graph_path) {
LOGS(logger, VERBOSE) << "ComposeGraph Graph name: " << graph_viewer.Name();

// Holder for the NodeUnits in the graph, this will guarantee the NodeUnits is
@@ -140,7 +141,20 @@ Status QnnModel::ComposeGraph(...)
}
}

-  ORT_RETURN_IF_NOT(qnn_model_wrapper.ComposeQnnGraph(), "Failed to compose Qnn graph.");
+  const bool build_debug_json_graph = !debug_json_graph_path.empty();
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.ComposeQnnGraph(build_debug_json_graph), "Failed to compose Qnn graph.");

+  if (build_debug_json_graph) {
+    const nlohmann::json& json_graph = qnn_model_wrapper.GetQnnJSONGraph();
+    std::ofstream ofs(debug_json_graph_path);

+    if (ofs.is_open()) {
+      ofs << json_graph.dump();
+      ofs.close();
+    } else {
+      LOGS(logger, WARNING) << "Could not open JSON graph file: " << debug_json_graph_path;
+    }
+  }

rt = GetGraphInfoFromModel(qnn_model_wrapper, logger);
if (!rt) {
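Elsewhere (not shown in this diff), the EP has to turn the "qnn_graph_dump_dir" option plus the partition's graph name into the debug_json_graph_path argument. A plausible sketch of that plumbing; the helper name and both parameter names are assumptions, not code from this PR:

```cpp
#include <filesystem>
#include <string>

// Hypothetical helper: derive the JSON dump path for one QNN graph partition.
// `dump_dir` would come from the "qnn_graph_dump_dir" provider option (may be
// empty), `graph_name` from the fused node's graph name.
std::string MakeDebugJsonGraphPath(const std::string& dump_dir,
                                   const std::string& graph_name) {
  std::filesystem::path dir = dump_dir.empty()
                                  ? std::filesystem::current_path()
                                  : std::filesystem::path(dump_dir);
  return (dir / (graph_name + ".json")).string();
}
```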
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/qnn/builder/qnn_model.h
@@ -37,7 +37,8 @@ class QnnModel {
const onnxruntime::Node& fused_node,
const qnn::ModelSettings& model_settings,
const logging::Logger& logger,
-                       const QnnGraph_Config_t** graph_configs = nullptr);
+                       const QnnGraph_Config_t** graph_configs = nullptr,
+                       const std::string& debug_json_graph_path = "");

Status FinalizeGraphs(const logging::Logger& logger);

6 changes: 5 additions & 1 deletion onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
@@ -252,7 +252,7 @@ bool QnnModelWrapper::CreateQnnNode(const std::string& qnn_node_name,
}
}

-bool QnnModelWrapper::ComposeQnnGraph() {
+bool QnnModelWrapper::ComposeQnnGraph(bool build_debug_json_graph) {
LOGS(logger_, VERBOSE) << "Compose Qnn Graph.";
// ORT_RETURN_IF(qnn_op_property_list_.empty(), "Empty Qnn op list, no graph to compose.");
if (qnn_op_property_list_.empty()) {
@@ -291,6 +291,10 @@ bool QnnModelWrapper::ComposeQnnGraph() {
LOGS(logger_, ERROR) << error_msg;
return false;
}

+    if (build_debug_json_graph) {
+      debug_json_graph_.AddOp(op_config_wrapper);
+    }
}

return true;
10 changes: 8 additions & 2 deletions onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
@@ -9,7 +9,8 @@

#include "core/common/status.h"
#include "QnnInterface.h"
#include "qnn_def.h"
#include "core/providers/qnn/builder/qnn_def.h"
#include "core/providers/qnn/builder/qnn_utils.h"
#include "core/common/logging/logging.h"
#include "core/framework/node_unit.h"
#include "core/graph/graph_viewer.h"
@@ -91,7 +92,7 @@ class QnnModelWrapper {
std::vector<std::string>&& param_tensor_names,
bool do_op_validation = false);

-  bool ComposeQnnGraph();
+  bool ComposeQnnGraph(bool build_debug_json_graph = false);

Qnn_GraphHandle_t GetQnnGraph() { return graph_; }

@@ -127,6 +128,10 @@ class QnnModelWrapper {
return input_index_map_.find(tensor_name) != input_index_map_.end();
}

+  const nlohmann::json& GetQnnJSONGraph() {
+    return debug_json_graph_.Finalize();
+  }

Qnn_TensorType_t GetTensorType(const std::string& tensor_name) const {
if (IsInitializerInput(tensor_name)) {
return QNN_TENSOR_TYPE_STATIC;
@@ -270,6 +275,7 @@ class QnnModelWrapper {
const Qnn_BackendHandle_t& backend_handle_;
Qnn_GraphHandle_t graph_ = nullptr;
std::string graph_name_ = "";
+  utils::QnnJSONGraph debug_json_graph_;

std::vector<std::string> model_input_names_;
std::vector<std::string> model_output_names_;