Skip to content

Commit cb39e8f

Browse files
author
KK
committed
Merge pull request 'dev' (#14) from dev into master
Reviewed-on: https://git.conleylee.com/conley/Tengine/pulls/14
2 parents 7af0113 + d489e04 commit cb39e8f

File tree

144 files changed

+1434
-1018
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

144 files changed

+1434
-1018
lines changed

.drone.yml

+7-4
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@ steps:
1919
- ../tests/test_rv64_ops.sh
2020
- name: test models
2121
image: ubuntu20.04:qemu
22+
environment:
23+
DATA_SERVER_URL:
24+
from_secret: DATA_SERVER_URL
2225
commands:
2326
- cd build
24-
- wget -nv http://192.168.3.19:9999/tengine_model_zoo/ci_data/models.tar.gz
25-
- wget -nv http://192.168.3.19:9999/tengine_model_zoo/ci_data/images.tar.gz
26-
- wget -nv http://192.168.3.19:9999/tengine_model_zoo/ci_data/data_x86.tar.gz
27+
- wget -nv $${DATA_SERVER_URL}/tengine_model_zoo/ci_data/models.tar.gz
28+
- wget -nv $${DATA_SERVER_URL}/tengine_model_zoo/ci_data/images.tar.gz
29+
- wget -nv $${DATA_SERVER_URL}/tengine_model_zoo/ci_data/data_x86.tar.gz
2730
- mkdir models images data
2831
- tar zxvf models.tar.gz -C models
2932
- tar zxvf images.tar.gz -C images
@@ -37,7 +40,7 @@ steps:
3740
image: ubuntu20.04:qemu
3841
commands:
3942
- cd build
40-
- apt install lcov -y
43+
- apt update && apt install lcov -y
4144
- lcov --gcov-tool /home/riscv/bin/riscv64-unknown-linux-gnu-gcov --capture --directory . --output-file $${DRONE_REPO_NAME}.info
4245
- genhtml --branch-coverage -o ../codecov $${DRONE_REPO_NAME}.info
4346
- name: scp files

source/device/cpu/cpu_node.h

-3
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,6 @@ struct node_ops
8080

8181
/* score */
8282
int (*score)(struct node_ops*, struct exec_graph*, struct node*);
83-
84-
/* is ref op */
85-
bool is_ref_op;
8683
};
8784

8885
int init_exec_node(struct exec_graph* exec_graph, struct exec_node* exec_node, struct node* ir_node, struct node_ops* node_ops);

source/device/cpu/op/absval/absval_ref.c

+9-8
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,15 @@ static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struc
8686
return OPS_SCORE_CANDO;
8787
}
8888

89-
static struct node_ops hcl_node_ops = {.prerun = prerun,
90-
.run = run,
91-
.reshape = NULL,
92-
.postrun = NULL,
93-
.init_node = init_node,
94-
.release_node = release_node,
95-
.score = score,
96-
.is_ref_op = true};
89+
static struct node_ops hcl_node_ops = {
90+
.prerun = prerun,
91+
.run = run,
92+
.reshape = NULL,
93+
.postrun = NULL,
94+
.init_node = init_node,
95+
.release_node = release_node,
96+
.score = score,
97+
};
9798

9899
int register_absval_ref_op()
99100
{

source/device/cpu/op/absval/cortex-a/absval_hcl_arm.c

+9-8
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,15 @@ static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struc
109109
return OPS_SCORE_BEST;
110110
}
111111

112-
static struct node_ops hcl_node_ops = {.prerun = prerun,
113-
.run = run,
114-
.reshape = NULL,
115-
.postrun = NULL,
116-
.init_node = init_node,
117-
.release_node = release_node,
118-
.score = score,
119-
.is_ref_op = false};
112+
static struct node_ops hcl_node_ops = {
113+
.prerun = prerun,
114+
.run = run,
115+
.reshape = NULL,
116+
.postrun = NULL,
117+
.init_node = init_node,
118+
.release_node = release_node,
119+
.score = score,
120+
};
120121

121122
int register_absval_hcl_arm_op()
122123
{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
#include "api/c_api.h"
2+
#include "graph/tensor.h"
3+
#include "graph/node.h"
4+
#include "graph/graph.h"
5+
#include "op/conv/risc-v/lp64dv/vsetvl_rvv.h"
6+
#include "utility/sys_port.h"
7+
#include "utility/log.h"
8+
#include "device/cpu/cpu_node.h"
9+
#include "device/cpu/cpu_graph.h"
10+
#include "operator/op.h"
11+
#include <math.h>
12+
#include "device/cpu/cpu_module.h"
13+
14+
/* Per-node initialisation hook: this operator keeps no private state, so it only reports success (0). */
static int init_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
{
    /* The hook signature is fixed by struct node_ops; none of the arguments are needed here. */
    (void)node_ops;
    (void)exec_node;
    (void)exec_graph;

    return 0;
}
18+
19+
/* Per-node teardown hook: nothing was acquired in init_node, so there is nothing to free; returns 0. */
static int release_node(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
{
    /* The hook signature is fixed by struct node_ops; none of the arguments are needed here. */
    (void)node_ops;
    (void)exec_node;
    (void)exec_graph;

    return 0;
}
23+
24+
/* Pre-run hook: this operator performs no preparation before execution and always returns 0. */
static int prerun(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
{
    /* The hook signature is fixed by struct node_ops; none of the arguments are needed here. */
    (void)node_ops;
    (void)exec_node;
    (void)exec_graph;

    return 0;
}
28+
29+
static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
30+
{
31+
struct node* ir_node = exec_node->ir_node;
32+
struct graph* ir_graph = ir_node->graph;
33+
struct tensor* input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
34+
struct tensor* output_tensor = get_ir_graph_tensor(ir_graph, ir_node->output_tensors[0]);
35+
36+
const float* input_data = input_tensor->data;
37+
float* output_data = output_tensor->data;
38+
39+
const int batch = input_tensor->dims[0];
40+
const int channel = input_tensor->dims[1];
41+
const int img_size = input_tensor->dims[1] * input_tensor->dims[2] * input_tensor->dims[3];
42+
43+
vsetvl_e32_m2();
44+
45+
for (int b = 0; b < batch; ++b)
46+
{
47+
int i = 0;
48+
for (; i < (img_size & -8); i += 8)
49+
{
50+
asm("vle32.v v0, (%0);\n"
51+
"vfabs.v v2, v0;\n"
52+
"vse32.v v2, (%1);\n"
53+
:
54+
: "r"(input_data), "r"(output_data)
55+
: "memory");
56+
input_data += 8;
57+
output_data += 8;
58+
}
59+
60+
for (; i < img_size; ++i)
61+
{
62+
*output_data = fabsf(*input_data);
63+
output_data++;
64+
input_data++;
65+
}
66+
}
67+
68+
return 0;
69+
}
70+
71+
static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struct node* ir_node)
72+
{
73+
struct graph* graph = ir_node->graph;
74+
struct tensor* input_tensor = get_ir_graph_tensor(graph, ir_node->input_tensors[0]);
75+
if (input_tensor->data_type != TENGINE_MODE_FP32 || input_tensor->layout != TENGINE_LAYOUT_NCHW)
76+
{
77+
return 0;
78+
}
79+
80+
return OPS_SCORE_PREFER;
81+
}
82+
83+
static struct node_ops hcl_node_ops = {
84+
.prerun = prerun,
85+
.run = run,
86+
.reshape = NULL,
87+
.postrun = NULL,
88+
.init_node = init_node,
89+
.release_node = release_node,
90+
.score = score};
91+
92+
int register_absval_hcl_rv64_op()
93+
{
94+
return register_builtin_node_ops(OP_ABSVAL, &hcl_node_ops);
95+
}
96+
97+
int unregister_absval_hcl_rv64_op()
98+
{
99+
return unregister_builtin_node_ops(OP_ABSVAL, &hcl_node_ops);
100+
}

source/device/cpu/op/add_n/add_n_ref.c

+19-9
Original file line numberDiff line numberDiff line change
@@ -117,17 +117,27 @@ static int postrun(struct node_ops* node_ops, struct exec_node* exec_node, struc
117117

118118
static int score(struct node_ops* node_ops, struct exec_graph* exec_graph, struct node* exec_node)
119119
{
120-
return OPS_SCORE_BEST;
120+
struct node* ir_node = exec_node;
121+
struct graph* ir_graph = ir_node->graph;
122+
struct tensor* input_tensor;
123+
124+
input_tensor = get_ir_graph_tensor(ir_graph, ir_node->input_tensors[0]);
125+
126+
if (input_tensor->data_type != TENGINE_DT_FP32 || input_tensor->layout != TENGINE_LAYOUT_NCHW)
127+
return 0;
128+
129+
return OPS_SCORE_CANDO;
121130
}
122131

123-
static struct node_ops add_n_node_ops = {.prerun = prerun,
124-
.run = run,
125-
.reshape = NULL,
126-
.postrun = postrun,
127-
.init_node = init_node,
128-
.release_node = release_node,
129-
.score = score,
130-
.is_ref_op = true};
132+
static struct node_ops add_n_node_ops = {
133+
.prerun = prerun,
134+
.run = run,
135+
.reshape = NULL,
136+
.postrun = postrun,
137+
.init_node = init_node,
138+
.release_node = release_node,
139+
.score = score,
140+
};
131141

132142
int register_add_n_ref_op()
133143
{

0 commit comments

Comments
 (0)