dnn: add attention layer #24476


Merged
asmorkalov merged 26 commits into opencv:4.x from fengyuentau:attention_layer on Dec 20, 2023
Changes from 1 commit
Commits (26)
a403bec
first commit
fengyuentau · Nov 1, 2023
d892ef6
resolve comments; add packWeight
fengyuentau · Nov 7, 2023
aa8bf58
add single-thread fastGemm; implement the first matmul in attention
fengyuentau · Nov 9, 2023
a74faf5
fix several bugs to get correct internal results
fengyuentau · Nov 15, 2023
3b842e8
quick followup fix
fengyuentau · Nov 20, 2023
d9f1569
attention prob impl
fengyuentau · Nov 22, 2023
53e4b86
complete impl
fengyuentau · Nov 23, 2023
3ee63fd
add test case
fengyuentau · Nov 23, 2023
751cea7
add perf
fengyuentau · Nov 24, 2023
a1128aa
support v_Slice.end=INT64_MAX; support single-head attention subgraph…
fengyuentau · Nov 25, 2023
92701ea
add acc tests (commented for now); clear qkv_hidden_sizes everytime i…
fengyuentau · Nov 28, 2023
c8832ca
handle optional inputs in graph simplifier
fengyuentau · Nov 29, 2023
61965d6
add perf and acc test for vittrack (comment for now)
fengyuentau · Nov 29, 2023
0d36eac
revert graph simplifier changes before rebase
fengyuentau · Dec 7, 2023
0776fc5
fix graph simplifier
fengyuentau · Dec 7, 2023
9e617fa
clear perf results
fengyuentau · Dec 7, 2023
5c12f40
cpu only attention subgraph fusion
fengyuentau · Dec 9, 2023
5fce823
use OPENCV_DNN_BACKEND_DEFAULT
fengyuentau · Dec 9, 2023
677694a
changes by review
fengyuentau · Dec 10, 2023
35c3123
slice up to 5 inputs
fengyuentau · Dec 11, 2023
a4f4811
add acc and perf tests
fengyuentau · Dec 15, 2023
2db7246
empty commit to trigger tests
fengyuentau · Dec 15, 2023
846237d
empty commit to trigger tests 1
fengyuentau · Dec 15, 2023
51e2f25
try to make ci green by fixing output dimension problem
fengyuentau · Dec 20, 2023
2e5ea89
fix shape; set weight path of vittrack optional
fengyuentau · Dec 20, 2023
cb8ac70
Skit new test for CUDA FP16 for now.
asmorkalov · Dec 20, 2023
try to make ci green by fixing output dimension problem
@fengyuentau
fengyuentau committed Dec 20, 2023
commit 51e2f25e77bea54addaea60ebbe7efe63fca1d33
13 changes: 12 additions & 1 deletion — modules/dnn/src/layers/attention_layer.cpp
@@ -50,6 +50,8 @@ class AttentionLayerImpl CV_FINAL : public AttentionLayer {
 
         scale = 1.f / params.get<float>("scale", sqrt(qkv_head_sizes[0]));
 
+        output_ndims = params.get<int>("output_ndims", 3);
+
         is_prepacked = false;
     }
 
@@ -72,7 +74,15 @@ class AttentionLayerImpl CV_FINAL : public AttentionLayer {
         CV_CheckEQ(input_shape[2], weight_shape[0], "DNN/Attention: invalid input shape");
         CV_CheckEQ(weight_shape[1], bias_shape[0], "DNN/Attention: invalid weight or bias shape");
 
-        outputs.assign(1, inputs[0]);
+        if (output_ndims == 3) {
+            outputs.assign(1, inputs[0]);
+        } else if (output_ndims == 2) {
+            int batch = input_shape[0], seq_len = input_shape[1], input_hidden_size = input_shape[2];
+            MatShape output_shape{batch * seq_len, input_hidden_size};
+            outputs.assign(1, output_shape);
+        } else {
+            CV_Error(Error::StsBadArg, format("DNN/Attention: invalid output dimension %zu, valid value is 2 or 3", output_ndims));
+        }
         return false;
     }
 
@@ -238,6 +248,7 @@ class AttentionLayerImpl CV_FINAL : public AttentionLayer {
     size_t num_heads;
     std::vector<size_t> qkv_hidden_sizes; // order: {qk_hidden_size, qk_hidden_size, v_hidden_size}
     float scale;
+    size_t output_ndims;
 
     std::vector<size_t> qkv_head_sizes; // order: {qk_head_size, qk_head_size, v_head_size}
 
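The second hunk above is the substance of the CI fix: getMemoryShapes no longer blindly echoes the 3-D input shape, but reproduces the rank that the original model's trailing Reshape produced. A minimal standalone sketch of the rule (plain C++, not the OpenCV DNN API; the function name and the main() driver are illustrative only):

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Output shape of the fused Attention layer for a [batch, seq_len, hidden]
    // input, given the recorded rank of the subgraph's trailing Reshape.
    static std::vector<int> attentionOutputShape(const std::vector<int>& in,
                                                 int output_ndims)
    {
        assert(in.size() == 3);          // the layer expects 3-D input
        if (output_ndims == 3)
            return in;                   // keep [batch, seq_len, hidden]
        assert(output_ndims == 2);       // only 2 or 3 are valid
        return {in[0] * in[1], in[2]};   // flatten to [batch*seq_len, hidden]
    }

    int main()
    {
        std::vector<int> in{2, 16, 768};
        std::vector<int> s3 = attentionOutputShape(in, 3);  // {2, 16, 768}
        std::vector<int> s2 = attentionOutputShape(in, 2);  // {32, 768}
        std::printf("%dx%dx%d and %dx%d\n", s3[0], s3[1], s3[2], s2[0], s2[1]);
        return 0;
    }

A [2, 16, 768] input therefore comes out as [32, 768] when output_ndims is 2, which appears to be the shape mismatch the vittrack tests were tripping over.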
30 changes: 20 additions & 10 deletions — modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -335,7 +335,7 @@ class AttentionSubGraph : public Subgraph {
         // qkv
         int matmul_qkv = addNodeToMatch("MatMul", softmax_qk, transpose_v);
         int transpose_qkv = addNodeToMatch("Transpose", matmul_qkv);
-        addNodeToMatch("Reshape", transpose_qkv, addNodeToMatch(""));
+        last_reshape = addNodeToMatch("Reshape", transpose_qkv, addNodeToMatch(""));
 
         setFusedNode("Attention", input);
     }
@@ -363,13 +363,11 @@ class AttentionSubGraph : public Subgraph {
             // get attrs - num_heads, scale
             num_heads = extractConstant(net, matchedNodesIds[reshape_q], 1).at<int>(1);
             scale = extractConstant(net, matchedNodesIds[div_q], 1).at<float>(0);
-            // std::cout << "attention: num_heads=" << num_heads << ", qkv_hidden_sizes=" << qkv_hidden_sizes << ", scale=" << scale << std::endl;
+            output_ndims = extractConstant(net, matchedNodesIds[last_reshape], 1).dims;
 
             // get names
             weight_name = getInputName(net, matchedNodesIds[att_matmul], 1);
-            // std::cout << "attention: weight_name=" << weight_name << std::endl;
             bias_name = getInputName(net, matchedNodesIds[att_add], 0);
-            // std::cout << "attention: bias_name=" << bias_name << std::endl;
             return true;
         }
         return false;
@@ -392,6 +390,11 @@ class AttentionSubGraph : public Subgraph {
         attr_scale->set_name("scale");
         attr_scale->set_f(scale);
 
+        // add customized attrs
+        opencv_onnx::AttributeProto* attr_output_ndims = node->add_attribute();
+        attr_output_ndims->set_name("output_ndims");
+        attr_output_ndims->set_i(output_ndims);
+
         // add inputs
         node->add_input(weight_name);
         node->add_input(bias_name);
@@ -400,12 +403,14 @@ class AttentionSubGraph : public Subgraph {
 private:
     int att_matmul, att_add;
     int slice_q, slice_k, slice_v;
-    int reshape_q, div_q;
+    int reshape_q, div_q, last_reshape;
 
     std::vector<int64_t> qkv_hidden_sizes; // order: [qk_hidden_size, qk_hidden_size, v_hidden_size]
     int64_t num_heads;
     float scale;
 
+    int64_t output_ndims;
+
     std::string weight_name;
     std::string bias_name;
 };
@@ -441,7 +446,7 @@ class AttentionSingleHeadSubGraph : public Subgraph {
         // qkv
         int matmul_qkv = addNodeToMatch("MatMul", softmax_qk, transpose_v);
         int transpose_qkv = addNodeToMatch("Transpose", matmul_qkv);
-        addNodeToMatch("Reshape", transpose_qkv, addNodeToMatch(""));
+        last_reshape = addNodeToMatch("Reshape", transpose_qkv, addNodeToMatch(""));
 
         setFusedNode("Attention", input);
     }
@@ -469,13 +474,11 @@ class AttentionSingleHeadSubGraph : public Subgraph {
             // get attrs - num_heads, scale
             num_heads = 1;
             scale = extractConstant(net, matchedNodesIds[div_q], 1).at<float>(0);
-            // std::cout << "AttentionSingleHeadSubGraph: num_heads=" << num_heads << ", qkv_hidden_sizes=" << qkv_hidden_sizes << ", scale=" << scale << std::endl;
+            output_ndims = extractConstant(net, matchedNodesIds[last_reshape], 1).dims;
 
             // get names
             weight_name = getInputName(net, matchedNodesIds[att_matmul], 1);
-            // std::cout << "AttentionSingleHeadSubGraph: weight_name=" << weight_name << std::endl;
             bias_name = getInputName(net, matchedNodesIds[att_add], 0);
-            // std::cout << "AttentionSingleHeadSubGraph: bias_name=" << bias_name << std::endl;
             return true;
         }
         return false;
@@ -498,6 +501,11 @@ class AttentionSingleHeadSubGraph : public Subgraph {
         attr_scale->set_name("scale");
         attr_scale->set_f(scale);
 
+        // add customized attrs
+        opencv_onnx::AttributeProto* attr_output_ndims = node->add_attribute();
+        attr_output_ndims->set_name("output_ndims");
+        attr_output_ndims->set_i(output_ndims);
+
         // add inputs
         node->add_input(weight_name);
         node->add_input(bias_name);
@@ -506,12 +514,14 @@ class AttentionSingleHeadSubGraph : public Subgraph {
 protected:
     int att_matmul, att_add;
     int slice_q, slice_k, slice_v;
-    int div_q;
+    int div_q, last_reshape;
 
     std::vector<int64_t> qkv_hidden_sizes; // order: [qk_hidden_size, qk_hidden_size, v_hidden_size]
     int64_t num_heads;
     float scale;
 
+    int64_t output_ndims;
+
     std::string weight_name;
     std::string bias_name;
 };
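Both subgraph classes record the new attribute the same way: the matcher keeps the id of the trailing Reshape (last_reshape), derives output_ndims from its constant shape input, and writes it onto the fused Attention node; the layer then reads it back with a default of 3, so graphs fused before this change keep their old 3-D output. A minimal sketch of that producer/consumer handshake (a std::map stands in for the node attributes and cv::dnn::LayerParams, an intentional simplification; function names are illustrative):

    #include <cstdio>
    #include <map>
    #include <string>

    using Attrs = std::map<std::string, int>;

    // Producer: what the simplifier's attribute-writing code boils down to.
    void recordOutputNdims(Attrs& fused_node, int rank_of_last_reshape)
    {
        fused_node["output_ndims"] = rank_of_last_reshape;
    }

    // Consumer: mirrors params.get<int>("output_ndims", 3) in the layer.
    int readOutputNdims(const Attrs& fused_node)
    {
        auto it = fused_node.find("output_ndims");
        return it == fused_node.end() ? 3 : it->second;  // default keeps old behaviour
    }

    int main()
    {
        Attrs fused, legacy;
        recordOutputNdims(fused, 2);
        std::printf("fused: %d, legacy: %d\n",
                    readOutputNdims(fused), readOutputNdims(legacy));  // fused: 2, legacy: 3
        return 0;
    }

Defaulting the attribute to 3 on the consumer side is what makes the change backward compatible: Attention nodes fused by earlier builds carry no output_ndims at all and silently keep the original 3-D behaviour.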
