Commit 2b6ec97

sync: update ggml (leejet#134)

1 parent db38234, commit 2b6ec97

9 files changed: +21 -65 lines

clip.hpp

Lines changed: 1 addition & 8 deletions

@@ -443,16 +443,13 @@ struct ResidualAttentionBlock {
     struct ggml_tensor* ln2_w;  // [hidden_size, ]
     struct ggml_tensor* ln2_b;  // [hidden_size, ]

-    struct ggml_tensor* attn_scale;  // [hidden_size, ]
-
     size_t calculate_mem_size(ggml_type wtype) {
         double mem_size = 0;
         mem_size += 4 * hidden_size * hidden_size * ggml_type_sizef(wtype);        // q_w/k_w/v_w/out_w
         mem_size += 8 * hidden_size * ggml_type_sizef(GGML_TYPE_F32);              // q_b/k_b/v_b/out_b/ln1_w/ln1_b/ln2_w/ln2_b
         mem_size += 2 * hidden_size * intermediate_size * ggml_type_sizef(wtype);  // fc1_w/fc2_w
         mem_size += intermediate_size * ggml_type_sizef(GGML_TYPE_F32);            // fc1_b
         mem_size += hidden_size * ggml_type_sizef(GGML_TYPE_F32);                  // fc2_b
-        mem_size += ggml_type_sizef(GGML_TYPE_F32);                                // attn_scale
         return static_cast<size_t>(mem_size);
     }

@@ -479,10 +476,6 @@ struct ResidualAttentionBlock {
         ln2_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
         ln2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);

-        attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, attn_scale);
-        float scale = 1.0f / sqrt((float)d_model);
-        ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
     }

     void map_by_name(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {

@@ -521,7 +514,7 @@ struct ResidualAttentionBlock {
         // self-attention
         {
             struct ggml_tensor* q = ggml_nn_linear(ctx, x, q_w, q_b);
-            q = ggml_scale_inplace(ctx, q, attn_scale);
+            q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_model));
             q = ggml_reshape_4d(ctx, q, d_model, n_head, n_token, N);   // [N, n_token, n_head, d_model]
             q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));       // [N, n_head, n_token, d_model]
             q = ggml_reshape_3d(ctx, q, d_model, n_token, n_head * N);  // [N * n_head, n_token, d_model]

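Note: this change tracks the updated ggml API, in which ggml_scale / ggml_scale_inplace take the scale factor as a plain float instead of a 1-element F32 tensor, so the per-block attn_scale tensor and its allocation/upload are no longer needed. A minimal sketch of the new call, assuming the updated API; the helper name is hypothetical, not code from this repo:

#include "ggml.h"
#include <math.h>

// Scale the query by 1/sqrt(d_model) with the float-based ggml_scale_inplace;
// previously this required a [1] F32 tensor filled via ggml_backend_tensor_set.
static struct ggml_tensor* scale_query(struct ggml_context* ctx,
                                       struct ggml_tensor* q,
                                       int d_model) {
    return ggml_scale_inplace(ctx, q, 1.0f / sqrtf((float)d_model));
}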
esrgan.hpp

Lines changed: 5 additions & 10 deletions

@@ -91,7 +91,7 @@ struct ResidualDenseBlock {
         tensors[prefix + "conv5.bias"] = conv5_b;
     }

-    ggml_tensor* forward(ggml_context* ctx, ggml_tensor* out_scale, ggml_tensor* x /* feat */) {
+    ggml_tensor* forward(ggml_context* ctx, float out_scale, ggml_tensor* x /* feat */) {
         // x1 = self.lrelu(self.conv1(x))
         ggml_tensor* x1 = ggml_nn_conv_2d(ctx, x, conv1_w, conv1_b, 1, 1, 1, 1);
         x1 = ggml_leaky_relu(ctx, x1, 0.2f, true);

@@ -161,7 +161,7 @@ struct EsrganBlock {
         }
     }

-    ggml_tensor* forward(ggml_context* ctx, ggml_tensor* out_scale, ggml_tensor* x) {
+    ggml_tensor* forward(ggml_context* ctx, float out_scale, ggml_tensor* x) {
         ggml_tensor* out = x;
         for (int i = 0; i < num_residual_blocks; i++) {
             // out = self.rdb...(x)

@@ -325,7 +325,7 @@ struct ESRGAN : public GGMLModule {
         tensors["conv_last.bias"] = conv_last_b;
     }

-    ggml_tensor* forward(ggml_context* ctx0, ggml_tensor* out_scale, ggml_tensor* x /* feat */) {
+    ggml_tensor* forward(ggml_context* ctx0, float out_scale, ggml_tensor* x /* feat */) {
         // feat = self.conv_first(feat)
         auto h = ggml_nn_conv_2d(ctx0, x, conv_first_w, conv_first_b, 1, 1, 1, 1);

@@ -376,12 +376,7 @@ struct ESRGAN : public GGMLModule {
         struct ggml_cgraph* gf = ggml_new_graph(ctx0);

         struct ggml_tensor* x_ = NULL;
-        struct ggml_tensor* os = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(compute_allocr, os);
-        if (!ggml_allocr_is_measure(compute_allocr)) {
-            float scale = 0.2f;
-            ggml_backend_tensor_set(os, &scale, 0, sizeof(scale));
-        }
+        float out_scale = 0.2f;

         // it's performing a compute, check if backend isn't cpu
         if (!ggml_backend_is_cpu(backend)) {

@@ -397,7 +392,7 @@ struct ESRGAN : public GGMLModule {
             x_ = x;
         }

-        struct ggml_tensor* out = forward(ctx0, os, x);
+        struct ggml_tensor* out = forward(ctx0, out_scale, x);

         ggml_build_forward_expand(gf, out);
         ggml_free(ctx0);

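Note: out_scale (0.2) is the residual scaling used by ESRGAN's dense blocks (out = x + out_scale * f(x)); with the updated ggml it can be threaded through forward() as a plain float rather than a graph tensor built in the measure/compute passes. A hedged sketch of that pattern, with a hypothetical helper name (not repo code):

#include "ggml.h"

// Add the scaled block output back onto the block input; ggml_scale now
// accepts the float factor directly.
static struct ggml_tensor* scaled_residual(struct ggml_context* ctx,
                                           struct ggml_tensor* x,   // block input
                                           struct ggml_tensor* fx,  // block output f(x)
                                           float out_scale) {       // e.g. 0.2f
    return ggml_add(ctx, ggml_scale(ctx, fx, out_scale), x);
}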
ggml

Submodule ggml updated 44 files

ggml_extend.hpp

Lines changed: 1 addition & 1 deletion

@@ -449,7 +449,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
                                                          struct ggml_tensor* w,
                                                          struct ggml_tensor* b,
                                                          int num_groups = 32) {
-    if (x->n_dims == 4) {
+    if (ggml_n_dims(x) >= 3) {
         w = ggml_reshape_4d(ctx, w, 1, 1, w->ne[0], 1);
         b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
     }

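Note: the updated ggml removed the tensor's n_dims field; ggml_n_dims() reports the effective rank and does not count trailing size-1 dimensions, so a [W, H, C, 1] activation reports 3 rather than 4, which is presumably why the check was loosened to >= 3. A small standalone check of that behavior (an assumption about the new API's semantics, not project code):

#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {16 * 1024 * 1024, NULL, false};
    struct ggml_context* ctx = ggml_init(params);

    // A batch-of-one feature map created as 4-D: ne = [64, 64, 320, 1].
    struct ggml_tensor* x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 320, 1);
    printf("ggml_n_dims(x) = %d\n", ggml_n_dims(x));  // expected: 3

    ggml_free(ctx);
    return 0;
}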
lora.hpp

Lines changed: 4 additions & 11 deletions

@@ -113,7 +113,7 @@ struct LoraModel : public GGMLModule {
         applied_lora_tensors.insert(scale_name);

         // calc_cale
-        int64_t dim = lora_down->ne[lora_down->n_dims - 1];
+        int64_t dim = lora_down->ne[ggml_n_dims(lora_down) - 1];
         float scale_value = 1.0f;
         if (lora_tensors.find(scale_name) != lora_tensors.end()) {
             scale_value = ggml_backend_tensor_get_f32(lora_tensors[scale_name]);

@@ -123,17 +123,10 @@ struct LoraModel : public GGMLModule {
         }
         scale_value *= multiplier;

-        ggml_tensor* lora_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-
-        ggml_allocr_alloc(compute_allocr, lora_scale);
-        if (!ggml_allocr_is_measure(compute_allocr)) {
-            ggml_backend_tensor_set(lora_scale, &scale_value, 0, ggml_nbytes(lora_scale));
-        }
-
         // flat lora tensors to multiply it
-        int64_t lora_up_rows = lora_up->ne[lora_up->n_dims - 1];
+        int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1];
         lora_up = ggml_reshape_2d(ctx0, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
-        int64_t lora_down_rows = lora_down->ne[lora_down->n_dims - 1];
+        int64_t lora_down_rows = lora_down->ne[ggml_n_dims(lora_down) - 1];
         lora_down = ggml_reshape_2d(ctx0, lora_down, ggml_nelements(lora_down) / lora_down_rows, lora_down_rows);

         // ggml_mul_mat requires tensor b transposed

@@ -142,7 +135,7 @@ struct LoraModel : public GGMLModule {
         updown = ggml_cont(ctx0, ggml_transpose(ctx0, updown));
         updown = ggml_reshape(ctx0, updown, weight);
         GGML_ASSERT(ggml_nelements(updown) == ggml_nelements(weight));
-        updown = ggml_scale_inplace(ctx0, updown, lora_scale);
+        updown = ggml_scale_inplace(ctx0, updown, scale_value);
         ggml_tensor* final_weight;
         // if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) {
         //     final_weight = ggml_new_tensor(ctx0, GGML_TYPE_F32, weight->n_dims, weight->ne);

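Note: scale_value (the LoRA scale, typically alpha / rank, times the user multiplier) is now applied directly through the float-based ggml_scale_inplace, which removes the lora_scale tensor and its measure-aware upload. A hedged sketch of the delta this block builds, with a hypothetical helper name; the real code then transposes and reshapes the result onto the target weight, as in the last hunk above:

#include "ggml.h"

static struct ggml_tensor* lora_delta(struct ggml_context* ctx,
                                      struct ggml_tensor* lora_up,
                                      struct ggml_tensor* lora_down,
                                      float scale_value) {
    // flatten both factors to 2-D (handles conv-style 4-D LoRA tensors too)
    int64_t up_rows   = lora_up->ne[ggml_n_dims(lora_up) - 1];
    int64_t down_rows = lora_down->ne[ggml_n_dims(lora_down) - 1];
    lora_up   = ggml_reshape_2d(ctx, lora_up, ggml_nelements(lora_up) / up_rows, up_rows);
    lora_down = ggml_reshape_2d(ctx, lora_down, ggml_nelements(lora_down) / down_rows, down_rows);

    // ggml_mul_mat wants its second operand transposed
    lora_down = ggml_cont(ctx, ggml_transpose(ctx, lora_down));
    struct ggml_tensor* updown = ggml_mul_mat(ctx, lora_up, lora_down);

    // the scale is a plain float now; no [1] tensor upload needed
    return ggml_scale_inplace(ctx, updown, scale_value);
}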
model.cpp

Lines changed: 4 additions & 1 deletion

@@ -673,7 +673,7 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s

         // LOG_DEBUG("%s", name.c_str());

-        TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, dummy->n_dims, file_index, offset);
+        TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);

         GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes());

@@ -1417,6 +1417,9 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
         if (pair.first.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos) {
             continue;
         }
+        if (pair.first.find("alphas_cumprod") != std::string::npos) {
+            continue;
+        }

         if (pair.first.find("alphas_cumprod") != std::string::npos) {
             continue;

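Note: load_tensors() now also skips any tensor whose name contains "alphas_cumprod" (the diffusion noise-schedule table stored in some checkpoints), alongside the existing name-based skips. A tiny hypothetical predicate (not repo code) capturing the checks visible in this hunk:

#include <string>

static bool skip_tensor(const std::string& name) {
    return name.find("alphas_cumprod") != std::string::npos ||
           name.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos;
}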
tae.hpp

Lines changed: 2 additions & 15 deletions

@@ -278,9 +278,6 @@ struct TinyDecoder {
     ggml_tensor* conv_final_w;  // [output_channels, channels, 3, 3]
     ggml_tensor* conv_final_b;  // [output_channels]

-    ggml_tensor* in_scale_1d3;  // [1]
-    ggml_tensor* in_scale_3;    // [1]
-
     TinyDecoder() {
         for (int i = 0; i < num_blocks; i++) {
             input_blocks[i].in_channels = channels;

@@ -351,16 +348,6 @@ struct TinyDecoder {
         }

         final_block.init_params(ctx);
-
-        // initialize constants scales
-        in_scale_1d3 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        in_scale_3 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, in_scale_1d3);
-        float scale_1d3 = 1.0f / 3.0f;
-        ggml_backend_tensor_set(in_scale_1d3, &scale_1d3, 0, sizeof(scale_1d3));
-        ggml_allocr_alloc(alloc, in_scale_3);
-        float scale_3 = 3.0f;
-        ggml_backend_tensor_set(in_scale_3, &scale_3, 0, sizeof(scale_3));
     }

     void map_by_name(std::map<std::string, ggml_tensor*>& tensors, std::string prefix) {

@@ -391,9 +378,9 @@ struct TinyDecoder {

     ggml_tensor* forward(ggml_context* ctx, ggml_tensor* z) {
         // torch.tanh(x / 3) * 3
-        auto h = ggml_scale(ctx, z, in_scale_1d3);
+        auto h = ggml_scale(ctx, z, 1.0f / 3.0f);
         h = ggml_tanh_inplace(ctx, h);
-        h = ggml_scale(ctx, h, in_scale_3);
+        h = ggml_scale(ctx, h, 3.0f);

         // conv(4, 64)
         h = ggml_nn_conv_2d(ctx, h, conv_input_w, conv_input_b, 1, 1, 1, 1);

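Note: the decoder's input soft clamp, torch.tanh(x / 3) * 3, now uses float constants instead of the two preallocated [1] tensors. A minimal sketch with a hypothetical helper name:

#include "ggml.h"

static struct ggml_tensor* soft_clamp(struct ggml_context* ctx, struct ggml_tensor* z) {
    struct ggml_tensor* h = ggml_scale(ctx, z, 1.0f / 3.0f);  // z / 3
    h = ggml_tanh_inplace(ctx, h);                            // tanh(z / 3)
    return ggml_scale(ctx, h, 3.0f);                          // 3 * tanh(z / 3)
}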
unet.hpp

Lines changed: 2 additions & 10 deletions

@@ -182,8 +182,6 @@ struct SpatialTransformer {

     std::vector<Transformer> transformers;

-    struct ggml_tensor* attn_scale;
-
     // proj_out
     struct ggml_tensor* proj_out_w;  // [in_channels, in_channels, 1, 1]
     struct ggml_tensor* proj_out_b;  // [in_channels,]

@@ -202,7 +200,6 @@ struct SpatialTransformer {
         mem_size += 2 * in_channels * ggml_type_sizef(GGML_TYPE_F32);                        // norm_w/norm_b
         mem_size += 2 * in_channels * in_channels * 1 * 1 * ggml_type_sizef(GGML_TYPE_F16);  // proj_in_w/proj_out_w
         mem_size += 2 * in_channels * ggml_type_sizef(GGML_TYPE_F32);                        // proj_in_b/proj_out_b
-        mem_size += 1 * ggml_type_sizef(GGML_TYPE_F32);                                      // attn_scale

         // transformer
         for (auto& transformer : transformers) {

@@ -226,11 +223,6 @@ struct SpatialTransformer {
         proj_out_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, in_channels, in_channels);
         proj_out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels);

-        attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, attn_scale);
-        float scale = 1.0f / sqrt((float)d_head);
-        ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
-
         // transformer
         for (auto& transformer : transformers) {
             transformer.norm1_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels);

@@ -332,7 +324,7 @@ struct SpatialTransformer {
         x = ggml_reshape_2d(ctx, x, c, h * w * n);                             // [N * h * w, in_channels]
         struct ggml_tensor* q = ggml_mul_mat(ctx, transformer.attn1_q_w, x);   // [N * h * w, in_channels]
 #if !defined(SD_USE_FLASH_ATTENTION) || defined(SD_USE_CUBLAS) || defined(SD_USE_METAL)
-        q = ggml_scale_inplace(ctx, q, attn_scale);
+        q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_head));
 #endif
         q = ggml_reshape_4d(ctx, q, d_head, n_head, h * w, n);  // [N, h * w, n_head, d_head]
         q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));   // [N, n_head, h * w, d_head]

@@ -380,7 +372,7 @@ struct SpatialTransformer {
         context = ggml_reshape_2d(ctx, context, context->ne[0], context->ne[1] * context->ne[2]);  // [N * max_position, hidden_size]
         struct ggml_tensor* q = ggml_mul_mat(ctx, transformer.attn2_q_w, x);                       // [N * h * w, in_channels]
 #if !defined(SD_USE_FLASH_ATTENTION) || defined(SD_USE_CUBLAS) || defined(SD_USE_METAL)
-        q = ggml_scale_inplace(ctx, q, attn_scale);
+        q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_head));
 #endif
         q = ggml_reshape_4d(ctx, q, d_head, n_head, h * w, n);  // [N, h * w, n_head, d_head]
         q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));   // [N, n_head, h * w, d_head]

vae.hpp

Lines changed: 1 addition & 8 deletions

@@ -118,8 +118,6 @@ struct AttnBlock {
     struct ggml_tensor* proj_out_w;  // [in_channels, in_channels, 1, 1]
     struct ggml_tensor* proj_out_b;  // [in_channels,]

-    struct ggml_tensor* attn_scale;
-
     size_t calculate_mem_size(ggml_type wtype) {
         double mem_size = 0;
         mem_size += 6 * in_channels * ggml_type_sizef(GGML_TYPE_F32);  // norm_w/norm_b/q_b/k_v/v_b/proj_out_b

@@ -140,11 +138,6 @@ struct AttnBlock {

         proj_out_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, in_channels, in_channels);
         proj_out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels);
-
-        attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, attn_scale);
-        float scale = 1.0f / sqrt((float)in_channels);
-        ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
     }

     void map_by_name(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {

@@ -181,7 +174,7 @@ struct AttnBlock {
         k = ggml_reshape_3d(ctx, k, c, h * w, n);  // [N, h * w, in_channels]

         auto w_ = ggml_mul_mat(ctx, k, q);  // [N, h * w, h * w]
-        w_ = ggml_scale_inplace(ctx, w_, attn_scale);
+        w_ = ggml_scale_inplace(ctx, w_, 1.0f / sqrt((float)in_channels));
         w_ = ggml_soft_max_inplace(ctx, w_);

         v = ggml_reshape_3d(ctx, v, h * w, c, n);  // [N, in_channels, h * w]

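Note: AttnBlock implements single-head attention over the h*w spatial positions, softmax(k^T q / sqrt(in_channels)) applied to v, and the 1/sqrt(C) factor is now passed inline. A hedged sketch of that attention core with a hypothetical helper; the shape annotations follow the comments in the hunk above:

#include "ggml.h"
#include <math.h>

static struct ggml_tensor* spatial_attention(struct ggml_context* ctx,
                                             struct ggml_tensor* q,  // [N, h*w, in_channels]
                                             struct ggml_tensor* k,  // [N, h*w, in_channels]
                                             struct ggml_tensor* v,  // [N, in_channels, h*w]
                                             int in_channels) {
    struct ggml_tensor* w_ = ggml_mul_mat(ctx, k, q);                    // [N, h*w, h*w]
    w_ = ggml_scale_inplace(ctx, w_, 1.0f / sqrtf((float)in_channels));  // scale by 1/sqrt(C)
    w_ = ggml_soft_max_inplace(ctx, w_);                                 // attention weights
    return ggml_mul_mat(ctx, v, w_);                                     // [N, h*w, in_channels]
}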