Commit 2b6ec97

sync: update ggml (leejet#134)

1 parent db38234, commit 2b6ec97

9 files changed: +21 -65 lines

clip.hpp

Lines changed: 1 addition & 8 deletions

@@ -443,16 +443,13 @@ struct ResidualAttentionBlock {
     struct ggml_tensor* ln2_w;  // [hidden_size, ]
     struct ggml_tensor* ln2_b;  // [hidden_size, ]

-    struct ggml_tensor* attn_scale;  // [hidden_size, ]
-
     size_t calculate_mem_size(ggml_type wtype) {
         double mem_size = 0;
         mem_size += 4 * hidden_size * hidden_size * ggml_type_sizef(wtype);        // q_w/k_w/v_w/out_w
         mem_size += 8 * hidden_size * ggml_type_sizef(GGML_TYPE_F32);              // q_b/k_b/v_b/out_b/ln1_w/ln1_b/ln2_w/ln2_b
         mem_size += 2 * hidden_size * intermediate_size * ggml_type_sizef(wtype);  // fc1_w/fc2_w
         mem_size += intermediate_size * ggml_type_sizef(GGML_TYPE_F32);            // fc1_b
         mem_size += hidden_size * ggml_type_sizef(GGML_TYPE_F32);                  // fc2_b
-        mem_size += ggml_type_sizef(GGML_TYPE_F32);                                // attn_scale
         return static_cast<size_t>(mem_size);
     }

@@ -479,10 +476,6 @@ struct ResidualAttentionBlock {
         ln2_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);
         ln2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size);

-        attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, attn_scale);
-        float scale = 1.0f / sqrt((float)d_model);
-        ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
     }

     void map_by_name(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {

@@ -521,7 +514,7 @@ struct ResidualAttentionBlock {
         // self-attention
         {
             struct ggml_tensor* q = ggml_nn_linear(ctx, x, q_w, q_b);
-            q = ggml_scale_inplace(ctx, q, attn_scale);
+            q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_model));
             q = ggml_reshape_4d(ctx, q, d_model, n_head, n_token, N);   // [N, n_token, n_head, d_model]
             q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));       // [N, n_head, n_token, d_model]
             q = ggml_reshape_3d(ctx, q, d_model, n_token, n_head * N);  // [N * n_head, n_token, d_model]

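Note: this change tracks the updated ggml API, in which ggml_scale / ggml_scale_inplace take the scale factor as a plain float instead of a 1-element F32 tensor, so the per-block attn_scale tensor and its allocation/upload are no longer needed. A minimal sketch of the new call, assuming the updated API; the helper name is hypothetical, not code from this repo:

#include "ggml.h"
#include <math.h>

// Scale the query by 1/sqrt(d_model) with the float-based ggml_scale_inplace;
// previously this required a [1] F32 tensor filled via ggml_backend_tensor_set.
static struct ggml_tensor* scale_query(struct ggml_context* ctx,
                                       struct ggml_tensor* q,
                                       int d_model) {
    return ggml_scale_inplace(ctx, q, 1.0f / sqrtf((float)d_model));
}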
esrgan.hpp

Lines changed: 5 additions & 10 deletions

@@ -91,7 +91,7 @@ struct ResidualDenseBlock {
         tensors[prefix + "conv5.bias"] = conv5_b;
     }

-    ggml_tensor* forward(ggml_context* ctx, ggml_tensor* out_scale, ggml_tensor* x /* feat */) {
+    ggml_tensor* forward(ggml_context* ctx, float out_scale, ggml_tensor* x /* feat */) {
         // x1 = self.lrelu(self.conv1(x))
         ggml_tensor* x1 = ggml_nn_conv_2d(ctx, x, conv1_w, conv1_b, 1, 1, 1, 1);
         x1 = ggml_leaky_relu(ctx, x1, 0.2f, true);

@@ -161,7 +161,7 @@ struct EsrganBlock {
         }
     }

-    ggml_tensor* forward(ggml_context* ctx, ggml_tensor* out_scale, ggml_tensor* x) {
+    ggml_tensor* forward(ggml_context* ctx, float out_scale, ggml_tensor* x) {
         ggml_tensor* out = x;
         for (int i = 0; i < num_residual_blocks; i++) {
             // out = self.rdb...(x)

@@ -325,7 +325,7 @@ struct ESRGAN : public GGMLModule {
         tensors["conv_last.bias"] = conv_last_b;
     }

-    ggml_tensor* forward(ggml_context* ctx0, ggml_tensor* out_scale, ggml_tensor* x /* feat */) {
+    ggml_tensor* forward(ggml_context* ctx0, float out_scale, ggml_tensor* x /* feat */) {
         // feat = self.conv_first(feat)
         auto h = ggml_nn_conv_2d(ctx0, x, conv_first_w, conv_first_b, 1, 1, 1, 1);

@@ -376,12 +376,7 @@ struct ESRGAN : public GGMLModule {
         struct ggml_cgraph* gf = ggml_new_graph(ctx0);

         struct ggml_tensor* x_ = NULL;
-        struct ggml_tensor* os = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(compute_allocr, os);
-        if (!ggml_allocr_is_measure(compute_allocr)) {
-            float scale = 0.2f;
-            ggml_backend_tensor_set(os, &scale, 0, sizeof(scale));
-        }
+        float out_scale = 0.2f;

         // it's performing a compute, check if backend isn't cpu
         if (!ggml_backend_is_cpu(backend)) {

@@ -397,7 +392,7 @@ struct ESRGAN : public GGMLModule {
             x_ = x;
         }

-        struct ggml_tensor* out = forward(ctx0, os, x);
+        struct ggml_tensor* out = forward(ctx0, out_scale, x);

         ggml_build_forward_expand(gf, out);
         ggml_free(ctx0);

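Note: out_scale (0.2) is the residual scaling used by ESRGAN's dense blocks (out = x + out_scale * f(x)); with the updated ggml it can be threaded through forward() as a plain float rather than a graph tensor built in the measure/compute passes. A hedged sketch of that pattern, with a hypothetical helper name (not repo code):

#include "ggml.h"

// Add the scaled block output back onto the block input; ggml_scale now
// accepts the float factor directly.
static struct ggml_tensor* scaled_residual(struct ggml_context* ctx,
                                           struct ggml_tensor* x,   // block input
                                           struct ggml_tensor* fx,  // block output f(x)
                                           float out_scale) {       // e.g. 0.2f
    return ggml_add(ctx, ggml_scale(ctx, fx, out_scale), x);
}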
ggml

Submodule ggml updated 44 files

ggml_extend.hpp

Lines changed: 1 addition & 1 deletion

@@ -449,7 +449,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct
                                                          struct ggml_tensor* w,
                                                          struct ggml_tensor* b,
                                                          int num_groups = 32) {
-    if (x->n_dims == 4) {
+    if (ggml_n_dims(x) >= 3) {
         w = ggml_reshape_4d(ctx, w, 1, 1, w->ne[0], 1);
         b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
     }

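Note: the updated ggml removed the tensor's n_dims field; ggml_n_dims() reports the effective rank and does not count trailing size-1 dimensions, so a [W, H, C, 1] activation reports 3 rather than 4, which is presumably why the check was loosened to >= 3. A small standalone check of that behavior (an assumption about the new API's semantics, not project code):

#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {16 * 1024 * 1024, NULL, false};
    struct ggml_context* ctx = ggml_init(params);

    // A batch-of-one feature map created as 4-D: ne = [64, 64, 320, 1].
    struct ggml_tensor* x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 320, 1);
    printf("ggml_n_dims(x) = %d\n", ggml_n_dims(x));  // expected: 3

    ggml_free(ctx);
    return 0;
}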
lora.hpp

Lines changed: 4 additions & 11 deletions

@@ -113,7 +113,7 @@ struct LoraModel : public GGMLModule {
         applied_lora_tensors.insert(scale_name);

         // calc_cale
-        int64_t dim = lora_down->ne[lora_down->n_dims - 1];
+        int64_t dim = lora_down->ne[ggml_n_dims(lora_down) - 1];
         float scale_value = 1.0f;
         if (lora_tensors.find(scale_name) != lora_tensors.end()) {
             scale_value = ggml_backend_tensor_get_f32(lora_tensors[scale_name]);

@@ -123,17 +123,10 @@ struct LoraModel : public GGMLModule {
         }
         scale_value *= multiplier;

-        ggml_tensor* lora_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-
-        ggml_allocr_alloc(compute_allocr, lora_scale);
-        if (!ggml_allocr_is_measure(compute_allocr)) {
-            ggml_backend_tensor_set(lora_scale, &scale_value, 0, ggml_nbytes(lora_scale));
-        }
-
         // flat lora tensors to multiply it
-        int64_t lora_up_rows = lora_up->ne[lora_up->n_dims - 1];
+        int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1];
         lora_up = ggml_reshape_2d(ctx0, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
-        int64_t lora_down_rows = lora_down->ne[lora_down->n_dims - 1];
+        int64_t lora_down_rows = lora_down->ne[ggml_n_dims(lora_down) - 1];
         lora_down = ggml_reshape_2d(ctx0, lora_down, ggml_nelements(lora_down) / lora_down_rows, lora_down_rows);

         // ggml_mul_mat requires tensor b transposed

@@ -142,7 +135,7 @@ struct LoraModel : public GGMLModule {
         updown = ggml_cont(ctx0, ggml_transpose(ctx0, updown));
         updown = ggml_reshape(ctx0, updown, weight);
         GGML_ASSERT(ggml_nelements(updown) == ggml_nelements(weight));
-        updown = ggml_scale_inplace(ctx0, updown, lora_scale);
+        updown = ggml_scale_inplace(ctx0, updown, scale_value);
         ggml_tensor* final_weight;
         // if (weight->type != GGML_TYPE_F32 && weight->type != GGML_TYPE_F16) {
         //     final_weight = ggml_new_tensor(ctx0, GGML_TYPE_F32, weight->n_dims, weight->ne);

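Note: scale_value (the LoRA scale, typically alpha / rank, times the user multiplier) is now applied directly through the float-based ggml_scale_inplace, which removes the lora_scale tensor and its measure-aware upload. A hedged sketch of the delta this block builds, with a hypothetical helper name; the real code then transposes and reshapes the result onto the target weight, as in the last hunk above:

#include "ggml.h"

static struct ggml_tensor* lora_delta(struct ggml_context* ctx,
                                      struct ggml_tensor* lora_up,
                                      struct ggml_tensor* lora_down,
                                      float scale_value) {
    // flatten both factors to 2-D (handles conv-style 4-D LoRA tensors too)
    int64_t up_rows   = lora_up->ne[ggml_n_dims(lora_up) - 1];
    int64_t down_rows = lora_down->ne[ggml_n_dims(lora_down) - 1];
    lora_up   = ggml_reshape_2d(ctx, lora_up, ggml_nelements(lora_up) / up_rows, up_rows);
    lora_down = ggml_reshape_2d(ctx, lora_down, ggml_nelements(lora_down) / down_rows, down_rows);

    // ggml_mul_mat wants its second operand transposed
    lora_down = ggml_cont(ctx, ggml_transpose(ctx, lora_down));
    struct ggml_tensor* updown = ggml_mul_mat(ctx, lora_up, lora_down);

    // the scale is a plain float now; no [1] tensor upload needed
    return ggml_scale_inplace(ctx, updown, scale_value);
}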
model.cpp

Lines changed: 4 additions & 1 deletion

@@ -673,7 +673,7 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s

         // LOG_DEBUG("%s", name.c_str());

-        TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, dummy->n_dims, file_index, offset);
+        TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);

         GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes());

@@ -1417,6 +1417,9 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
         if (pair.first.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos) {
             continue;
         }
+        if (pair.first.find("alphas_cumprod") != std::string::npos) {
+            continue;
+        }

         if (pair.first.find("alphas_cumprod") != std::string::npos) {
             continue;

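Note: load_tensors() now also skips any tensor whose name contains "alphas_cumprod" (the diffusion noise-schedule table stored in some checkpoints), alongside the existing name-based skips. A tiny hypothetical predicate (not repo code) capturing the checks visible in this hunk:

#include <string>

static bool skip_tensor(const std::string& name) {
    return name.find("alphas_cumprod") != std::string::npos ||
           name.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos;
}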
tae.hpp

Lines changed: 2 additions & 15 deletions

@@ -278,9 +278,6 @@ struct TinyDecoder {
     ggml_tensor* conv_final_w;  // [output_channels, channels, 3, 3]
     ggml_tensor* conv_final_b;  // [output_channels]

-    ggml_tensor* in_scale_1d3;  // [1]
-    ggml_tensor* in_scale_3;    // [1]
-
     TinyDecoder() {
         for (int i = 0; i < num_blocks; i++) {
             input_blocks[i].in_channels = channels;

@@ -351,16 +348,6 @@ struct TinyDecoder {
         }

         final_block.init_params(ctx);
-
-        // initialize constants scales
-        in_scale_1d3 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        in_scale_3 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, in_scale_1d3);
-        float scale_1d3 = 1.0f / 3.0f;
-        ggml_backend_tensor_set(in_scale_1d3, &scale_1d3, 0, sizeof(scale_1d3));
-        ggml_allocr_alloc(alloc, in_scale_3);
-        float scale_3 = 3.0f;
-        ggml_backend_tensor_set(in_scale_3, &scale_3, 0, sizeof(scale_3));
     }

     void map_by_name(std::map<std::string, ggml_tensor*>& tensors, std::string prefix) {

@@ -391,9 +378,9 @@ struct TinyDecoder {

     ggml_tensor* forward(ggml_context* ctx, ggml_tensor* z) {
         // torch.tanh(x / 3) * 3
-        auto h = ggml_scale(ctx, z, in_scale_1d3);
+        auto h = ggml_scale(ctx, z, 1.0f / 3.0f);
         h = ggml_tanh_inplace(ctx, h);
-        h = ggml_scale(ctx, h, in_scale_3);
+        h = ggml_scale(ctx, h, 3.0f);

         // conv(4, 64)
         h = ggml_nn_conv_2d(ctx, h, conv_input_w, conv_input_b, 1, 1, 1, 1);

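Note: the decoder's input soft clamp, torch.tanh(x / 3) * 3, now uses float constants instead of the two preallocated [1] tensors. A minimal sketch with a hypothetical helper name:

#include "ggml.h"

static struct ggml_tensor* soft_clamp(struct ggml_context* ctx, struct ggml_tensor* z) {
    struct ggml_tensor* h = ggml_scale(ctx, z, 1.0f / 3.0f);  // z / 3
    h = ggml_tanh_inplace(ctx, h);                            // tanh(z / 3)
    return ggml_scale(ctx, h, 3.0f);                          // 3 * tanh(z / 3)
}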
unet.hpp

Lines changed: 2 additions & 10 deletions

@@ -182,8 +182,6 @@ struct SpatialTransformer {

     std::vector<Transformer> transformers;

-    struct ggml_tensor* attn_scale;
-
     // proj_out
     struct ggml_tensor* proj_out_w;  // [in_channels, in_channels, 1, 1]
     struct ggml_tensor* proj_out_b;  // [in_channels,]

@@ -202,7 +200,6 @@ struct SpatialTransformer {
         mem_size += 2 * in_channels * ggml_type_sizef(GGML_TYPE_F32);                        // norm_w/norm_b
         mem_size += 2 * in_channels * in_channels * 1 * 1 * ggml_type_sizef(GGML_TYPE_F16);  // proj_in_w/proj_out_w
         mem_size += 2 * in_channels * ggml_type_sizef(GGML_TYPE_F32);                        // proj_in_b/proj_out_b
-        mem_size += 1 * ggml_type_sizef(GGML_TYPE_F32);                                      // attn_scale

         // transformer
         for (auto& transformer : transformers) {

@@ -226,11 +223,6 @@ struct SpatialTransformer {
         proj_out_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, in_channels, in_channels);
         proj_out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels);

-        attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, attn_scale);
-        float scale = 1.0f / sqrt((float)d_head);
-        ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
-
         // transformer
         for (auto& transformer : transformers) {
             transformer.norm1_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels);

@@ -332,7 +324,7 @@ struct SpatialTransformer {
         x = ggml_reshape_2d(ctx, x, c, h * w * n);                             // [N * h * w, in_channels]
         struct ggml_tensor* q = ggml_mul_mat(ctx, transformer.attn1_q_w, x);   // [N * h * w, in_channels]
 #if !defined(SD_USE_FLASH_ATTENTION) || defined(SD_USE_CUBLAS) || defined(SD_USE_METAL)
-        q = ggml_scale_inplace(ctx, q, attn_scale);
+        q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_head));
 #endif
         q = ggml_reshape_4d(ctx, q, d_head, n_head, h * w, n);  // [N, h * w, n_head, d_head]
         q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));   // [N, n_head, h * w, d_head]

@@ -380,7 +372,7 @@ struct SpatialTransformer {
         context = ggml_reshape_2d(ctx, context, context->ne[0], context->ne[1] * context->ne[2]);  // [N * max_position, hidden_size]
         struct ggml_tensor* q = ggml_mul_mat(ctx, transformer.attn2_q_w, x);                       // [N * h * w, in_channels]
 #if !defined(SD_USE_FLASH_ATTENTION) || defined(SD_USE_CUBLAS) || defined(SD_USE_METAL)
-        q = ggml_scale_inplace(ctx, q, attn_scale);
+        q = ggml_scale_inplace(ctx, q, 1.0f / sqrt((float)d_head));
 #endif
         q = ggml_reshape_4d(ctx, q, d_head, n_head, h * w, n);  // [N, h * w, n_head, d_head]
         q = ggml_cont(ctx, ggml_permute(ctx, q, 0, 2, 1, 3));   // [N, n_head, h * w, d_head]

vae.hpp

Lines changed: 1 addition & 8 deletions

@@ -118,8 +118,6 @@ struct AttnBlock {
     struct ggml_tensor* proj_out_w;  // [in_channels, in_channels, 1, 1]
     struct ggml_tensor* proj_out_b;  // [in_channels,]

-    struct ggml_tensor* attn_scale;
-
     size_t calculate_mem_size(ggml_type wtype) {
         double mem_size = 0;
         mem_size += 6 * in_channels * ggml_type_sizef(GGML_TYPE_F32);  // norm_w/norm_b/q_b/k_v/v_b/proj_out_b

@@ -140,11 +138,6 @@ struct AttnBlock {

         proj_out_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, in_channels, in_channels);
         proj_out_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels);
-
-        attn_scale = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
-        ggml_allocr_alloc(alloc, attn_scale);
-        float scale = 1.0f / sqrt((float)in_channels);
-        ggml_backend_tensor_set(attn_scale, &scale, 0, sizeof(scale));
     }

     void map_by_name(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {

@@ -181,7 +174,7 @@ struct AttnBlock {
         k = ggml_reshape_3d(ctx, k, c, h * w, n);  // [N, h * w, in_channels]

         auto w_ = ggml_mul_mat(ctx, k, q);  // [N, h * w, h * w]
-        w_ = ggml_scale_inplace(ctx, w_, attn_scale);
+        w_ = ggml_scale_inplace(ctx, w_, 1.0f / sqrt((float)in_channels));
         w_ = ggml_soft_max_inplace(ctx, w_);

         v = ggml_reshape_3d(ctx, v, h * w, c, n);  // [N, in_channels, h * w]

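Note: AttnBlock implements single-head attention over the h*w spatial positions, softmax(k^T q / sqrt(in_channels)) applied to v, and the 1/sqrt(C) factor is now passed inline. A hedged sketch of that attention core with a hypothetical helper; the shape annotations follow the comments in the hunk above:

#include "ggml.h"
#include <math.h>

static struct ggml_tensor* spatial_attention(struct ggml_context* ctx,
                                             struct ggml_tensor* q,  // [N, h*w, in_channels]
                                             struct ggml_tensor* k,  // [N, h*w, in_channels]
                                             struct ggml_tensor* v,  // [N, in_channels, h*w]
                                             int in_channels) {
    struct ggml_tensor* w_ = ggml_mul_mat(ctx, k, q);                    // [N, h*w, h*w]
    w_ = ggml_scale_inplace(ctx, w_, 1.0f / sqrtf((float)in_channels));  // scale by 1/sqrt(C)
    w_ = ggml_soft_max_inplace(ctx, w_);                                 // attention weights
    return ggml_mul_mat(ctx, v, w_);                                     // [N, h*w, in_channels]
}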