Commit 2eac844

committed

fix: generate image correctly in img2img mode

1 parent 968226a, commit 2eac844 (Copy full SHA for 2eac844)

File tree

1 file changed

+62

-32

lines changed

stable-diffusion.cpp

1 file changed

+62

-32

lines changed

`‎stable-diffusion.cpp‎`

Lines changed: 62 additions & 32 deletions

Original file line number	Diff line number	Diff line change
`@@ -131,7 +131,7 @@ void print_ggml_tensor(struct ggml_tensor* tensor, bool shape_only = false) {`
`131`	`131`	`if (shape_only) {`
`132`	`132`	`return;`
`133`	`133`	`}`
`134`		`-int range =1000;`
	`134`	`+int range =3;`
`135`	`135`	`for (int i =0; i < tensor->ne[3]; i++) {`
`136`	`136`	`if (i >= range && i + range < tensor->ne[3]) {`
`137`	`137`	`continue;`
`@@ -335,7 +335,7 @@ void sd_image_to_tensor(const uint8_t* image_data,`
`335`	`335`	`}`
`336`	`336`	`}`
`337`	`337`
`338`		`-float sd_mean(struct ggml_tensor* src) {`
	`338`	`+float ggml_tensor_mean(struct ggml_tensor* src) {`
`339`	`339`	`float mean =0.0f;`
`340`	`340`	`int64_t nelements =ggml_nelements(src);`
`341`	`341`	`float* data = (float*)src->data;`
`@@ -345,15 +345,26 @@ float sd_mean(struct ggml_tensor* src) {`
`345`	`345`	`return mean;`
`346`	`346`	`}`
`347`	`347`
`348`		`-void sd_scale(struct ggml_tensor* src, float scale) {`
	`348`	`+// a = a+b`
	`349`	`+void ggml_tensor_add(struct ggml_tensor* a, struct ggml_tensor* b) {`
	`350`	`+GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));`
	`351`	`+int64_t nelements = ggml_nelements(a);`
	`352`	`+float* vec_a = (float*)a->data;`
	`353`	`+float* vec_b = (float*)b->data;`
	`354`	`+for (int i = 0; i < nelements; i++) {`
	`355`	`+ vec_a[i] = vec_a[i] + vec_b[i];`
	`356`	`+ }`
	`357`	`+}`
	`358`	`+`
	`359`	`+void ggml_tensor_scale(struct ggml_tensor* src, float scale) {`
`349`	`360`	`int64_t nelements =ggml_nelements(src);`
`350`	`361`	`float* data = (float*)src->data;`
`351`	`362`	`for (int i =0; i < nelements; i++) {`
`352`	`363`	`data[i] = data[i] * scale;`
`353`	`364`	`}`
`354`	`365`	`}`
`355`	`366`
`356`		`-void sd_clamp(struct ggml_tensor* src, float min, float max) {`
	`367`	`+void ggml_tensor_clamp(struct ggml_tensor* src, float min, float max) {`
`357`	`368`	`int64_t nelements =ggml_nelements(src);`
`358`	`369`	`float* data = (float*)src->data;`
`359`	`370`	`for (int i =0; i < nelements; i++) {`
`@@ -363,7 +374,7 @@ void sd_clamp(struct ggml_tensor* src, float min, float max) {`
`363`	`374`	`}`
`364`	`375`
`365`	`376`	`// convert values from [0, 1] to [-1, 1]`
`366`		`-void sd_convert_input(struct ggml_tensor* src) {`
	`377`	`+void ggml_tensor_scale_input(struct ggml_tensor* src) {`
`367`	`378`	`int64_t nelements =ggml_nelements(src);`
`368`	`379`	`float* data = (float*)src->data;`
`369`	`380`	`for (int i =0; i < nelements; i++) {`
`@@ -373,7 +384,7 @@ void sd_convert_input(struct ggml_tensor* src) {`
`373`	`384`	`}`
`374`	`385`
`375`	`386`	`// convert values from [-1, 1] to [0, 1]`
`376`		`-void sd_convert_output(struct ggml_tensor* src) {`
	`387`	`+void ggml_tensor_scale_output(struct ggml_tensor* src) {`
`377`	`388`	`int64_t nelements =ggml_nelements(src);`
`378`	`389`	`float* data = (float*)src->data;`
`379`	`390`	`for (int i =0; i < nelements; i++) {`
`@@ -4724,7 +4735,7 @@ class StableDiffusionGGML {`
`4724`	`4735`	`LOG_DEBUG("computing condition graph completed, taking %" PRId64" ms", t1 - t0);`
`4725`	`4736`	`ggml_tensor* result =ggml_dup_tensor(work_ctx, hidden_states);`
`4726`	`4737`	`{`
`4727`		`-float original_mean =sd_mean(hidden_states);`
	`4738`	`+float original_mean =ggml_tensor_mean(hidden_states);`
`4728`	`4739`	`for (int i2 =0; i2 < hidden_states->ne[2]; i2++) {`
`4729`	`4740`	`for (int i1 =0; i1 < hidden_states->ne[1]; i1++) {`
`4730`	`4741`	`for (int i0 =0; i0 < hidden_states->ne[0]; i0++) {`
`@@ -4734,16 +4745,17 @@ class StableDiffusionGGML {`
`4734`	`4745`	`}`
`4735`	`4746`	`}`
`4736`	`4747`	`}`
`4737`		`-float new_mean =sd_mean(result);`
`4738`		`-sd_scale(result, (original_mean / new_mean));`
	`4748`	`+float new_mean =ggml_tensor_mean(result);`
	`4749`	`+ggml_tensor_scale(result, (original_mean / new_mean));`
`4739`	`4750`	`}`
`4740`	`4751`	`return result;// [1, 77, 768]`
`4741`	`4752`	`}`
`4742`	`4753`
`4743`	`4754`	`ggml_tensor* sample(ggml_context* work_ctx,`
`4744`	`4755`	`ggml_tensor* x_t,`
`4745`		`- ggml_tensor* positive,`
`4746`		`- ggml_tensor* negative,`
	`4756`	`+ ggml_tensor* noise,`
	`4757`	`+ ggml_tensor* c,`
	`4758`	`+ ggml_tensor* uc,`
`4747`	`4759`	`float cfg_scale,`
`4748`	`4760`	`SampleMethod method,`
`4749`	`4761`	`const std::vector<float>& sigmas) {`
`@@ -4756,12 +4768,18 @@ class StableDiffusionGGML {`
`4756`	`4768`	`struct ggml_tensor* noised_input = ggml_dup_tensor(work_ctx, x_t);`
`4757`	`4769`	`struct ggml_tensor* timesteps = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, 1); // [N, ]`
`4758`	`4770`	`struct ggml_tensor* t_emb = new_timestep_embedding(work_ctx, NULL, timesteps, diffusion_model.model_channels); // [N, model_channels]`
`4759`		`- diffusion_model.begin(noised_input, positive, t_emb);`
	`4771`	`+ diffusion_model.begin(noised_input, c, t_emb);`
`4760`	`4772`
`4761`		`-bool has_unconditioned = cfg_scale != 1.0 && negative != NULL;`
	`4773`	`+bool has_unconditioned = cfg_scale != 1.0 && uc != NULL;`
`4762`	`4774`
`4763`		`-// x = x * sigmas[0]`
`4764`		`-sd_scale(x, sigmas[0]);`
	`4775`	`+if (noise ==NULL) {`
	`4776`	`+// x = x * sigmas[0]`
	`4777`	`+ggml_tensor_scale(x, sigmas[0]);`
	`4778`	`+ }else {`
	`4779`	`+// xi = x + noise * sigma_sched[0]`
	`4780`	`+ggml_tensor_scale(noise, sigmas[0]);`
	`4781`	`+ggml_tensor_add(x, noise);`
	`4782`	`+ }`
`4765`	`4783`
`4766`	`4784`	`// denoise wrapper`
`4767`	`4785`	`struct ggml_tensor* out_cond = ggml_dup_tensor(work_ctx, x);`
`@@ -4797,15 +4815,15 @@ class StableDiffusionGGML {`
`4797`	`4815`
`4798`	`4816`	`copy_ggml_tensor(noised_input, input);`
`4799`	`4817`	`// noised_input = noised_input * c_in`
`4800`		`-sd_scale(noised_input, c_in);`
	`4818`	`+ggml_tensor_scale(noised_input, c_in);`
`4801`	`4819`
`4802`	`4820`	`// cond`
`4803`		`- diffusion_model.compute(out_cond, n_threads, noised_input,NULL,positive, t_emb);`
	`4821`	`+ diffusion_model.compute(out_cond, n_threads, noised_input,NULL,c, t_emb);`
`4804`	`4822`
`4805`	`4823`	`float* negative_data =NULL;`
`4806`	`4824`	`if (has_unconditioned) {`
`4807`	`4825`	`// uncond`
`4808`		`- diffusion_model.compute(out_uncond, n_threads, noised_input,NULL,negative, t_emb);`
	`4826`	`+ diffusion_model.compute(out_uncond, n_threads, noised_input,NULL,uc, t_emb);`
`4809`	`4827`	`negative_data = (float*)out_uncond->data;`
`4810`	`4828`	`}`
`4811`	`4829`	`float* vec_denoised = (float*)denoised->data;`
`@@ -5260,15 +5278,15 @@ class StableDiffusionGGML {`
`5260`	`5278`	`int64_t t0 =ggml_time_ms();`
`5261`	`5279`	`if (!use_tiny_autoencoder) {`
`5262`	`5280`	`if (decode) {`
`5263`		`-sd_scale(x,1.0f / scale_factor);`
	`5281`	`+ggml_tensor_scale(x,1.0f / scale_factor);`
`5264`	`5282`	`}else {`
`5265`		`-sd_convert_input(x);`
	`5283`	`+ggml_tensor_scale_input(x);`
`5266`	`5284`	`}`
`5267`	`5285`	`first_stage_model.begin(x, decode);`
`5268`	`5286`	`first_stage_model.compute(result, n_threads, x, decode);`
`5269`	`5287`	`first_stage_model.end();`
`5270`	`5288`	`if (decode) {`
`5271`		`-sd_convert_output(result);`
	`5289`	`+ggml_tensor_scale_output(result);`
`5272`	`5290`	`}`
`5273`	`5291`	`}else {`
`5274`	`5292`	`tae_first_stage.begin(x, decode);`
`@@ -5278,10 +5296,18 @@ class StableDiffusionGGML {`
`5278`	`5296`	`int64_t t1 =ggml_time_ms();`
`5279`	`5297`	`LOG_DEBUG("computing vae [mode: %s] graph completed, taking %.2fs", decode ?"DECODE" :"ENCODE", (t1 - t0) *1.0f /1000);`
`5280`	`5298`	`if (decode) {`
`5281`		`-sd_clamp(result,0.0f,1.0f);`
	`5299`	`+ggml_tensor_clamp(result,0.0f,1.0f);`
`5282`	`5300`	`}`
`5283`	`5301`	`return result;`
`5284`	`5302`	`}`
	`5303`	`+`
	`5304`	`+ ggml_tensor* encode_first_stage(ggml_context* work_ctx, ggml_tensor* x) {`
	`5305`	`+return compute_first_stage(work_ctx, x, false);`
	`5306`	`+ }`
	`5307`	`+`
	`5308`	`+ ggml_tensor* decode_first_stage(ggml_context* work_ctx, ggml_tensor* x) {`
	`5309`	`+return compute_first_stage(work_ctx, x, true);`
	`5310`	`+ }`
`5285`	`5311`	`};`
`5286`	`5312`
`5287`	`5313`	`/*================================================= StableDiffusion ==================================================*/`
`@@ -5358,11 +5384,11 @@ std::vector<uint8_t*> StableDiffusion::txt2img(std::string prompt,`
`5358`	`5384`	`seed =rand();`
`5359`	`5385`	`}`
`5360`	`5386`
`5361`		`- t0=ggml_time_ms();`
`5362`		`- ggml_tensor* postive = sd->get_learned_condition(work_ctx, prompt);`
`5363`		`-struct ggml_tensor* negative = NULL;`
	`5387`	`+ t0 =ggml_time_ms();`
	`5388`	`+ ggml_tensor* c = sd->get_learned_condition(work_ctx, prompt);`
	`5389`	`+struct ggml_tensor* uc = NULL;`
`5364`	`5390`	`if (cfg_scale !=1.0) {`
`5365`		`-negative = sd->get_learned_condition(work_ctx, negative_prompt);`
	`5391`	`+uc = sd->get_learned_condition(work_ctx, negative_prompt);`
`5366`	`5392`	`}`
`5367`	`5393`	`t1 =ggml_time_ms();`
`5368`	`5394`	`LOG_INFO("get_learned_condition completed, taking %" PRId64" ms", t1 - t0);`
`@@ -5387,7 +5413,7 @@ std::vector<uint8_t*> StableDiffusion::txt2img(std::string prompt,`
`5387`	`5413`
`5388`	`5414`	`std::vector<float> sigmas = sd->denoiser->schedule->get_sigmas(sample_steps);`
`5389`	`5415`
`5390`		`-struct ggml_tensor* x_0 = sd->sample(work_ctx, x_t, postive, negative, cfg_scale, sample_method, sigmas);`
	`5416`	`+struct ggml_tensor* x_0 = sd->sample(work_ctx, x_t, NULL, c, uc, cfg_scale, sample_method, sigmas);`
`5391`	`5417`	`// struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");`
`5392`	`5418`	`// print_ggml_tensor(x_0);`
`5393`	`5419`	`int64_t sampling_end =ggml_time_ms();`
`@@ -5404,7 +5430,7 @@ std::vector<uint8_t*> StableDiffusion::txt2img(std::string prompt,`
`5404`	`5430`	`LOG_INFO("decoding %zu latents", final_latents.size());`
`5405`	`5431`	`for (size_t i =0; i < final_latents.size(); i++) {`
`5406`	`5432`	`t1 =ggml_time_ms();`
`5407`		`-struct ggml_tensor* img = sd->compute_first_stage(work_ctx, final_latents[i] /* x_0*/, true);`
	`5433`	`+struct ggml_tensor* img = sd->decode_first_stage(work_ctx, final_latents[i] /* x_0*/);`
`5408`	`5434`	`if (img !=NULL) {`
`5409`	`5435`	`results.push_back(sd_tensor_to_image(img));`
`5410`	`5436`	`}`
`@@ -5483,10 +5509,10 @@ std::vector<uint8_t*> StableDiffusion::img2img(const uint8_t* init_img_data,`
`5483`	`5509`	`t0 =ggml_time_ms();`
`5484`	`5510`	`ggml_tensor* init_latent =NULL;`
`5485`	`5511`	`if (!sd->use_tiny_autoencoder) {`
`5486`		`- ggml_tensor* moments = sd->compute_first_stage(work_ctx, init_img,false);`
	`5512`	`+ ggml_tensor* moments = sd->encode_first_stage(work_ctx, init_img);`
`5487`	`5513`	`init_latent = sd->get_first_stage_encoding(work_ctx, moments);`
`5488`	`5514`	`}else {`
`5489`		`- init_latent = sd->compute_first_stage(work_ctx, init_img,false);`
	`5515`	`+ init_latent = sd->encode_first_stage(work_ctx, init_img);`
`5490`	`5516`	`}`
`5491`	`5517`	`// print_ggml_tensor(init_latent);`
`5492`	`5518`	`t1 =ggml_time_ms();`
`@@ -5507,8 +5533,12 @@ std::vector<uint8_t*> StableDiffusion::img2img(const uint8_t* init_img_data,`
`5507`	`5533`	`// requires encode_adm`
`5508`	`5534`	`// apply set_timestep_embedding with dim 256`
`5509`	`5535`
	`5536`	`+ sd->rng->manual_seed(seed);`
	`5537`	`+struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, init_latent);`
	`5538`	`+ggml_tensor_set_f32_randn(noise, sd->rng);`
	`5539`	`+`
`5510`	`5540`	`LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]);`
`5511`		`-struct ggml_tensor* x_0 = sd->sample(work_ctx, init_latent, c, uc, cfg_scale, sample_method, sigma_sched);`
	`5541`	`+struct ggml_tensor* x_0 = sd->sample(work_ctx, init_latent, noise, c, uc, cfg_scale, sample_method, sigma_sched);`
`5512`	`5542`	`// struct ggml_tensor *x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");`
`5513`	`5543`	`// print_ggml_tensor(x_0);`
`5514`	`5544`	`int64_t t3 =ggml_time_ms();`
`@@ -5517,7 +5547,7 @@ std::vector<uint8_t*> StableDiffusion::img2img(const uint8_t* init_img_data,`
`5517`	`5547`	`sd->diffusion_model.destroy();`
`5518`	`5548`	`}`
`5519`	`5549`
`5520`		`-struct ggml_tensor* img = sd->compute_first_stage(work_ctx, x_0, true);`
	`5550`	`+struct ggml_tensor* img = sd->decode_first_stage(work_ctx, x_0);`
`5521`	`5551`	`if (img !=NULL) {`
`5522`	`5552`	`result.push_back(sd_tensor_to_image(img));`
`5523`	`5553`	`}`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 2eac844

File tree

1 file changed

1 file changed

`‎stable-diffusion.cpp‎`

0 commit comments