Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2eac844

Browse files
committed
fix: generate image correctly in img2img mode
1 parent 968226a, commit 2eac844

File tree

1 file changed

+62
-32
lines changed

1 file changed

+62
-32
lines changed

‎stable-diffusion.cpp‎

Lines changed: 62 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ void print_ggml_tensor(struct ggml_tensor* tensor, bool shape_only = false) {
131131
if (shape_only) {
132132
return;
133133
}
134-
int range =1000;
134+
int range =3;
135135
for (int i =0; i < tensor->ne[3]; i++) {
136136
if (i >= range && i + range < tensor->ne[3]) {
137137
continue;
@@ -335,7 +335,7 @@ void sd_image_to_tensor(const uint8_t* image_data,
335335
}
336336
}
337337

338-
floatsd_mean(structggml_tensor* src) {
338+
floatggml_tensor_mean(structggml_tensor* src) {
339339
float mean =0.0f;
340340
int64_t nelements =ggml_nelements(src);
341341
float* data = (float*)src->data;
@@ -345,15 +345,26 @@ float sd_mean(struct ggml_tensor* src) {
345345
return mean;
346346
}
347347

348-
voidsd_scale(structggml_tensor* src,float scale) {
348+
// a = a+b
349+
voidggml_tensor_add(structggml_tensor* a,structggml_tensor* b) {
350+
GGML_ASSERT(ggml_nelements(a) ==ggml_nelements(b));
351+
int64_t nelements =ggml_nelements(a);
352+
float* vec_a = (float*)a->data;
353+
float* vec_b = (float*)b->data;
354+
for (int i =0; i < nelements; i++) {
355+
vec_a[i] = vec_a[i] + vec_b[i];
356+
}
357+
}
358+
359+
voidggml_tensor_scale(structggml_tensor* src,float scale) {
349360
int64_t nelements =ggml_nelements(src);
350361
float* data = (float*)src->data;
351362
for (int i =0; i < nelements; i++) {
352363
data[i] = data[i] * scale;
353364
}
354365
}
355366

356-
voidsd_clamp(structggml_tensor* src,float min,float max) {
367+
voidggml_tensor_clamp(structggml_tensor* src,float min,float max) {
357368
int64_t nelements =ggml_nelements(src);
358369
float* data = (float*)src->data;
359370
for (int i =0; i < nelements; i++) {
@@ -363,7 +374,7 @@ void sd_clamp(struct ggml_tensor* src, float min, float max) {
363374
}
364375

365376
// convert values from [0, 1] to [-1, 1]
366-
voidsd_convert_input(structggml_tensor* src) {
377+
voidggml_tensor_scale_input(structggml_tensor* src) {
367378
int64_t nelements =ggml_nelements(src);
368379
float* data = (float*)src->data;
369380
for (int i =0; i < nelements; i++) {
@@ -373,7 +384,7 @@ void sd_convert_input(struct ggml_tensor* src) {
373384
}
374385

375386
// convert values from [-1, 1] to [0, 1]
376-
voidsd_convert_output(structggml_tensor* src) {
387+
voidggml_tensor_scale_output(structggml_tensor* src) {
377388
int64_t nelements =ggml_nelements(src);
378389
float* data = (float*)src->data;
379390
for (int i =0; i < nelements; i++) {
@@ -4724,7 +4735,7 @@ class StableDiffusionGGML {
47244735
LOG_DEBUG("computing condition graph completed, taking %" PRId64" ms", t1 - t0);
47254736
ggml_tensor* result =ggml_dup_tensor(work_ctx, hidden_states);
47264737
{
4727-
float original_mean =sd_mean(hidden_states);
4738+
float original_mean =ggml_tensor_mean(hidden_states);
47284739
for (int i2 =0; i2 < hidden_states->ne[2]; i2++) {
47294740
for (int i1 =0; i1 < hidden_states->ne[1]; i1++) {
47304741
for (int i0 =0; i0 < hidden_states->ne[0]; i0++) {
@@ -4734,16 +4745,17 @@ class StableDiffusionGGML {
47344745
}
47354746
}
47364747
}
4737-
float new_mean =sd_mean(result);
4738-
sd_scale(result, (original_mean / new_mean));
4748+
float new_mean =ggml_tensor_mean(result);
4749+
ggml_tensor_scale(result, (original_mean / new_mean));
47394750
}
47404751
return result;// [1, 77, 768]
47414752
}
47424753

47434754
ggml_tensor*sample(ggml_context* work_ctx,
47444755
ggml_tensor*x_t,
4745-
ggml_tensor* positive,
4746-
ggml_tensor* negative,
4756+
ggml_tensor* noise,
4757+
ggml_tensor* c,
4758+
ggml_tensor* uc,
47474759
float cfg_scale,
47484760
SampleMethod method,
47494761
const std::vector<float>& sigmas) {
@@ -4756,12 +4768,18 @@ class StableDiffusionGGML {
47564768
structggml_tensor* noised_input =ggml_dup_tensor(work_ctx,x_t);
47574769
structggml_tensor* timesteps =ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32,1);// [N, ]
47584770
structggml_tensor* t_emb =new_timestep_embedding(work_ctx,NULL, timesteps, diffusion_model.model_channels);// [N, model_channels]
4759-
diffusion_model.begin(noised_input,positive, t_emb);
4771+
diffusion_model.begin(noised_input,c, t_emb);
47604772

4761-
bool has_unconditioned = cfg_scale !=1.0 &&negative !=NULL;
4773+
bool has_unconditioned = cfg_scale !=1.0 &&uc !=NULL;
47624774

4763-
// x = x * sigmas[0]
4764-
sd_scale(x, sigmas[0]);
4775+
if (noise ==NULL) {
4776+
// x = x * sigmas[0]
4777+
ggml_tensor_scale(x, sigmas[0]);
4778+
}else {
4779+
// xi = x + noise * sigma_sched[0]
4780+
ggml_tensor_scale(noise, sigmas[0]);
4781+
ggml_tensor_add(x, noise);
4782+
}
47654783

47664784
// denoise wrapper
47674785
structggml_tensor* out_cond =ggml_dup_tensor(work_ctx, x);
@@ -4797,15 +4815,15 @@ class StableDiffusionGGML {
47974815

47984816
copy_ggml_tensor(noised_input, input);
47994817
// noised_input = noised_input * c_in
4800-
sd_scale(noised_input, c_in);
4818+
ggml_tensor_scale(noised_input, c_in);
48014819

48024820
// cond
4803-
diffusion_model.compute(out_cond, n_threads, noised_input,NULL,positive, t_emb);
4821+
diffusion_model.compute(out_cond, n_threads, noised_input,NULL,c, t_emb);
48044822

48054823
float* negative_data =NULL;
48064824
if (has_unconditioned) {
48074825
// uncond
4808-
diffusion_model.compute(out_uncond, n_threads, noised_input,NULL,negative, t_emb);
4826+
diffusion_model.compute(out_uncond, n_threads, noised_input,NULL,uc, t_emb);
48094827
negative_data = (float*)out_uncond->data;
48104828
}
48114829
float* vec_denoised = (float*)denoised->data;
@@ -5260,15 +5278,15 @@ class StableDiffusionGGML {
52605278
int64_t t0 =ggml_time_ms();
52615279
if (!use_tiny_autoencoder) {
52625280
if (decode) {
5263-
sd_scale(x,1.0f / scale_factor);
5281+
ggml_tensor_scale(x,1.0f / scale_factor);
52645282
}else {
5265-
sd_convert_input(x);
5283+
ggml_tensor_scale_input(x);
52665284
}
52675285
first_stage_model.begin(x, decode);
52685286
first_stage_model.compute(result, n_threads, x, decode);
52695287
first_stage_model.end();
52705288
if (decode) {
5271-
sd_convert_output(result);
5289+
ggml_tensor_scale_output(result);
52725290
}
52735291
}else {
52745292
tae_first_stage.begin(x, decode);
@@ -5278,10 +5296,18 @@ class StableDiffusionGGML {
52785296
int64_t t1 =ggml_time_ms();
52795297
LOG_DEBUG("computing vae [mode: %s] graph completed, taking %.2fs", decode ?"DECODE" :"ENCODE", (t1 - t0) *1.0f /1000);
52805298
if (decode) {
5281-
sd_clamp(result,0.0f,1.0f);
5299+
ggml_tensor_clamp(result,0.0f,1.0f);
52825300
}
52835301
return result;
52845302
}
5303+
5304+
ggml_tensor*encode_first_stage(ggml_context* work_ctx, ggml_tensor* x) {
5305+
returncompute_first_stage(work_ctx, x,false);
5306+
}
5307+
5308+
ggml_tensor*decode_first_stage(ggml_context* work_ctx, ggml_tensor* x) {
5309+
returncompute_first_stage(work_ctx, x,true);
5310+
}
52855311
};
52865312

52875313
/*================================================= StableDiffusion ==================================================*/
@@ -5358,11 +5384,11 @@ std::vector<uint8_t*> StableDiffusion::txt2img(std::string prompt,
53585384
seed =rand();
53595385
}
53605386

5361-
t0=ggml_time_ms();
5362-
ggml_tensor*postive = sd->get_learned_condition(work_ctx, prompt);
5363-
structggml_tensor*negative =NULL;
5387+
t0 =ggml_time_ms();
5388+
ggml_tensor*c = sd->get_learned_condition(work_ctx, prompt);
5389+
structggml_tensor*uc =NULL;
53645390
if (cfg_scale !=1.0) {
5365-
negative = sd->get_learned_condition(work_ctx, negative_prompt);
5391+
uc = sd->get_learned_condition(work_ctx, negative_prompt);
53665392
}
53675393
t1 =ggml_time_ms();
53685394
LOG_INFO("get_learned_condition completed, taking %" PRId64" ms", t1 - t0);
@@ -5387,7 +5413,7 @@ std::vector<uint8_t*> StableDiffusion::txt2img(std::string prompt,
53875413

53885414
std::vector<float> sigmas = sd->denoiser->schedule->get_sigmas(sample_steps);
53895415

5390-
structggml_tensor* x_0 = sd->sample(work_ctx,x_t,postive, negative, cfg_scale, sample_method, sigmas);
5416+
structggml_tensor* x_0 = sd->sample(work_ctx,x_t,NULL, c, uc, cfg_scale, sample_method, sigmas);
53915417
// struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
53925418
// print_ggml_tensor(x_0);
53935419
int64_t sampling_end =ggml_time_ms();
@@ -5404,7 +5430,7 @@ std::vector<uint8_t*> StableDiffusion::txt2img(std::string prompt,
54045430
LOG_INFO("decoding %zu latents", final_latents.size());
54055431
for (size_t i =0; i < final_latents.size(); i++) {
54065432
t1 =ggml_time_ms();
5407-
structggml_tensor* img = sd->compute_first_stage(work_ctx, final_latents[i]/* x_0*/,true);
5433+
structggml_tensor* img = sd->decode_first_stage(work_ctx, final_latents[i]/* x_0*/);
54085434
if (img !=NULL) {
54095435
results.push_back(sd_tensor_to_image(img));
54105436
}
@@ -5483,10 +5509,10 @@ std::vector<uint8_t*> StableDiffusion::img2img(const uint8_t* init_img_data,
54835509
t0 =ggml_time_ms();
54845510
ggml_tensor* init_latent =NULL;
54855511
if (!sd->use_tiny_autoencoder) {
5486-
ggml_tensor* moments = sd->compute_first_stage(work_ctx, init_img,false);
5512+
ggml_tensor* moments = sd->encode_first_stage(work_ctx, init_img);
54875513
init_latent = sd->get_first_stage_encoding(work_ctx, moments);
54885514
}else {
5489-
init_latent = sd->compute_first_stage(work_ctx, init_img,false);
5515+
init_latent = sd->encode_first_stage(work_ctx, init_img);
54905516
}
54915517
// print_ggml_tensor(init_latent);
54925518
t1 =ggml_time_ms();
@@ -5507,8 +5533,12 @@ std::vector<uint8_t*> StableDiffusion::img2img(const uint8_t* init_img_data,
55075533
// requires encode_adm
55085534
// apply set_timestep_embedding with dim 256
55095535

5536+
sd->rng->manual_seed(seed);
5537+
structggml_tensor* noise =ggml_dup_tensor(work_ctx, init_latent);
5538+
ggml_tensor_set_f32_randn(noise, sd->rng);
5539+
55105540
LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]);
5511-
structggml_tensor* x_0 = sd->sample(work_ctx, init_latent, c, uc, cfg_scale, sample_method, sigma_sched);
5541+
structggml_tensor* x_0 = sd->sample(work_ctx, init_latent,noise,c, uc, cfg_scale, sample_method, sigma_sched);
55125542
// struct ggml_tensor *x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
55135543
// print_ggml_tensor(x_0);
55145544
int64_t t3 =ggml_time_ms();
@@ -5517,7 +5547,7 @@ std::vector<uint8_t*> StableDiffusion::img2img(const uint8_t* init_img_data,
55175547
sd->diffusion_model.destroy();
55185548
}
55195549

5520-
structggml_tensor* img = sd->compute_first_stage(work_ctx, x_0,true);
5550+
structggml_tensor* img = sd->decode_first_stage(work_ctx, x_0);
55215551
if (img !=NULL) {
55225552
result.push_back(sd_tensor_to_image(img));
55235553
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp