@@ -43,12 +43,14 @@ const char* schedule_str[] = {
// Command-line names of the run modes, listed in the same order as the
// enumerators of SDMode (txt2img, img2img, img2vid, convert).
// NOTE(review): presumably indexed by SDMode when matching the user-supplied
// mode string in parse_args -- the lookup loop is not visible here; confirm.
// Keep this table and the SDMode enum in sync when adding a mode.
4343const char * modes_str[] = {
4444" txt2img" ,
4545" img2img" ,
46+ " img2vid" ,
4647" convert" ,
4748};
4849
// Run mode selected on the command line. Enumerators carry no explicit
// values, so they number consecutively from 0 in declaration order, which
// matches the order of the entries in modes_str.
4950enum SDMode {
5051 TXT2IMG,
5152 IMG2IMG,
// Image-to-video mode; main() currently rejects it with an
// "SVD support is broken" error before any generation happens.
53+ IMG2VID,
5254 CONVERT,
// Not a real mode: as the last enumerator it equals the number of modes above.
5355 MODE_COUNT
5456};
@@ -71,12 +73,18 @@ struct SDParams {
7173
7274 std::string prompt;
7375 std::string negative_prompt;
76+ float min_cfg =1 .0f ;
7477float cfg_scale =7 .0f ;
7578int clip_skip = -1 ;// <= 0 represents unspecified
7679int width =512 ;
7780int height =512 ;
7881int batch_count =1 ;
7982
83+ int video_frames =6 ;
84+ int motion_bucket_id =127 ;
85+ int fps =6 ;
86+ float augmentation_level =0 .f;
87+
8088sample_method_t sample_method = EULER_A;
8189schedule_t schedule = DEFAULT;
8290int sample_steps =20 ;
@@ -108,6 +116,7 @@ void print_params(SDParams params) {
108116printf (" strength(control): %.2f\n " , params.control_strength );
109117printf (" prompt: %s\n " , params.prompt .c_str ());
110118printf (" negative_prompt: %s\n " , params.negative_prompt .c_str ());
119+ printf (" min_cfg: %.2f\n " , params.min_cfg );
111120printf (" cfg_scale: %.2f\n " , params.cfg_scale );
112121printf (" clip_skip: %d\n " , params.clip_skip );
113122printf (" width: %d\n " , params.width );
@@ -190,7 +199,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
190199 }
191200 }
192201if (mode_found == -1 ) {
193- fprintf (stderr," error: invalid mode %s, must be one of [txt2img, img2img]\n " ,
202+ fprintf (stderr,
203+ " error: invalid mode %s, must be one of [txt2img, img2img, img2vid, convert]\n " ,
194204 mode_selected);
195205exit (1 );
196206 }
@@ -420,7 +430,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
420430 params.n_threads =get_num_physical_cores ();
421431 }
422432
423- if (params.mode != CONVERT && params.prompt .length () ==0 ) {
433+ if (params.mode != CONVERT && params.mode != IMG2VID && params. prompt .length () ==0 ) {
424434fprintf (stderr," error: the following arguments are required: prompt\n " );
425435print_usage (argc, argv);
426436exit (1 );
@@ -432,7 +442,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
432442exit (1 );
433443 }
434444
435- if (params.mode == IMG2IMG && params.input_path .length () ==0 ) {
445+ if (( params.mode == IMG2IMG || params. mode == IMG2VID) && params.input_path .length () ==0 ) {
436446fprintf (stderr," error: when using the img2img mode, the following arguments are required: init-img\n " );
437447print_usage (argc, argv);
438448exit (1 );
@@ -539,9 +549,14 @@ int main(int argc, const char* argv[]) {
539549 }
540550 }
541551
552+ if (params.mode == IMG2VID) {
553+ fprintf (stderr," SVD support is broken, do not use it!!!\n " );
554+ return 1 ;
555+ }
556+
542557bool vae_decode_only =true ;
543558uint8_t * input_image_buffer =NULL ;
544- if (params.mode == IMG2IMG) {
559+ if (params.mode == IMG2IMG || params. mode == IMG2VID ) {
545560 vae_decode_only =false ;
546561
547562int c =0 ;
@@ -625,19 +640,57 @@ int main(int argc, const char* argv[]) {
6256403 ,
626641 input_image_buffer};
627642
628- results =img2img (sd_ctx,
629- input_image,
630- params.prompt .c_str (),
631- params.negative_prompt .c_str (),
632- params.clip_skip ,
633- params.cfg_scale ,
634- params.width ,
635- params.height ,
636- params.sample_method ,
637- params.sample_steps ,
638- params.strength ,
639- params.seed ,
640- params.batch_count );
643+ if (params.mode == IMG2VID) {
644+ results =img2vid (sd_ctx,
645+ input_image,
646+ params.width ,
647+ params.height ,
648+ params.video_frames ,
649+ params.motion_bucket_id ,
650+ params.fps ,
651+ params.augmentation_level ,
652+ params.min_cfg ,
653+ params.cfg_scale ,
654+ params.sample_method ,
655+ params.sample_steps ,
656+ params.strength ,
657+ params.seed );
658+ if (results ==NULL ) {
659+ printf (" generate failed\n " );
660+ free_sd_ctx (sd_ctx);
661+ return 1 ;
662+ }
663+ size_t last = params.output_path .find_last_of (" ." );
664+ std::string dummy_name = last != std::string::npos ? params.output_path .substr (0 , last) : params.output_path ;
665+ for (int i =0 ; i < params.video_frames ; i++) {
666+ if (results[i].data ==NULL ) {
667+ continue ;
668+ }
669+ std::string final_image_path = i >0 ? dummy_name +" _" +std::to_string (i +1 ) +" .png" : dummy_name +" .png" ;
670+ stbi_write_png (final_image_path.c_str (), results[i].width , results[i].height , results[i].channel ,
671+ results[i].data ,0 ,get_image_params (params, params.seed + i).c_str ());
672+ printf (" save result image to '%s'\n " , final_image_path.c_str ());
673+ free (results[i].data );
674+ results[i].data =NULL ;
675+ }
676+ free (results);
677+ free_sd_ctx (sd_ctx);
678+ return 0 ;
679+ }else {
680+ results =img2img (sd_ctx,
681+ input_image,
682+ params.prompt .c_str (),
683+ params.negative_prompt .c_str (),
684+ params.clip_skip ,
685+ params.cfg_scale ,
686+ params.width ,
687+ params.height ,
688+ params.sample_method ,
689+ params.sample_steps ,
690+ params.strength ,
691+ params.seed ,
692+ params.batch_count );
693+ }
641694 }
642695
643696if (results ==NULL ) {