NotificationsYou must be signed in to change notification settings
Fork13.9k
Star90.6k

Commit6bc4400

committed

ggml : add Q5 WASM SIMD + GGML_FTYPE

1 parentf0d70f1 commit6bc4400Copy full SHA for 6bc4400

File tree

2 files changed

+177

-2

lines changed

2 files changed

+177

-2

lines changed

`‎ggml.c‎`

Lines changed: 160 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -330,7 +330,7 @@ static ggml_fp16_t table_exp_f16[1 << 16];`
`330`	`330`	`// precomputed f32 table for f16 (256 KB)`
`331`	`331`	`staticfloattable_f32_f16[1 <<16];`
`332`	`332`
`333`		`-#if defined(__ARM_NEON)`
	`333`	`+#if defined(__ARM_NEON)\|\| defined(__wasm_simd128__)`
`334`	`334`	`#defineB1(c,s,n) 0x ## n ## c , 0x ## n ## s`
`335`	`335`	`#defineB2(c,s,n) B1(c,s,n ## c), B1(c,s,n ## s)`
`336`	`336`	`#defineB3(c,s,n) B2(c,s,n ## c), B2(c,s,n ## s)`
`@@ -1087,7 +1087,7 @@ static void quantize_row_q4_0(const float * restrict x, void * restrict vy, int`
`1087`	`1087`	`constv128_tv=wasm_f32x4_mul(srcv[l],wasm_f32x4_splat(id));`
`1088`	`1088`	`constv128_tvf=wasm_f32x4_add(v,wasm_f32x4_splat(8.5f));`
`1089`	`1089`	`constv128_tvi=wasm_i32x4_trunc_sat_f32x4(vf);`
`1090`		`-constv128_tvc=wasm_i32x4_min_u(vi,wasm_i32x4_splat(15));`
	`1090`	`+constv128_tvc=wasm_i32x4_min(vi,wasm_i32x4_splat(15));`
`1091`	`1091`
`1092`	`1092`	`y[i].qs[2*l+0]=wasm_i32x4_extract_lane(vc,0) \| (wasm_i32x4_extract_lane(vc,1) <<4);`
`1093`	`1093`	`y[i].qs[2*l+1]=wasm_i32x4_extract_lane(vc,2) \| (wasm_i32x4_extract_lane(vc,3) <<4);`
`@@ -3180,6 +3180,72 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *`
`3180`	`3180`	`}`
`3181`	`3181`
`3182`	`3182`	`*s=vaddvq_f32(sumv);`
	`3183`	`+#elif defined(__wasm_simd128__)`
	`3184`	`+v128_tsumv=wasm_f32x4_splat(0.0f);`
	`3185`	`+`
	`3186`	`+uint64_ttmp[4];`
	`3187`	`+`
	`3188`	`+for (inti=0;i<nb;++i) {`
	`3189`	`+constblock_q5_0* restrictx0=&x[i];`
	`3190`	`+constblock_q8_0* restricty0=&y[i];`
	`3191`	`+`
	`3192`	`+constv128_tm4b=wasm_i8x16_splat(0x0F);`
	`3193`	`+constv128_ts16b=wasm_i8x16_splat(0x10);`
	`3194`	`+`
	`3195`	`+// extract the 5th bit`
	`3196`	`+uint32_tqh;`
	`3197`	`+memcpy(&qh,x0->qh,sizeof(qh));`
	`3198`	`+`
	`3199`	`+tmp[0]=table_b2b_u[(qh >>0)&0xFF];`
	`3200`	`+tmp[1]=table_b2b_u[(qh >>8)&0xFF];`
	`3201`	`+tmp[2]=table_b2b_u[(qh >>16)&0xFF];`
	`3202`	`+tmp[3]=table_b2b_u[(qh >>24) ];`
	`3203`	`+`
	`3204`	`+constv128_tqhl=wasm_v128_load(tmp+0);`
	`3205`	`+constv128_tqhh=wasm_v128_load(tmp+2);`
	`3206`	`+`
	`3207`	`+constv128_tv0=wasm_v128_load(x0->qs);`
	`3208`	`+`
	`3209`	`+// 4-bit -> 8-bit`
	`3210`	`+constv128_tv0l=wasm_v128_and (v0,m4b);`
	`3211`	`+constv128_tv0h=wasm_u8x16_shr(v0,4);`
	`3212`	`+`
	`3213`	`+// interleave`
	`3214`	`+constv128_tv0lz=wasm_v8x16_shuffle(v0l,v0h,0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);`
	`3215`	`+constv128_tv0hz=wasm_v8x16_shuffle(v0l,v0h,8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31);`
	`3216`	`+`
	`3217`	`+// add high bit and sub 16`
	`3218`	`+constv128_tv0lf=wasm_i8x16_sub(wasm_v128_or(v0lz,qhl),s16b);`
	`3219`	`+constv128_tv0hf=wasm_i8x16_sub(wasm_v128_or(v0hz,qhh),s16b);`
	`3220`	`+`
	`3221`	`+// load y`
	`3222`	`+constv128_tv1l=wasm_v128_load(y0->qs);`
	`3223`	`+constv128_tv1h=wasm_v128_load(y0->qs+16);`
	`3224`	`+`
	`3225`	`+// int8x16 -> int16x8`
	`3226`	`+constv128_tv0lfl=wasm_i16x8_extend_low_i8x16 (v0lf);`
	`3227`	`+constv128_tv0lfh=wasm_i16x8_extend_high_i8x16(v0lf);`
	`3228`	`+constv128_tv0hfl=wasm_i16x8_extend_low_i8x16 (v0hf);`
	`3229`	`+constv128_tv0hfh=wasm_i16x8_extend_high_i8x16(v0hf);`
	`3230`	`+`
	`3231`	`+constv128_tv1ll=wasm_i16x8_extend_low_i8x16 (v1l);`
	`3232`	`+constv128_tv1lh=wasm_i16x8_extend_high_i8x16(v1l);`
	`3233`	`+constv128_tv1hl=wasm_i16x8_extend_low_i8x16 (v1h);`
	`3234`	`+constv128_tv1hh=wasm_i16x8_extend_high_i8x16(v1h);`
	`3235`	`+`
	`3236`	`+constfloatx0d=GGML_FP16_TO_FP32(x0->d);`
	`3237`	`+`
	`3238`	`+// dot product`
	`3239`	`+sumv=wasm_f32x4_add(sumv,wasm_f32x4_mul(wasm_f32x4_convert_i32x4(`
	`3240`	`+wasm_i32x4_add(`
	`3241`	`+wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0lfl,v1ll),`
	`3242`	`+wasm_i32x4_dot_i16x8(v0lfh,v1lh)),`
	`3243`	`+wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl,v1hl),`
	`3244`	`+wasm_i32x4_dot_i16x8(v0hfh,v1hh)))),wasm_f32x4_splat(x0d*y0->d)));`
	`3245`	`+ }`
	`3246`	`+`
	`3247`	`+*s=wasm_f32x4_extract_lane(sumv,0)+wasm_f32x4_extract_lane(sumv,1)+`
	`3248`	`+wasm_f32x4_extract_lane(sumv,2)+wasm_f32x4_extract_lane(sumv,3);`
`3183`	`3249`	`#elif defined(__AVX2__)`
`3184`	`3250`	`// Initialize accumulator with zeros`
`3185`	`3251`	`__m256acc=_mm256_setzero_ps();`
`@@ -3311,6 +3377,77 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *`
`3311`	`3377`	`}`
`3312`	`3378`
`3313`	`3379`	`*s=vaddvq_f32(sumv)+summs;`
	`3380`	`+#elif defined(__wasm_simd128__)`
	`3381`	`+v128_tsumv=wasm_f32x4_splat(0.0f);`
	`3382`	`+`
	`3383`	`+floatsumms=0.0f;`
	`3384`	`+`
	`3385`	`+uint64_ttmp[4];`
	`3386`	`+`
	`3387`	`+for (inti=0;i<nb;++i) {`
	`3388`	`+constblock_q5_1* restrictx0=&x[i];`
	`3389`	`+constblock_q8_1* restricty0=&y[i];`
	`3390`	`+`
	`3391`	`+summs+=GGML_FP16_TO_FP32(x0->m)* (y0->s0+y0->s1);`
	`3392`	`+`
	`3393`	`+constv128_tm4b=wasm_i8x16_splat(0x0F);`
	`3394`	`+`
	`3395`	`+// extract the 5th bit`
	`3396`	`+uint32_tqh;`
	`3397`	`+memcpy(&qh,x0->qh,sizeof(qh));`
	`3398`	`+`
	`3399`	`+tmp[0]=table_b2b_u[(qh >>0)&0xFF];`
	`3400`	`+tmp[1]=table_b2b_u[(qh >>8)&0xFF];`
	`3401`	`+tmp[2]=table_b2b_u[(qh >>16)&0xFF];`
	`3402`	`+tmp[3]=table_b2b_u[(qh >>24) ];`
	`3403`	`+`
	`3404`	`+constv128_tqhl=wasm_v128_load(tmp+0);`
	`3405`	`+constv128_tqhh=wasm_v128_load(tmp+2);`
	`3406`	`+`
	`3407`	`+constv128_tv0=wasm_v128_load(x0->qs);`
	`3408`	`+`
	`3409`	`+// 4-bit -> 8-bit`
	`3410`	`+constv128_tv0l=wasm_v128_and (v0,m4b);`
	`3411`	`+constv128_tv0h=wasm_u8x16_shr(v0,4);`
	`3412`	`+`
	`3413`	`+staticboolx= true;`
	`3414`	`+`
	`3415`	`+// interleave`
	`3416`	`+constv128_tv0lz=wasm_v8x16_shuffle(v0l,v0h,0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);`
	`3417`	`+constv128_tv0hz=wasm_v8x16_shuffle(v0l,v0h,8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31);`
	`3418`	`+`
	`3419`	`+// add high bit`
	`3420`	`+constv128_tv0lf=wasm_v128_or(v0lz,qhl);`
	`3421`	`+constv128_tv0hf=wasm_v128_or(v0hz,qhh);`
	`3422`	`+`
	`3423`	`+// load y`
	`3424`	`+constv128_tv1l=wasm_v128_load(y0->qs);`
	`3425`	`+constv128_tv1h=wasm_v128_load(y0->qs+16);`
	`3426`	`+`
	`3427`	`+// int8x16 -> int16x8`
	`3428`	`+constv128_tv0lfl=wasm_i16x8_extend_low_i8x16 (v0lf);`
	`3429`	`+constv128_tv0lfh=wasm_i16x8_extend_high_i8x16(v0lf);`
	`3430`	`+constv128_tv0hfl=wasm_i16x8_extend_low_i8x16 (v0hf);`
	`3431`	`+constv128_tv0hfh=wasm_i16x8_extend_high_i8x16(v0hf);`
	`3432`	`+`
	`3433`	`+constv128_tv1ll=wasm_i16x8_extend_low_i8x16 (v1l);`
	`3434`	`+constv128_tv1lh=wasm_i16x8_extend_high_i8x16(v1l);`
	`3435`	`+constv128_tv1hl=wasm_i16x8_extend_low_i8x16 (v1h);`
	`3436`	`+constv128_tv1hh=wasm_i16x8_extend_high_i8x16(v1h);`
	`3437`	`+`
	`3438`	`+constfloatx0d=GGML_FP16_TO_FP32(x0->d);`
	`3439`	`+`
	`3440`	`+// dot product`
	`3441`	`+sumv=wasm_f32x4_add(sumv,wasm_f32x4_mul(wasm_f32x4_convert_i32x4(`
	`3442`	`+wasm_i32x4_add(`
	`3443`	`+wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0lfl,v1ll),`
	`3444`	`+wasm_i32x4_dot_i16x8(v0lfh,v1lh)),`
	`3445`	`+wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl,v1hl),`
	`3446`	`+wasm_i32x4_dot_i16x8(v0hfh,v1hh)))),wasm_f32x4_splat(x0d*y0->d)));`
	`3447`	`+ }`
	`3448`	`+`
	`3449`	`+*s=wasm_f32x4_extract_lane(sumv,0)+wasm_f32x4_extract_lane(sumv,1)+`
	`3450`	`+wasm_f32x4_extract_lane(sumv,2)+wasm_f32x4_extract_lane(sumv,3)+summs;`
`3314`	`3451`	`#elif defined(__AVX2__)`
`3315`	`3452`	`// Initialize accumulator with zeros`
`3316`	`3453`	`__m256acc=_mm256_setzero_ps();`
`@@ -4057,6 +4194,27 @@ bool ggml_is_quantized(enum ggml_type type) {`
`4057`	`4194`	`returnGGML_IS_QUANTIZED[type];`
`4058`	`4195`	`}`
`4059`	`4196`
	`4197`	`+enumggml_typeggml_ftype_to_ggml_type(enumggml_ftypeftype) {`
	`4198`	`+enumggml_typewtype=GGML_TYPE_COUNT;`
	`4199`	`+`
	`4200`	`+switch (ftype) {`
	`4201`	`+caseGGML_FTYPE_ALL_F32:wtype=GGML_TYPE_F32;break;`
	`4202`	`+caseGGML_FTYPE_MOSTLY_F16:wtype=GGML_TYPE_F16;break;`
	`4203`	`+caseGGML_FTYPE_MOSTLY_Q4_0:wtype=GGML_TYPE_Q4_0;break;`
	`4204`	`+caseGGML_FTYPE_MOSTLY_Q4_1:wtype=GGML_TYPE_Q4_1;break;`
	`4205`	`+caseGGML_FTYPE_MOSTLY_Q4_2:wtype=GGML_TYPE_Q4_2;break;`
	`4206`	`+caseGGML_FTYPE_MOSTLY_Q5_0:wtype=GGML_TYPE_Q5_0;break;`
	`4207`	`+caseGGML_FTYPE_MOSTLY_Q5_1:wtype=GGML_TYPE_Q5_1;break;`
	`4208`	`+caseGGML_FTYPE_MOSTLY_Q8_0:wtype=GGML_TYPE_Q8_0;break;`
	`4209`	`+caseGGML_FTYPE_UNKNOWN:wtype=GGML_TYPE_COUNT;break;`
	`4210`	`+caseGGML_FTYPE_MOSTLY_Q4_1_SOME_F16:wtype=GGML_TYPE_COUNT;break;`
	`4211`	`+ }`
	`4212`	`+`
	`4213`	`+GGML_ASSERT(wtype!=GGML_TYPE_COUNT);`
	`4214`	`+`
	`4215`	`+returnwtype;`
	`4216`	`+}`
	`4217`	`+`
`4060`	`4218`	`staticinlineboolggml_is_transposed(conststructggml_tensor*tensor) {`
`4061`	`4219`	`returntensor->nb[0]>tensor->nb[1];`
`4062`	`4220`	`}`

`‎ggml.h‎`

Lines changed: 17 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -232,6 +232,20 @@ extern "C" {`
`232`	`232`	`GGML_TYPE_COUNT,`
`233`	`233`	`};`
`234`	`234`
	`235`	`+// model file types`
	`236`	`+enumggml_ftype {`
	`237`	`+GGML_FTYPE_UNKNOWN=-1,`
	`238`	`+GGML_FTYPE_ALL_F32=0,`
	`239`	`+GGML_FTYPE_MOSTLY_F16=1,// except 1d tensors`
	`240`	`+GGML_FTYPE_MOSTLY_Q4_0=2,// except 1d tensors`
	`241`	`+GGML_FTYPE_MOSTLY_Q4_1=3,// except 1d tensors`
	`242`	`+GGML_FTYPE_MOSTLY_Q4_1_SOME_F16=4,// tok_embeddings.weight and output.weight are F16`
	`243`	`+GGML_FTYPE_MOSTLY_Q4_2=5,// except 1d tensors`
	`244`	`+GGML_FTYPE_MOSTLY_Q8_0=7,// except 1d tensors`
	`245`	`+GGML_FTYPE_MOSTLY_Q5_0=8,// except 1d tensors`
	`246`	`+GGML_FTYPE_MOSTLY_Q5_1=9,// except 1d tensors`
	`247`	`+ };`
	`248`	`+`
`235`	`249`	`// available tensor operations:`
`236`	`250`	`enumggml_op {`
`237`	`251`	`GGML_OP_NONE=0,`
`@@ -385,6 +399,9 @@ extern "C" {`
`385`	`399`
`386`	`400`	`GGML_APIboolggml_is_quantized(enumggml_typetype);`
`387`	`401`
	`402`	`+// TODO: temporary until model loading of ggml examples is refactored`
	`403`	`+GGML_APIenumggml_typeggml_ftype_to_ggml_type(enumggml_ftypeftype);`
	`404`	`+`
`388`	`405`	`// main`
`389`	`406`
`390`	`407`	`GGML_APIstructggml_context*ggml_init(structggml_init_paramsparams);`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit6bc4400

File tree

2 files changed

2 files changed

`‎ggml.c‎`

`‎ggml.h‎`

0 commit comments