Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 8199aed

Browse files
author
Vladlen Popolitov
committed
Merge branch 'master' into relaxed
2 parents ec359c1 + 0b938f8, commit 8199aed

File tree

2 files changed

+42
-18
lines changed

2 files changed

+42
-18
lines changed

‎src/halfutils.c‎

Lines changed: 34 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -8,14 +8,14 @@
88

99
#if defined(HAVE__GET_CPUID)
1010
#include<cpuid.h>
11-
#elif defined(HAVE__CPUID)
11+
#else
1212
#include<intrin.h>
1313
#endif
1414

1515
#ifdef_MSC_VER
16-
#defineTARGET_F16C_FMA
16+
#defineTARGET_F16C
1717
#else
18-
#defineTARGET_F16C_FMA __attribute__((target("f16c,fma")))
18+
#defineTARGET_F16C __attribute__((target("avx,f16c,fma")))
1919
#endif
2020
#endif
2121

@@ -40,8 +40,8 @@ HalfvecL2SquaredDistanceDefault(int dim, half * ax, half * bx)
4040
}
4141

4242
#ifdefHALFVEC_DISPATCH
43-
TARGET_F16C_FMAstaticfloat
44-
HalfvecL2SquaredDistanceF16cFma(intdim,half*ax,half*bx)
43+
TARGET_F16Cstaticfloat
44+
HalfvecL2SquaredDistanceF16c(intdim,half*ax,half*bx)
4545
{
4646
floatdistance;
4747
inti;
@@ -88,8 +88,8 @@ HalfvecInnerProductDefault(int dim, half * ax, half * bx)
8888
}
8989

9090
#ifdefHALFVEC_DISPATCH
91-
TARGET_F16C_FMAstaticfloat
92-
HalfvecInnerProductF16cFma(intdim,half*ax,half*bx)
91+
TARGET_F16Cstaticfloat
92+
HalfvecInnerProductF16c(intdim,half*ax,half*bx)
9393
{
9494
floatdistance;
9595
inti;
@@ -141,8 +141,8 @@ HalfvecCosineSimilarityDefault(int dim, half * ax, half * bx)
141141
}
142142

143143
#ifdefHALFVEC_DISPATCH
144-
TARGET_F16C_FMAstaticdouble
145-
HalfvecCosineSimilarityF16cFma(intdim,half*ax,half*bx)
144+
TARGET_F16Cstaticdouble
145+
HalfvecCosineSimilarityF16c(intdim,half*ax,half*bx)
146146
{
147147
floatsimilarity;
148148
floatnorma;
@@ -192,20 +192,37 @@ HalfvecCosineSimilarityF16cFma(int dim, half * ax, half * bx)
192192
#endif
193193

194194
#ifdefHALFVEC_DISPATCH
195-
#defineCPU_FEATURE_FMA (1 << 12)
196-
#defineCPU_FEATURE_F16C (1 << 29)
195+
#defineCPU_FEATURE_FMA (1 << 12)
196+
#defineCPU_FEATURE_OSXSAVE (1 << 27)
197+
#defineCPU_FEATURE_AVX (1 << 28)
198+
#defineCPU_FEATURE_F16C (1 << 29)
199+
200+
#ifdef_MSC_VER
201+
#defineTARGET_XSAVE
202+
#else
203+
#defineTARGET_XSAVE __attribute__((target("xsave")))
204+
#endif
197205

198-
staticbool
206+
TARGET_XSAVEstaticbool
199207
SupportsCpuFeature(unsignedintfeature)
200208
{
201209
unsignedintexx[4]= {0,0,0,0};
202210

203211
#if defined(HAVE__GET_CPUID)
204212
__get_cpuid(1,&exx[0],&exx[1],&exx[2],&exx[3]);
205-
#elif defined(HAVE__CPUID)
213+
#else
206214
__cpuid(exx,1);
207215
#endif
208216

217+
/* Check OS supports XSAVE */
218+
if ((exx[2]&CPU_FEATURE_OSXSAVE)!=CPU_FEATURE_OSXSAVE)
219+
return false;
220+
221+
/* Check XMM and YMM registers are enabled */
222+
if ((_xgetbv(0)&6)!=6)
223+
return false;
224+
225+
/* Now check features */
209226
return (exx[2]&feature)==feature;
210227
}
211228
#endif
@@ -222,11 +239,11 @@ HalfvecInit(void)
222239
HalfvecCosineSimilarity=HalfvecCosineSimilarityDefault;
223240

224241
#ifdefHALFVEC_DISPATCH
225-
if (SupportsCpuFeature(CPU_FEATURE_FMA |CPU_FEATURE_F16C))
242+
if (SupportsCpuFeature(CPU_FEATURE_AVX |CPU_FEATURE_F16C |CPU_FEATURE_FMA))
226243
{
227-
HalfvecL2SquaredDistance=HalfvecL2SquaredDistanceF16cFma;
228-
HalfvecInnerProduct=HalfvecInnerProductF16cFma;
229-
HalfvecCosineSimilarity=HalfvecCosineSimilarityF16cFma;
244+
HalfvecL2SquaredDistance=HalfvecL2SquaredDistanceF16c;
245+
HalfvecInnerProduct=HalfvecInnerProductF16c;
246+
HalfvecCosineSimilarity=HalfvecCosineSimilarityF16c;
230247
}
231248
#endif
232249
}

‎src/vector.c‎

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,14 @@
3434
#defineCreateStateDatums(dim) palloc(sizeof(Datum) * (dim + 1))
3535

3636
/* target_clones requires glibc */
37-
#if defined(__x86_64__)&& defined(__gnu_linux__)&& defined(__has_attribute)&&__has_attribute(target_clones)&& !defined(__FMA__)
37+
#if defined(__gnu_linux__)&& defined(__has_attribute)
38+
/* Use separate line for portability */
39+
#if__has_attribute(target_clones)
40+
#defineHAVE_TARGET_CLONES
41+
#endif
42+
#endif
43+
44+
#if defined(__x86_64__)&& defined(HAVE_TARGET_CLONES)&& !defined(__FMA__)
3845
#defineVECTOR_DISPATCH __attribute__((target_clones("default", "fma")))
3946
#else
4047
#defineVECTOR_DISPATCH

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp