Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
/coreclrPublic archive

Implement the SSE hardware intrinsics.#15538

Merged
tannergooding merged 32 commits into dotnet:master from tannergooding:sse-intrinsics
Jan 17, 2018
Merged
Show file tree
Hide file tree
Changes from1 commit
Commits
Show all changes
32 commits
Select commitHold shift + click to select a range
bd56bb3
Adding the remaining SSE intrinsics to hwintrinsiclistxarch.h
tannergoodingDec 15, 2017
adadee1
Adding support for the SSE And, AndNot, Divide, Max, Min, MoveHighToL…
tannergoodingDec 15, 2017
1cb5722
Adding tests for the And, AndNot, Divide, Max, Min, MoveHighToLow, Mo…
tannergoodingDec 15, 2017
402550c
Adding support for the SSE compare eq, gt, ge, lt, le, ne, ord, and u…
tannergoodingDec 15, 2017
77fa074
Adding tests for the SSE compare eq, gt, ge, lt, le, ne, ord, and uno…
tannergoodingDec 15, 2017
91c7550
Adding support for the SSE Reciprocal, ReciprocalSqrt, and Sqrt intri…
tannergoodingDec 24, 2017
c09ad38
Adding tests for the SSE Reciprocal, ReciprocalSqrt, and Sqrt intrinsics
tannergoodingDec 24, 2017
3f115ef
Adding support for the SSE Set, SetAll, and SetZero intrinsics
tannergoodingDec 25, 2017
fbc91fc
Adding tests for the SSE Set, SetAll, and SetZero intrinsics
tannergoodingDec 25, 2017
94f82fb
Adding support for the SSE Shuffle intrinsic
tannergoodingDec 24, 2017
9e54585
Adding tests for the SSE Shuffle intrinsic
tannergoodingDec 24, 2017
224b8dc
Adding support for the SSE StaticCast intrinsic
tannergoodingDec 28, 2017
1283d87
Adding tests for the SSE StaticCast intrinsic
tannergoodingDec 28, 2017
bfc992e
Adding support for the SSE Add, Divide, Max, Min, Move, Multiply, and…
tannergoodingDec 29, 2017
38af536
Adding tests for the SSE Add, Divide, Max, Min, Move, Multiply, and S…
tannergoodingDec 29, 2017
eaf9aef
Adding support for the SSE compare eq, gt, ge, lt, le, ne, ord, and u…
tannergoodingDec 29, 2017
cd60a85
Adding tests for the SSE compare eq, gt, ge, lt, le, ne, ord, and uno…
tannergoodingDec 29, 2017
3fcdaf8
Adding support for the SSE Reciprocal, ReciprocalSqrt, and Sqrt scala…
tannergoodingDec 29, 2017
db75c98
Adding tests for the SSE Reciprocal, ReciprocalSqrt, and Sqrt scalar …
tannergoodingDec 29, 2017
e84b55e
Adding support for the SSE ConvertTo Int32, Int32WithTruncation, Int6…
tannergoodingDec 30, 2017
2a256bd
Adding tests for the SSE ConvertTo Int32, Int32WithTruncation, Int64W…
tannergoodingDec 30, 2017
887d5c4
Adding support for the SSE Compare<op>Ordered and Compare<op>Unordere…
tannergoodingDec 30, 2017
0817912
Adding tests for the SSE Compare<op>Ordered and Compare<op>Unordered …
tannergoodingDec 31, 2017
5ef844b
Adding support for the SSE Set scalar intrinsic
tannergoodingDec 31, 2017
dee8fb7
Adding tests for the SSE Set scalar intrinsic
tannergoodingDec 31, 2017
baeed0c
Adding support for the SSE MoveMask intrinsic
tannergoodingJan 12, 2018
51a1a59
Adding tests for the SSE MoveMask intrinsic
tannergoodingJan 12, 2018
a15aa12
Updating the SSE HWIntrinsics to share code where possible.
tannergoodingJan 13, 2018
5ca9417
Updating most of the SSE Compare intrinsics to support containment
tannergoodingJan 13, 2018
677c5c3
Adding support for the SSE Load, LoadAligned, LoadHigh, LoadLow, and …
tannergoodingJan 16, 2018
11b6ac8
Adding tests for the SSE Load, LoadAligned, LoadHigh, LoadLow, and Lo…
tannergoodingJan 16, 2018
a8db845
Resolving PR feedback
tannergoodingJan 17, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
PrevPrevious commit
NextNext commit
Adding support for the SSE Shuffle intrinsic
  • Loading branch information
@tannergooding
tannergooding committedJan 17, 2018
commit94f82fb98657a27165846d5f18a1afee4efc88cd
7 changes: 7 additions & 0 deletionssrc/jit/compiler.h
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2065,6 +2065,13 @@ class Compiler
var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size);
GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(
var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size);
// Three-operand overload: creates a hardware intrinsic node from op1, op2 and op3
// together with the intrinsic ID, the SIMD base type and the size.
GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(var_types type,
GenTree* op1,
GenTree* op2,
GenTree* op3,
NamedIntrinsic hwIntrinsicID,
var_types baseType,
unsigned size);
GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(var_types type,
GenTree* op1,
GenTree* op2,
Expand Down
12 changes: 12 additions & 0 deletionssrc/jit/gentree.cpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -17921,6 +17921,18 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(
return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, baseType, size);
}

//------------------------------------------------------------------------
// gtNewSimdHWIntrinsicNode: Creates a GT_HWIntrinsic node that takes three operands.
//
// Arguments:
//    type          - node type, forwarded to the GenTreeHWIntrinsic constructor
//    op1           - first operand
//    op2           - second operand
//    op3           - third operand
//    hwIntrinsicID - intrinsic ID, forwarded to the GenTreeHWIntrinsic constructor
//    baseType      - base type, forwarded to the GenTreeHWIntrinsic constructor
//    size          - size, forwarded to the GenTreeHWIntrinsic constructor
//
// Return Value:
//    The newly allocated GenTreeHWIntrinsic node.
//
// Notes:
//    The three operands are wrapped into a single argument list, and that list
//    is handed to the node as its only operand.
//
GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types      type,
                                                       GenTree*       op1,
                                                       GenTree*       op2,
                                                       GenTree*       op3,
                                                       NamedIntrinsic hwIntrinsicID,
                                                       var_types      baseType,
                                                       unsigned       size)
{
    // Chain the three operands together so they can be passed as a single operand.
    GenTreeArgList* operandList = gtNewArgList(op1, op2, op3);

    GenTreeHWIntrinsic* intrinsicNode =
        new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, operandList, hwIntrinsicID, baseType, size);

    return intrinsicNode;
}

GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
GenTree* op1,
GenTree* op2,
Expand Down
89 changes: 89 additions & 0 deletionssrc/jit/hwintrinsiccodegenxarch.cpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -353,6 +353,95 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
emit->emitIns_SIMD_R_R_R(INS_xorps, targetReg, targetReg, targetReg, TYP_SIMD16);
break;

case NI_SSE_Shuffle:
{
GenTreeArgList* argList;

// Shuffle takes 3 operands, so op1 should be an arg list with two
// additional nodes in the chain.
assert(baseType == TYP_FLOAT);
assert(op1->OperIsList());
assert(op1->AsArgList()->Rest() != nullptr);
assert(op1->AsArgList()->Rest()->Rest() != nullptr);
assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
assert(op2 == nullptr);

argList = op1->AsArgList();
op1 = argList->Current();
op1Reg = op1->gtRegNum;
genConsumeRegs(op1);

argList = argList->Rest();
op2 = argList->Current();
op2Reg = op2->gtRegNum;
genConsumeRegs(op2);

argList = argList->Rest();
op3 = argList->Current();
genConsumeRegs(op3);

if (op3->IsCnsIntOrI())
{
ssize_t ival = op3->AsIntConCommon()->IconValue();
emit->emitIns_SIMD_R_R_R_I(INS_shufps, targetReg, op1Reg, op2Reg, (int)ival, TYP_SIMD16);
}
else
{
// We emit a fallback case for the scenario when op3 is not a constant. This should normally
// happen when the intrinsic is called indirectly, such as via Reflection. However, it can
// also occur if the consumer calls it directly and just doesn't pass a constant value.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

This looks great, though I'm wondering if it will be productive to try to abstract this so that we don't have to duplicate so much of this for other "immediate-only" instructions.

Copy link
MemberAuthor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

I think it might be good to abstract it as well.


const unsigned jmpCount = 256;
BasicBlock* jmpTable[jmpCount];

unsigned jmpTableBase = emit->emitBBTableDataGenBeg(jmpCount, true);
unsigned jmpTableOffs = 0;

// Emit the jump table

JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTableBase);

for (unsigned i = 0; i < jmpCount; i++)
{
jmpTable[i] = genCreateTempLabel();
JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, jmpTable[i]->bbNum);
emit->emitDataGenData(i, jmpTable[i]);
}

emit->emitDataGenEnd();

// Compute and jump to the appropriate offset in the switch table

regNumber baseReg = node->ExtractTempReg(); // the start of the switch table
regNumber offsReg = node->GetSingleTempReg(); // the offset into the switch table

emit->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), offsReg, compiler->eeFindJitDataOffs(jmpTableBase),
0);

emit->emitIns_R_ARX(INS_mov, EA_4BYTE, offsReg, offsReg, op3->gtRegNum, 4, 0);
emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, baseReg);
emit->emitIns_R_R(INS_add, EA_PTRSIZE, offsReg, baseReg);
emit->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), offsReg);

// Emit the switch table entries

BasicBlock* switchTableBeg = genCreateTempLabel();
BasicBlock* switchTableEnd = genCreateTempLabel();

genDefineTempLabel(switchTableBeg);

for (unsigned i = 0; i < jmpCount; i++)
{
genDefineTempLabel(jmpTable[i]);
emit->emitIns_SIMD_R_R_R_I(INS_shufps, targetReg, op1Reg, op2Reg, i, TYP_SIMD16);
emit->emitIns_J(INS_jmp, switchTableEnd);
}

genDefineTempLabel(switchTableEnd);
}
break;
}

case NI_SSE_Sqrt:
assert(baseType == TYP_FLOAT);
assert(op2 == nullptr);
Expand Down
26 changes: 25 additions & 1 deletionsrc/jit/hwintrinsicxarch.cpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -468,11 +468,35 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
GenTree* left = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE_UnpackLow, TYP_FLOAT, 16);
GenTree* right = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_UnpackLow, TYP_FLOAT, 16);
GenTree* control = gtNewIconNode(68, TYP_UBYTE);

retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, left, right, control, NI_SSE_Shuffle, TYP_FLOAT, 16);
break;
}

case NI_SSE_Shuffle:
assert(sig->numArgs == 3);
assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);

op3 = impStackTop().val;

if (op3->IsCnsIntOrI() || mustExpand)
{
impPopStack(); // Pop the value we peeked at
op2 = impSIMDPopStack(TYP_SIMD16);
op1 = impSIMDPopStack(TYP_SIMD16);
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, intrinsic, TYP_FLOAT, 16);
}
else
{
// When op3 is not a constant and we are not being forced to expand, we need to
// return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
// intrinsic method is recursive and will be forced to expand, at which point
// we emit some less efficient fallback code.

return nullptr;
}
break;

case NI_SSE_Add:
case NI_SSE_And:
case NI_SSE_AndNot:
Expand Down
12 changes: 12 additions & 0 deletionssrc/jit/lowerxarch.cpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2337,6 +2337,18 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_SSE_Shuffle:
{
assert(op1->OperIsList());
GenTree* op3 = op1->AsArgList()->Rest()->Rest()->Current();

if (op3->IsCnsIntOrI())
{
MakeSrcContained(node, op3);
}
break;
}

default:
assert((intrinsicID > NI_HW_INTRINSIC_START) && (intrinsicID < NI_HW_INTRINSIC_END));
break;
Expand Down
21 changes: 21 additions & 0 deletionssrc/jit/lsraxarch.cpp
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -2534,13 +2534,34 @@ void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree,
info->srcCount += GetOperandInfo(op1);
}
}

if (op2 != nullptr)
{
info->srcCount += GetOperandInfo(op2);
}

switch (intrinsicID)
{
case NI_SSE_Shuffle:
{
assert(op1->OperIsList());
GenTree* op3 = op1->AsArgList()->Rest()->Rest()->Current();

if (!op3->isContainedIntOrIImmed())
{
assert(!op3->IsCnsIntOrI());

// We need two extra registers when op3 isn't a constant so
// the offset into the jump table for the fallback path
// can be computed.

info->internalIntCount = 2;
info->setInternalCandidates(this, allRegs(TYP_INT));
break;
}
break;
}

#ifdef _TARGET_X86_
case NI_SSE42_Crc32:
{
Expand Down

[8]ページ先頭

©2009-2026 Movatter.jp