Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b3be75f

Browse files
committed
ComputeBoundingSphere SIMD implementation
1 parent 21883ab, commit b3be75f

File tree

1 file changed

+144
-22
lines changed
  • Sources/Overload/OvRendering/src/OvRendering/Resources

1 file changed

+144
-22
lines changed

‎Sources/Overload/OvRendering/src/OvRendering/Resources/Mesh.cpp‎

Lines changed: 144 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*/
66

77
#include<array>
8+
#include<intrin.h>
89

910
#include<OvDebug/Logger.h>
1011
#include<OvRendering/Resources/Mesh.h>
@@ -81,38 +82,159 @@ void OvRendering::Resources::Mesh::Upload(std::span<const Geometry::Vertex> p_ve
8182
}
8283
}
8384

84-
voidOvRendering::Resources::Mesh::ComputeBoundingSphere(std::span<const Geometry::Vertex> p_vertices)
85+
namespace
8586
{
86-
m_boundingSphere.position = OvMaths::FVector3::Zero;
87-
m_boundingSphere.radius =0.0f;
88-
89-
if (!p_vertices.empty())
87+
/**
* Computes the bounding sphere of a set of vertices using SSE intrinsics.
* The center is the midpoint of the per-axis min/max of the vertex positions, and the
* radius is the largest distance from that center to any vertex position.
* @param p_vertices Vertices to enclose. An empty span yields a zero-radius sphere at FVector3::Zero
* @return The computed bounding sphere
*/
OvRendering::Geometry::BoundingSphere ComputeBoundingSphereSIMD(std::span<const OvRendering::Geometry::Vertex> p_vertices)
{
	if (p_vertices.empty())
	{
		return {
			.position = OvMaths::FVector3::Zero,
			.radius = 0.0f
		};
	}

	// Builds (x, y, z, 0) from exactly three floats.
	// A 16-byte load starting at `position` would also read a fourth float that is not part of
	// the position; the Vertex layout is not visible from here, so that read could land past the
	// end of the last vertex. Assembling the lanes explicitly avoids depending on the layout.
	const auto loadPosition = [](const OvRendering::Geometry::Vertex& p_vertex)
	{
		const float* xyz = p_vertex.position;
		const __m128 xy = _mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<const __m64*>(xyz)); // (x, y, 0, 0)
		const __m128 z = _mm_load_ss(xyz + 2);                                                  // (z, 0, 0, 0)
		return _mm_movelh_ps(xy, z);                                                            // (x, y, z, 0)
	};

	// Pass 1: per-axis minimum and maximum, seeded with the first vertex
	__m128 vMin = loadPosition(p_vertices.front());
	__m128 vMax = vMin;

	for (const auto& vertex : p_vertices.subspan(1))
	{
		const __m128 vPos = loadPosition(vertex);
		vMin = _mm_min_ps(vMin, vPos);
		vMax = _mm_max_ps(vMax, vPos);
	}

	// center = (min + max) * 0.5 ; the w lane is 0 in every input, so it stays 0 here
	const __m128 vCenter = _mm_mul_ps(_mm_add_ps(vMin, vMax), _mm_set1_ps(0.5f));

	// Pass 2: largest squared distance from the center.
	// A single loop is used: every step feeds the same accumulator, so manually unrolling
	// it only duplicates code without shortening the dependency chain.
	__m128 vMaxDistSq = _mm_setzero_ps();

	for (const auto& vertex : p_vertices)
	{
		const __m128 vDiff = _mm_sub_ps(loadPosition(vertex), vCenter);
		// Mask 0x71: multiply the x, y and z lanes, write their sum to lane 0 only
		const __m128 vDistSq = _mm_dp_ps(vDiff, vDiff, 0x71);
		vMaxDistSq = _mm_max_ps(vMaxDistSq, vDistSq);
	}

	// Unaligned store: a plain local float array carries no 16-byte alignment guarantee,
	// which the aligned _mm_store_ps would require.
	float centerLanes[4];
	_mm_storeu_ps(centerLanes, vCenter);

	float maxDistSq;
	_mm_store_ss(&maxDistSq, vMaxDistSq);

	return {
		.position = OvMaths::FVector3{ centerLanes[0], centerLanes[1], centerLanes[2] },
		.radius = std::sqrt(maxDistSq)
	};
}
186+
187+
/**
* Scalar implementation of the bounding sphere computation.
* The center is the midpoint of the per-axis min/max of the vertex positions, and the
* radius is the largest distance from that center to any vertex position.
* @param p_vertices Vertices to enclose. An empty span yields a zero-radius sphere at FVector3::Zero
* @return The computed bounding sphere
*/
OvRendering::Geometry::BoundingSphere ComputeBoundingSphereRegular(std::span<const OvRendering::Geometry::Vertex> p_vertices)
{
	auto result = OvRendering::Geometry::BoundingSphere{
		.position = OvMaths::FVector3::Zero,
		.radius = 0.0f
	};

	if (p_vertices.empty())
	{
		return result;
	}

	float minX = std::numeric_limits<float>::max();
	float minY = std::numeric_limits<float>::max();
	float minZ = std::numeric_limits<float>::max();

	// lowest() rather than min(): for floating-point types min() is the smallest POSITIVE
	// normalized value, so seeding a maximum with it gives a wrong result whenever every
	// coordinate on an axis is negative.
	float maxX = std::numeric_limits<float>::lowest();
	float maxY = std::numeric_limits<float>::lowest();
	float maxZ = std::numeric_limits<float>::lowest();

	for (const auto& vertex : p_vertices)
	{
		minX = std::min(minX, vertex.position[0]);
		minY = std::min(minY, vertex.position[1]);
		minZ = std::min(minZ, vertex.position[2]);

		maxX = std::max(maxX, vertex.position[0]);
		maxY = std::max(maxY, vertex.position[1]);
		maxZ = std::max(maxZ, vertex.position[2]);
	}

	result.position = OvMaths::FVector3{ minX + maxX, minY + maxY, minZ + maxZ } / 2.0f;

	for (const auto& vertex : p_vertices)
	{
		// Build the vector from its components instead of reinterpreting the float array in place
		const auto position = OvMaths::FVector3{ vertex.position[0], vertex.position[1], vertex.position[2] };
		result.radius = std::max(result.radius, OvMaths::FVector3::Distance(result.position, position));
	}

	return result;
}
226+
}
227+
228+
voidOvRendering::Resources::Mesh::ComputeBoundingSphere(std::span<const Geometry::Vertex> p_vertices)
229+
{
230+
constexprbool useSIMD =true;
231+
232+
ifconstexpr (useSIMD)
233+
{
234+
m_boundingSphere =ComputeBoundingSphereSIMD(p_vertices);
235+
}
236+
else
237+
{
238+
m_boundingSphere =ComputeBoundingSphereRegular(p_vertices);
117239
}
118240
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp