Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Autoexposure example restoration#728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
devshgraphicsprogramming wants to merge 47 commits into master
base:master
Choose a base branch
Loading
fromautoexposue_ex
Open
Show file tree
Hide file tree
Changes fromall commits
Commits
Show all changes
47 commits
Select commitHold shift + click to select a range
096e09d
Add luma_meter and tonemapper
nipunG314Jul 19, 2024
4fd700f
Update submodule pointer
nipunG314Jul 19, 2024
f93bb0f
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314Jul 20, 2024
6152f96
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314Jul 24, 2024
2311521
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314Jul 26, 2024
52e7ab2
Convert morton.h to hlsl
nipunG314Aug 1, 2024
1cc26bd
Fix HLSL morton code
nipunG314Aug 2, 2024
6922d0c
Create geom_luma_meter and computeLuma
nipunG314Aug 5, 2024
6e6eb64
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314Aug 5, 2024
603a92f
Add gatherLuma method
nipunG314Aug 7, 2024
810a6ac
Add getGatheredLuma()
nipunG314Aug 8, 2024
69a73c1
Add reinhard and aces hlsl operators
nipunG314Aug 8, 2024
72e0bc5
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314Aug 13, 2024
4c70cf5
cast mask values to correct type
nipunG314Aug 13, 2024
d9d6dd8
Add create methods to tonemapper params
nipunG314Aug 16, 2024
305f7e7
Remove getGatheredLuma from luma_meter
nipunG314Aug 16, 2024
3f4f6e9
Separate LumaMeteringWindow into a common header
nipunG314Aug 20, 2024
515512a
Simplify luma_meter naming
nipunG314Aug 20, 2024
77f5756
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314Aug 20, 2024
1919e53
Simplify morton code
nipunG314Aug 20, 2024
4c58238
Add missing comment
nipunG314Aug 20, 2024
3c3f8b8
Refactor tonemapping operators
nipunG314Aug 20, 2024
b0e0750
Small fixes
nipunG314Aug 20, 2024
e8e46c9
Use promote to simplify code
nipunG314Aug 21, 2024
ee5affe
Add static create to MeteringWindow
nipunG314Aug 21, 2024
56389f4
Infer sample count from viewportSize
nipunG314Aug 21, 2024
23771d1
Rename gatherLuma, add toXYZ method and templatize the float type
nipunG314Aug 22, 2024
ac39039
Add uploadFloat, downloadFloat and gatherLuma
nipunG314Aug 26, 2024
49a8049
Normalize tileOffset and coord to uv before computing Luma
nipunG314Aug 27, 2024
8a10ae2
Simplify return statement
nipunG314Sep 29, 2024
6b01b6d
Update submodule pointers
nipunG314Dec 11, 2024
4129afe
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314Dec 11, 2024
f95f1c1
Update submodule pointer
nipunG314Dec 11, 2024
1a58273
Update submodule pointer
nipunG314Dec 13, 2024
b6e1f57
Update submodule pointer
nipunG314Dec 13, 2024
5239c29
Update submodule pointer
nipunG314Jan 14, 2025
0df9ba6
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314Jan 14, 2025
06c915e
stop rolling back my modules!
Jan 21, 2025
90d20c4
point submodule at head
Jan 21, 2025
26a4ed2
Merge branch 'master' of github.com:Devsh-Graphics-Programming/Nabla …
nipunG314Feb 22, 2025
4edd38c
Add capabilities for atomic ops
nipunG314Mar 13, 2025
f1e3e98
Fix luma_meter
nipunG314Mar 13, 2025
ce2ca41
Merge branch 'autoexposue_ex' of github.com:Devsh-Graphics-Programmin…
nipunG314Mar 13, 2025
f1b7d17
Add median_luma_meter
nipunG314Mar 16, 2025
83ac633
Update submodule pointer
nipunG314Mar 16, 2025
2b5e502
Make changes to luma_meter
nipunG314Mar 17, 2025
974beca
merge master, fix conflicts
keptsecretDec 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Some comments aren't visible on the classic Files Changed page.

636 changes: 636 additions & 0 deletionsinclude/nbl/asset/utils/IMeshPacker.h
View file
Open in desktop

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletionsinclude/nbl/builtin/hlsl/luma_meter/common.hlsl
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

#ifndef _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_
#define _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_

#include "nbl/builtin/hlsl/cpp_compat.hlsl"

namespace nbl
{
namespace hlsl
{
namespace luma_meter
{

// Plain parameter bundle holding a 2D scale and a 2D offset for a metering window.
// Build instances through create() so both members are always initialised.
struct MeteringWindow
{
    using this_t = MeteringWindow;

    // Per-axis scale of the window.
    float32_t2 meteringWindowScale;
    // Per-axis offset of the window.
    float32_t2 meteringWindowOffset;

    // Returns a window whose members are set to `scale` and `offset` respectively.
    static this_t create(float32_t2 scale, float32_t2 offset)
    {
        this_t window;
        window.meteringWindowOffset = offset;
        window.meteringWindowScale = scale;
        return window;
    }
};

}
}
}

#endif
287 changes: 287 additions & 0 deletionsinclude/nbl/builtin/hlsl/luma_meter/luma_meter.hlsl
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

#ifndef _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_
#define _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_

#include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl"
#include "nbl/builtin/hlsl/glsl_compat/subgroup_arithmetic.hlsl"
#include "nbl/builtin/hlsl/workgroup/basic.hlsl"
#include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl"
#include "nbl/builtin/hlsl/type_traits.hlsl"
#include "nbl/builtin/hlsl/math/morton.hlsl"
#include "nbl/builtin/hlsl/luma_meter/common.hlsl"

namespace nbl
{
namespace hlsl
{
namespace luma_meter
{

template<uint32_t GroupSize, typename ValueAccessor, typename SharedAccessor, typename TexAccessor>
struct geom_meter {
using float_t = typename SharedAccessor::type;
using float_t2 = typename conditional<is_same_v<float_t, float32_t>, float32_t2, float16_t2>::type;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

even if doing color computation in float16_t, this doesn't free you from doing the texture coordinate calculation in float32_t

using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;
using this_t = geom_meter<GroupSize, ValueAccessor, SharedAccessor, TexAccessor>;

// Factory: returns a meter with `lumaMinMax` and `sampleCount` copied into the
// members of the same name. No other state is initialised here.
static this_t create(float_t2 lumaMinMax, float_t sampleCount)
{
    this_t meter;
    meter.sampleCount = sampleCount;
    meter.lumaMinMax = lumaMinMax;
    return meter;
}

// Forwards `value` to workgroup::reduction with the `plus` operator over
// GroupSize invocations, using `sdata` as the accessor handed to the reduction,
// and returns whatever that reduction yields.
float_t __reduction(float_t value, NBL_REF_ARG(SharedAccessor) sdata)
{
    float_t reduced = workgroup::reduction<plus<float_t>, GroupSize>::template __call<SharedAccessor>(value, sdata);
    return reduced;
}

// Samples `tex` at `shiftedCoord` remapped by the metering window
// (coord * scale + offset), converts the fetched colour through
// TexAccessor::toXYZ, clamps the result to [lumaMinMax.x, lumaMinMax.y]
// and returns log2 of the clamped value.
float_t __computeLumaLog2(
    NBL_CONST_REF_ARG(MeteringWindow) window,
    NBL_REF_ARG(TexAccessor) tex,
    float_t2 shiftedCoord
)
{
    float_t2 windowUV = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset;
    float_t3 texel = tex.get(windowUV);

    // Clamp before the logarithm so the result stays inside the configured range.
    float_t clampedLuma = clamp((float_t)TexAccessor::toXYZ(texel), lumaMinMax.x, lumaMinMax.y);
    return log2(clampedLuma);
}

// Encodes `val` as an unsigned fixed-point pattern and atomically adds it to `val_accessor`.
//
// Parameters:
//   val_accessor - accessor that receives the atomicAdd.
//   val          - value to encode (sampleLuma passes the workgroup sum of log2 luma).
//   minLog2      - subtracted from `val` before scaling.
//   rangeLog2    - factor the shifted value is multiplied by.
//
// Steps performed by the code below:
//   1. N = gl_NumWorkGroups().x * .y * .z
//   2. workgroupIndex = N / 64
//   3. fixedPointBitsLeft = 32 - ceil(log2(N)) + gl_SubgroupSizeLog2()
//   4. pattern = uint32_t(clamp((val - minLog2) * rangeLog2, 0, (1 << fixedPointBitsLeft) - 1))
//   5. atomicAdd(workgroupIndex & (subgroupSize - 1), pattern)
//
// NOTE(review): workgroupIndex is computed from the dispatch size rather than from a
// workgroup ID, so it looks like every workgroup of a dispatch targets the same slot - confirm.
// NOTE(review): this multiplies by rangeLog2 and __downloadFloat divides by rangeLog2;
// neither applies a factor derived from fixedPointBitsLeft - confirm the intended encoding.
// The plain-text passages inside this function are reviewer remarks captured with the code;
// they are left untouched.
void __uploadFloat(
NBL_REF_ARG(ValueAccessor) val_accessor,
float_t val,
float_t minLog2,
float_t rangeLog2
Comment on lines +63 to +64

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

should already be precomputed as members

)
{
uint32_t3 workGroupCount = glsl::gl_NumWorkGroups();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

take the workgroup count and workgroup XYZ coordinate (or workgroup index) from outside (as function arguments) otherwise in the presence of solutions such as virtual workgroups or persistent threads, this whole thing will fall apart

uint32_t workgroupIndex = (workGroupCount.x * workGroupCount.y * workGroupCount.z) / 64;
uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2();

uint32_t lumaSumBitPattern = uint32_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1)));
Comment on lines +69 to +71

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

lets write some docs for this....

Theval was produced by a workgroup reduction is performed of values in the[MinLog2,MaxLog2] range

Which makes thescaledLogLuma (the variable that should hold(val-minLog2)*rangeLog2) is between 0 and WorkGroupSize

This value is atomic added by N workgroups

You now want to represent it in Fixed Point during the atomic add, but not be vulnerable to overflow, this means the worst case is adding N times WorkGroupSize.

This means that we need to multiply the by(2^32-1)/N precomputed as a float or if you must round upN to PoT and see how many bits are left (512 workgroups, means 9 bits, so 23 are left). To avoid rounding precision errors, the PoT method is chosen.

I have no clue where you're getting+SubgroupSizeLog2 from.

Also the value of(1<<fixedPointBitsLeft)-1 must be precomputed increate and stored as a member

IT should be as easy as

constuint32_t scaledLumaLog2BitPattern =uint32_t((val-lumaMinLog2)*maxIncrement_over_lumaRangeLog2+float_t(0.5));

wheremaxIncrement = (0x1u<<(32u-uint32_t(ceil(log2(WorkGroupCount*WorkGroupSize)))))-1;


val_accessor.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern);
}

// Reads one accumulated value from `val_accessor` and maps it back to a float.
//
// Parameters:
//   val_accessor - accessor the value is read from.
//   index        - slot index; only its low gl_SubgroupSizeLog2() bits are used.
//   minLog2      - added to the result after the division.
//   rangeLog2    - divisor applied to the fetched value.
//
// Returns: float_t(fetched value) / rangeLog2 + minLog2.
//
// NOTE(review): the fetched value is only cast to float_t; no fixed-point scale factor
// is removed here - confirm against the encoding used by __uploadFloat.
// The plain-text passage before the closing brace is a reviewer remark captured with
// the code; it is left untouched.
float_t __downloadFloat(
NBL_REF_ARG(ValueAccessor) val_accessor,
uint32_t index,
float_t minLog2,
float_t rangeLog2
)
{
// Mask the index to the subgroup size before fetching.
float_t luma = (float_t)val_accessor.get(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1));
return luma / rangeLog2 + minLog2;
Comment on lines +83 to +84
Copy link
MemberAuthor

@devshgraphicsprogrammingdevshgraphicsprogrammingMar 17, 2025
edited
Loading

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

again, you're getting random floats based on workgroup index whichthankfully was always the same (rare case of two wrongs making a right)

Again if you wanted to stagger, you should use entire subgroup to load the values, then subgroup reduce them

just converting tofloat_t is not the correct way to decode, you should divide by themaxIncrement

}

// Sampling pass: every invocation computes one log2-luma sample, the samples are
// combined through __reduction, and the invocation with tid == 0 hands the combined
// value to __uploadFloat.
//
// Parameters:
//   window       - metering window forwarded to __computeLumaLog2.
//   val          - accessor that receives the uploaded value.
//   tex          - texture accessor forwarded to __computeLumaLog2.
//   sdata        - accessor forwarded to __reduction.
//   tileOffset   - added to the per-invocation coordinate before normalisation.
//   viewportSize - divisor that normalises (tileOffset + coord).
//
// The plain-text passage inside the parameter list is a reviewer remark captured
// with the code; it is left untouched.
void sampleLuma(
NBL_CONST_REF_ARG(MeteringWindow) window,
NBL_REF_ARG(ValueAccessor) val,
NBL_REF_ARG(TexAccessor) tex,
NBL_REF_ARG(SharedAccessor) sdata,
float_t2 tileOffset,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

why is tile Offset being provided from the outside? its a byproduct of your workgroupID, and workgroupSize-1 decoded as morton +1 in each dimension

float_t2 viewportSize
)
{
// Decode the invocation index as a 2D Morton code to get this invocation's coordinate.
uint32_t tid = workgroup::SubgroupContiguousIndex();
uint32_t2 coord = {
morton2d_decode_x(tid),
morton2d_decode_y(tid)
};

// NOTE(review): `luma` is declared but never read in this function.
float_t luma = 0.0f;
float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
float_t lumaLog2 = __computeLumaLog2(window, tex, shiftedCoord);
float_t lumaLog2Sum = __reduction(lumaLog2, sdata);

// Only one invocation per workgroup performs the upload.
if (tid == 0) {
__uploadFloat(
val,
lumaLog2Sum,
log2(lumaMinMax.x),
log2(lumaMinMax.y / lumaMinMax.x)
);
}
}

// Gather pass: each invocation decodes one slot of `val` through __downloadFloat
// (slot index = gl_SubgroupInvocationID()), the decoded values are summed with
// glsl::subgroupAdd, and the sum is divided by (1 << fixedPointBitsLeft) and then
// by the `sampleCount` member.
//
// fixedPointBitsLeft is recomputed here from gl_NumWorkGroups() of the dispatch that
// runs this function, using the same expression as __uploadFloat.
// NOTE(review): that matches the upload step only if both dispatches have the same
// workgroup count - confirm.
//
// Parameters:
//   val - accessor holding the accumulated values.
// Returns: the normalised value described above.
//
// The plain-text passages inside the body are reviewer remarks captured with the
// code; they are left untouched.
float_t gatherLuma(
NBL_REF_ARG(ValueAccessor) val
)
{
uint32_t tid = glsl::gl_SubgroupInvocationID();
float_t luma = glsl::subgroupAdd(
__downloadFloat(
val,
tid,
log2(lumaMinMax.x),
log2(lumaMinMax.y / lumaMinMax.x)
)
);

uint32_t3 workGroupCount = glsl::gl_NumWorkGroups();
uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2();
Comment on lines +131 to +132

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

you're supposed to normalize by the number of samples you took during the sampling step, yourworkGroupCount here is NOT that value, its the number of workgroups you're exposing with

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

You must precompute thefixedPointsBitsLeft in thecreate method (and it needs to know how many invocations you'll be running the sample step)


return (luma / (1 << fixedPointBitsLeft)) / sampleCount;
}

float_t sampleCount;
Comment on lines +134 to +137

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

you want to compute and store the reciprocals of sampleCount and of 1<<fixedPointBitsLeft

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

that was the purpose of the rcpFirstPassWGCount variable in the old GLSL

float_t2 lumaMinMax;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

don't do weird things we used to do in GLSL (due to no scalar layout), have a separate variable for min and max

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

also you should have the min and max precomputed with log2 already applied

};

// Histogram-based luma meter.
//
// sampleLuma() bins one clamped luma sample per invocation into a BinCount-entry
// histogram kept in `sdata`, runs an inclusive scan over it in chunks of GroupSize
// bins, and atomically adds the scanned values to `histo`.
// gatherLuma() copies `histo` back into `sdata` and returns the average of the decoded
// entries found at 40% and 60% of BinCount.
//
// Template parameters:
//   GroupSize         - number of invocations taking part in the workgroup scan.
//   BinCount          - number of histogram bins.
//   HistogramAccessor - must provide atomicAdd(index, value) and get(index).
//   SharedAccessor    - must provide type, set, get(index, out), atomicAdd and
//                       workgroupExecutionAndMemoryBarrier().
//   TexAccessor       - must provide get(uv) and a static toXYZ(color).
template<uint32_t GroupSize, uint16_t BinCount, typename HistogramAccessor, typename SharedAccessor, typename TexAccessor>
struct median_meter {
    using int_t = typename SharedAccessor::type;
    using float_t = float32_t;
    using float_t2 = typename conditional<is_same_v<float_t, float32_t>, float32_t2, float16_t2>::type;
    using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;
    using this_t = median_meter<GroupSize, BinCount, HistogramAccessor, SharedAccessor, TexAccessor>;

    // Factory: returns a meter whose `lumaMinMax` member is set to the argument.
    static this_t create(float_t2 lumaMinMax) {
        this_t retval;
        retval.lumaMinMax = lumaMinMax;
        return retval;
    }

    // Workgroup-wide inclusive scan (operator `plus<int_t>`) over GroupSize invocations.
    int_t __inclusive_scan(float_t value, NBL_REF_ARG(SharedAccessor) sdata) {
        return workgroup::inclusive_scan < plus < int_t >, GroupSize >::
            template __call <SharedAccessor>(value, sdata);
    }

    // Samples `tex` at `shiftedCoord` remapped by the metering window, converts the
    // colour through TexAccessor::toXYZ and clamps it to [lumaMinMax.x, lumaMinMax.y].
    float_t __computeLuma(
        NBL_CONST_REF_ARG(MeteringWindow) window,
        NBL_REF_ARG(TexAccessor) tex,
        float_t2 shiftedCoord
    ) {
        float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset;
        float_t3 color = tex.get(uvPos);
        float_t luma = (float_t)TexAccessor::toXYZ(color);

        return clamp(luma, lumaMinMax.x, lumaMinMax.y);
    }

    // Encodes `val` as int_t(clamp((val - minLog2) * rangeLog2, 0, (1 << fixedPointBitsLeft) - 1)),
    // with fixedPointBitsLeft = 32 - ceil(log2(total workgroup count)) + gl_SubgroupSizeLog2().
    // NOTE(review): the parameters are named *Log2 but every caller in this struct passes
    // the linear minimum and linear range - confirm which domain is intended.
    int_t __float2Int(
        float_t val,
        float_t minLog2,
        float_t rangeLog2
    ) {
        uint32_t3 workGroupCount = glsl::gl_NumWorkGroups();
        uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2();

        return int_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1)));
    }

    // Decodes an integer value as val / rangeLog2 + minLog2.
    float_t __int2Float(
        int_t val,
        float_t minLog2,
        float_t rangeLog2
    ) {
        return val / rangeLog2 + minLog2;
    }

    // Sampling pass; see the struct comment for the overall flow.
    //
    // Parameters:
    //   window       - metering window forwarded to __computeLuma.
    //   histo        - accessor that receives the scanned histogram via atomicAdd.
    //   tex          - texture accessor forwarded to __computeLuma.
    //   sdata        - workgroup scratch accessor holding the local histogram.
    //   tileOffset   - added to the per-invocation coordinate before normalisation.
    //   viewportSize - divisor that normalises (tileOffset + coord).
    void sampleLuma(
        NBL_CONST_REF_ARG(MeteringWindow) window,
        NBL_REF_ARG(HistogramAccessor) histo,
        NBL_REF_ARG(TexAccessor) tex,
        NBL_REF_ARG(SharedAccessor) sdata,
        float_t2 tileOffset,
        float_t2 viewportSize
    ) {
        uint32_t tid = workgroup::SubgroupContiguousIndex();

        // Zero the local histogram, GroupSize bins at a time.
        for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) {
            sdata.set(vid, 0);
        }

        sdata.workgroupExecutionAndMemoryBarrier();

        // Decode the invocation index as a 2D Morton code to get this invocation's coordinate.
        uint32_t2 coord = {
            morton2d_decode_x(tid),
            morton2d_decode_y(tid)
        };

        float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
        float_t luma = __computeLuma(window, tex, shiftedCoord);

        float_t binSize = (lumaMinMax.y - lumaMinMax.x) / BinCount;
        uint32_t binIndex = (uint32_t)((luma - lumaMinMax.x) / binSize);
        // luma is clamped to an inclusive maximum, so luma == lumaMinMax.y would
        // otherwise index one past the last bin.
        binIndex = min(binIndex, uint32_t(BinCount) - 1u);

        // NOTE(review): the encoded luma, not a count of 1, is accumulated per bin - confirm.
        sdata.atomicAdd(binIndex, __float2Int(luma, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));

        sdata.workgroupExecutionAndMemoryBarrier();

        float_t histogram_value;
        sdata.get(tid, histogram_value);

        sdata.workgroupExecutionAndMemoryBarrier();

        float_t sum = __inclusive_scan(histogram_value, sdata);
        histo.atomicAdd(tid, __float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));

        const bool is_last_wg_invocation = tid == (GroupSize - 1);
        const static uint32_t RoundedBinCount = 1 + (BinCount - 1) / GroupSize;

        // Remaining chunks of GroupSize bins; each chunk is seeded with the previous chunk's total.
        for (uint32_t i = 1; i < RoundedBinCount; i++) {
            uint32_t keyBucketStart = GroupSize * i;
            uint32_t vid = tid + keyBucketStart;

            // no if statement about the last iteration needed
            if (is_last_wg_invocation) {
                float_t beforeSum;
                sdata.get(keyBucketStart, beforeSum);
                sdata.set(keyBucketStart, beforeSum + sum);
            }

            // propagate last block tail to next block head and protect against subsequent scans stepping on each other's toes
            sdata.workgroupExecutionAndMemoryBarrier();

            // no aliasing anymore
            float_t atVid;
            sdata.get(vid, atVid);
            sum = __inclusive_scan(atVid, sdata);
            if (vid < BinCount) {
                histo.atomicAdd(vid, __float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));
            }
        }
    }

    // Gather pass: copies `histo` into `sdata`, then returns the mean of the decoded
    // entries at indices 40% and 60% of BinCount.
    //
    // Parameters:
    //   histo - accessor holding the histogram written by sampleLuma.
    //   sdata - workgroup scratch accessor used as the local copy.
    float_t gatherLuma(
        NBL_REF_ARG(HistogramAccessor) histo,
        NBL_REF_ARG(SharedAccessor) sdata
    ) {
        uint32_t tid = workgroup::SubgroupContiguousIndex();

        // vid < BinCount is guaranteed by the loop condition, so the index needs no masking.
        for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) {
            sdata.set(vid, histo.get(vid));
        }

        sdata.workgroupExecutionAndMemoryBarrier();

        // Truncate the fractional bin positions explicitly.
        const uint32_t lowerIndex = uint32_t(BinCount * 0.4);
        const uint32_t upperIndex = uint32_t(BinCount * 0.6);

        uint32_t percentile40, percentile60;
        sdata.get(lowerIndex, percentile40);
        sdata.get(upperIndex, percentile60);

        return (__int2Float(percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + __int2Float(percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2;
    }

    // x = minimum luma, y = maximum luma used for clamping and binning.
    float_t2 lumaMinMax;
};

}
}
}

#endif
Loading
Loading

[8]ページ先頭

©2009-2025 Movatter.jp