- Notifications
You must be signed in to change notification settings - Fork67
Autoexposure example restoration#728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base:master
Are you sure you want to change the base?
Uh oh!
There was an error while loading.Please reload this page.
Changes fromall commits
096e09d4fd700ff93bb0f6152f96231152152e7ab21cc26bd6922d0c6e6eb64603a92f810a6ac69a73c172e0bc54c70cf5d9d6dd8305f7e73f4f6e9515512a77f57561919e534c582383c3f8b8b0e0750e8e46c9ee5affe56389f423771d1ac3903949a80498a10ae26b01b6d4129afef95f1c11a58273b6e1f575239c290df9ba606c915e90d20c426a4ed24edd38cf1e3e98ce2ca41f1b7d1783ac6332b5e502974becaFile filter
Filter by extension
Conversations
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Uh oh!
There was an error while loading.Please reload this page.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. | ||
| // This file is part of the "Nabla Engine". | ||
| // For conditions of distribution and use, see copyright notice in nabla.h | ||
| #ifndef _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_ | ||
| #define _NBL_BUILTIN_HLSL_LUMA_METER_COMMON_INCLUDED_ | ||
| #include "nbl/builtin/hlsl/cpp_compat.hlsl" | ||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace luma_meter | ||
| { | ||
// Pair of 2D scale and offset values describing a metering window.
struct MeteringWindow
{
    using this_t = MeteringWindow;

    // Multiplier for the metering window.
    float32_t2 meteringWindowScale;
    // Additive term for the metering window.
    float32_t2 meteringWindowOffset;

    // Returns a window holding the given `scale` and `offset`.
    static this_t create(float32_t2 scale, float32_t2 offset)
    {
        this_t window;
        window.meteringWindowOffset = offset;
        window.meteringWindowScale = scale;
        return window;
    }
};
| } | ||
| } | ||
| } | ||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,287 @@ | ||
| // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. | ||
| // This file is part of the "Nabla Engine". | ||
| // For conditions of distribution and use, see copyright notice in nabla.h | ||
| #ifndef _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ | ||
| #define _NBL_BUILTIN_HLSL_LUMA_METER_INCLUDED_ | ||
| #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" | ||
| #include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" | ||
| #include "nbl/builtin/hlsl/glsl_compat/subgroup_arithmetic.hlsl" | ||
| #include "nbl/builtin/hlsl/workgroup/basic.hlsl" | ||
| #include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl" | ||
| #include "nbl/builtin/hlsl/type_traits.hlsl" | ||
| #include "nbl/builtin/hlsl/math/morton.hlsl" | ||
| #include "nbl/builtin/hlsl/luma_meter/common.hlsl" | ||
| namespace nbl | ||
| { | ||
| namespace hlsl | ||
| { | ||
| namespace luma_meter | ||
| { | ||
| template<uint32_t GroupSize, typename ValueAccessor, typename SharedAccessor, typename TexAccessor> | ||
| struct geom_meter { | ||
| using float_t = typename SharedAccessor::type; | ||
| using float_t2 = typename conditional<is_same_v<float_t, float32_t>, float32_t2, float16_t2>::type; | ||
MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. even if doing color computation in | ||
| using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type; | ||
| using this_t = geom_meter<GroupSize, ValueAccessor, SharedAccessor, TexAccessor>; | ||
| static this_t create(float_t2 lumaMinMax, float_t sampleCount) | ||
| { | ||
| this_t retval; | ||
| retval.lumaMinMax = lumaMinMax; | ||
| retval.sampleCount = sampleCount; | ||
| return retval; | ||
| } | ||
| float_t __reduction(float_t value, NBL_REF_ARG(SharedAccessor) sdata) | ||
| { | ||
| return workgroup::reduction < plus < float_t >, GroupSize >:: | ||
| template __call <SharedAccessor>(value, sdata); | ||
| } | ||
| float_t __computeLumaLog2( | ||
| NBL_CONST_REF_ARG(MeteringWindow) window, | ||
| NBL_REF_ARG(TexAccessor) tex, | ||
| float_t2 shiftedCoord | ||
| ) | ||
| { | ||
| float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset; | ||
| float_t3 color = tex.get(uvPos); | ||
| float_t luma = (float_t)TexAccessor::toXYZ(color); | ||
| luma = clamp(luma, lumaMinMax.x, lumaMinMax.y); | ||
| return log2(luma); | ||
| } | ||
// Converts `val` into an unsigned fixed-point bit pattern and atomically adds it into `val_accessor`.
// `minLog2` is subtracted from `val` and the difference is multiplied by `rangeLog2` before the conversion.
// NOTE(review): sampleLuma passes log2(max / min) as `rangeLog2`, so the offset value is multiplied by the range rather than divided by it - confirm this is the intended scaling.
| void __uploadFloat( | ||
| NBL_REF_ARG(ValueAccessor) val_accessor, | ||
| float_t val, | ||
| float_t minLog2, | ||
| float_t rangeLog2 | ||
Comment on lines +63 to +64 MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. should already be precomputed as members | ||
| ) | ||
| { | ||
// The workgroup count is read from the dispatch built-in rather than passed in by the caller.
| uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); | ||
MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. take the workgroup count and workgroup XYZ coordinate (or workgroup index) from outside (as function arguments) otherwise in the presence of solutions such as virtual workgroups or persistent threads, this whole thing will fall apart | ||
// Despite its name, this value is derived only from the total workgroup count (divided by 64), so it is the same for every workgroup of a dispatch.
| uint32_t workgroupIndex = (workGroupCount.x * workGroupCount.y * workGroupCount.z) / 64; | ||
// 32 minus the rounded-up log2 of the total workgroup count, plus log2 of the subgroup size.
| uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); | ||
// Clamp the scaled value to [0, (1 << fixedPointBitsLeft) - 1] and truncate it to an integer.
// NOTE(review): `1 << fixedPointBitsLeft` shifts an integer literal; for shift amounts of 31 or more the result may not fit - confirm the expected range.
| uint32_t lumaSumBitPattern = uint32_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1))); | ||
Comment on lines +69 to +71 MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. lets write some docs for this.... The Which makes the This value is atomic added by N workgroups You now want to represent it in Fixed Point during the atomic add, but not be vulnerable to overflow, this means the worst case is adding N times WorkGroupSize. This means that we need to multiply the by I have no clue where you're getting Also the value of IT should be as easy as constuint32_t scaledLumaLog2BitPattern =uint32_t((val-lumaMinLog2)*maxIncrement_over_lumaRangeLog2+float_t(0.5)); where | ||
// The destination slot is `workgroupIndex` masked with (1 << gl_SubgroupSizeLog2()) - 1.
| val_accessor.atomicAdd(workgroupIndex & ((1 << glsl::gl_SubgroupSizeLog2()) - 1), lumaSumBitPattern); | ||
| } | ||
// Reads one accumulator slot from `val_accessor` and returns value / rangeLog2 + minLog2.
// `index` is masked with (1 << gl_SubgroupSizeLog2()) - 1 before the read.
| float_t __downloadFloat( | ||
| NBL_REF_ARG(ValueAccessor) val_accessor, | ||
| uint32_t index, | ||
| float_t minLog2, | ||
| float_t rangeLog2 | ||
| ) | ||
| { | ||
// The stored value is converted to float_t before the affine mapping is applied.
| float_t luma = (float_t)val_accessor.get(index & ((1 << glsl::gl_SubgroupSizeLog2()) - 1)); | ||
| return luma / rangeLog2 + minLog2; | ||
Comment on lines +83 to +84 MemberAuthor
| ||
| } | ||
// Sampling step: every invocation computes the clamped log2 luma of one texel, the workgroup sums
// those values with __reduction, and the invocation with index 0 uploads the sum through __uploadFloat.
// `tileOffset` plus the Morton-decoded invocation coordinate, divided by `viewportSize`, is the coordinate handed to __computeLumaLog2.
| void sampleLuma( | ||
| NBL_CONST_REF_ARG(MeteringWindow) window, | ||
| NBL_REF_ARG(ValueAccessor) val, | ||
| NBL_REF_ARG(TexAccessor) tex, | ||
| NBL_REF_ARG(SharedAccessor) sdata, | ||
| float_t2 tileOffset, | ||
MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. why is tile Offset being provided from the outside? its a byproduct of your workgroupID, and workgroupSize-1 decoded as morton +1 in each dimension | ||
| float_t2 viewportSize | ||
| ) | ||
| { | ||
// Index returned by workgroup::SubgroupContiguousIndex(), decoded below as a 2D Morton code.
| uint32_t tid = workgroup::SubgroupContiguousIndex(); | ||
| uint32_t2 coord = { | ||
| morton2d_decode_x(tid), | ||
| morton2d_decode_y(tid) | ||
| }; | ||
// NOTE(review): `luma` is never read after this initialisation.
| float_t luma = 0.0f; | ||
| float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize; | ||
| float_t lumaLog2 = __computeLumaLog2(window, tex, shiftedCoord); | ||
| float_t lumaLog2Sum = __reduction(lumaLog2, sdata); | ||
// Only the invocation with index 0 uploads the workgroup sum.
| if (tid == 0) { | ||
| __uploadFloat( | ||
| val, | ||
| lumaLog2Sum, | ||
// minLog2 = log2(min luma), rangeLog2 = log2(max luma / min luma).
| log2(lumaMinMax.x), | ||
| log2(lumaMinMax.y / lumaMinMax.x) | ||
| ); | ||
| } | ||
| } | ||
// Gather step: adds up one downloaded accumulator value per subgroup invocation with subgroupAdd,
// then divides the sum by (1 << fixedPointBitsLeft) and by `sampleCount`.
| float_t gatherLuma( | ||
| NBL_REF_ARG(ValueAccessor) val | ||
| ) | ||
| { | ||
// Each invocation downloads the slot selected by its subgroup invocation ID.
| uint32_t tid = glsl::gl_SubgroupInvocationID(); | ||
| float_t luma = glsl::subgroupAdd( | ||
| __downloadFloat( | ||
| val, | ||
| tid, | ||
| log2(lumaMinMax.x), | ||
| log2(lumaMinMax.y / lumaMinMax.x) | ||
| ) | ||
| ); | ||
// Same bit-budget formula as the one used in __uploadFloat.
| uint32_t3 workGroupCount = glsl::gl_NumWorkGroups(); | ||
| uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2(); | ||
Comment on lines +131 to +132 MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. you're supposed to normalize by the number of samples you took during the sampling step, your MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. You must precompute the | ||
// NOTE(review): __downloadFloat has already applied "/ rangeLog2 + minLog2" to every term of the sum before this division - confirm the intended order of normalisation.
| return (luma / (1 << fixedPointBitsLeft)) / sampleCount; | ||
| } | ||
| float_t sampleCount; | ||
Comment on lines +134 to +137 MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. you want to compute and store the reciprocal of MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. that was the purpose of the | ||
| float_t2 lumaMinMax; | ||
MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. don't do weird things we used to do in GLSL (due to no scalar layout), have a separate variable for min and max MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. also you should have the min and max precomputed with | ||
| }; | ||
// Histogram-based luma meter.
// sampleLuma bins clamped luma values into `BinCount` slots of the shared accessor, runs a workgroup
// inclusive scan over those slots and atomically adds the scanned values into the histogram accessor.
// gatherLuma reads the accumulated histogram back and averages the values found at 0.4 and 0.6 of `BinCount`.
template<uint32_t GroupSize, uint16_t BinCount, typename HistogramAccessor, typename SharedAccessor, typename TexAccessor>
struct median_meter
{
    using int_t = typename SharedAccessor::type;
    using float_t = float32_t;
    using float_t2 = typename conditional<is_same_v<float_t, float32_t>, float32_t2, float16_t2>::type;
    using float_t3 = typename conditional<is_same_v<float_t, float32_t>, float32_t3, float16_t3>::type;

    using this_t = median_meter<GroupSize, BinCount, HistogramAccessor, SharedAccessor, TexAccessor>;

    // Returns a meter whose luma clamp bounds are `lumaMinMax.x` (min) and `lumaMinMax.y` (max).
    static this_t create(float_t2 lumaMinMax)
    {
        this_t retval;
        retval.lumaMinMax = lumaMinMax;
        return retval;
    }

    // Workgroup-wide inclusive prefix sum (plus<int_t>) of `value`, using `sdata` as the accessor handed to the scan.
    int_t __inclusive_scan(float_t value, NBL_REF_ARG(SharedAccessor) sdata)
    {
        return workgroup::inclusive_scan<plus<int_t>, GroupSize>::template __call<SharedAccessor>(value, sdata);
    }

    // Fetches the texel at `shiftedCoord` (after applying the metering window's scale and offset),
    // converts it with TexAccessor::toXYZ and clamps the result to [lumaMinMax.x, lumaMinMax.y].
    float_t __computeLuma(
        NBL_CONST_REF_ARG(MeteringWindow) window,
        NBL_REF_ARG(TexAccessor) tex,
        float_t2 shiftedCoord
    )
    {
        float_t2 uvPos = shiftedCoord * window.meteringWindowScale + window.meteringWindowOffset;
        float_t3 color = tex.get(uvPos);
        float_t luma = (float_t)TexAccessor::toXYZ(color);

        return clamp(luma, lumaMinMax.x, lumaMinMax.y);
    }

    // Maps `val` to an integer: (val - minLog2) * rangeLog2, clamped to [0, (1 << fixedPointBitsLeft) - 1].
    // NOTE(review): the parameters are named *Log2, but the callers in this struct pass the linear
    // minimum and the linear range (max - min) - confirm which domain is intended.
    int_t __float2Int(
        float_t val,
        float_t minLog2,
        float_t rangeLog2
    )
    {
        uint32_t3 workGroupCount = glsl::gl_NumWorkGroups();
        // 32 minus the rounded-up log2 of the total workgroup count, plus log2 of the subgroup size.
        uint32_t fixedPointBitsLeft = 32 - uint32_t(ceil(log2(workGroupCount.x * workGroupCount.y * workGroupCount.z))) + glsl::gl_SubgroupSizeLog2();

        return int_t(clamp((val - minLog2) * rangeLog2, 0.f, float32_t((1 << fixedPointBitsLeft) - 1)));
    }

    // Inverse affine mapping used when reading values back: val / rangeLog2 + minLog2.
    float_t __int2Float(
        int_t val,
        float_t minLog2,
        float_t rangeLog2
    )
    {
        return val / rangeLog2 + minLog2;
    }

    // Sampling step: every invocation bins one clamped luma sample into shared memory, then the bins are
    // scanned `GroupSize` entries at a time and the scanned values are atomically added into `histo`.
    // `tileOffset` plus the Morton-decoded invocation coordinate, divided by `viewportSize`, is the
    // coordinate handed to __computeLuma.
    void sampleLuma(
        NBL_CONST_REF_ARG(MeteringWindow) window,
        NBL_REF_ARG(HistogramAccessor) histo,
        NBL_REF_ARG(TexAccessor) tex,
        NBL_REF_ARG(SharedAccessor) sdata,
        float_t2 tileOffset,
        float_t2 viewportSize
    )
    {
        uint32_t tid = workgroup::SubgroupContiguousIndex();

        // Zero the first BinCount shared slots before any invocation accumulates into them.
        for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) {
            sdata.set(vid, 0);
        }

        sdata.workgroupExecutionAndMemoryBarrier();

        uint32_t2 coord = {
            morton2d_decode_x(tid),
            morton2d_decode_y(tid)
        };

        float_t2 shiftedCoord = (tileOffset + (float32_t2)(coord)) / viewportSize;
        float_t luma = __computeLuma(window, tex, shiftedCoord);

        float_t binSize = (lumaMinMax.y - lumaMinMax.x) / BinCount;
        // __computeLuma clamps to the inclusive maximum, for which the division yields exactly BinCount;
        // keep that sample in the last bin instead of addressing one slot past the end.
        uint32_t binIndex = min((uint32_t)((luma - lumaMinMax.x) / binSize), uint32_t(BinCount - 1));

        sdata.atomicAdd(binIndex, __float2Int(luma, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));

        sdata.workgroupExecutionAndMemoryBarrier();

        float_t histogram_value;
        sdata.get(tid, histogram_value);

        sdata.workgroupExecutionAndMemoryBarrier();

        float_t sum = __inclusive_scan(histogram_value, sdata);
        // Same bound check as in the loop below, so only existing bins are written.
        if (tid < BinCount) {
            histo.atomicAdd(tid, __float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));
        }

        const bool is_last_wg_invocation = tid == (GroupSize - 1);
        // Number of GroupSize-wide blocks needed to cover BinCount bins (rounded up).
        const static uint32_t RoundedBinCount = 1 + (BinCount - 1) / GroupSize;

        for (uint32_t i = 1; i < RoundedBinCount; i++) {
            uint32_t keyBucketStart = GroupSize * i;
            uint32_t vid = tid + keyBucketStart;

            // no if statement about the last iteration needed
            if (is_last_wg_invocation) {
                float_t beforeSum;
                sdata.get(keyBucketStart, beforeSum);
                sdata.set(keyBucketStart, beforeSum + sum);
            }

            // propagate last block tail to next block head and protect against subsequent scans stepping on each other's toes
            sdata.workgroupExecutionAndMemoryBarrier();

            // no aliasing anymore
            float_t atVid;
            sdata.get(vid, atVid);
            sum = __inclusive_scan(atVid, sdata);
            if (vid < BinCount) {
                histo.atomicAdd(vid, __float2Int(sum, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x));
            }
        }
    }

    // Gather step: copies the BinCount histogram entries into shared memory, then returns the average of
    // the values decoded (via __int2Float) from the entries at 0.4 * BinCount and 0.6 * BinCount.
    float_t gatherLuma(
        NBL_REF_ARG(HistogramAccessor) histo,
        NBL_REF_ARG(SharedAccessor) sdata
    )
    {
        uint32_t tid = workgroup::SubgroupContiguousIndex();

        // The loop bound already keeps vid below BinCount, so the entry is read at vid directly;
        // masking with BinCount - 1 would alias entries whenever BinCount is not a power of two.
        for (uint32_t vid = tid; vid < BinCount; vid += GroupSize) {
            sdata.set(
                vid,
                histo.get(vid)
            );
        }

        sdata.workgroupExecutionAndMemoryBarrier();

        uint32_t percentile40, percentile60;
        // Fractional positions are truncated to integer bin indices.
        sdata.get(uint32_t(BinCount * 0.4), percentile40);
        sdata.get(uint32_t(BinCount * 0.6), percentile60);

        return (__int2Float(percentile40, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x) + __int2Float(percentile60, lumaMinMax.x, lumaMinMax.y - lumaMinMax.x)) / 2;
    }

    // Luma clamp bounds: x = minimum, y = maximum.
    float_t2 lumaMinMax;
};
| } | ||
| } | ||
| } | ||
| #endif | ||
Uh oh!
There was an error while loading.Please reload this page.
Uh oh!
There was an error while loading.Please reload this page.